Skip to content

Commit 50b17d9

Browse files
wenshao authored and rgiulietti committed on Feb 6, 2024
8316704: Regex-free parsing of Formatter and FormatProcessor specifiers
Reviewed-by: redestad, rgiulietti
1 parent 51d7169 commit 50b17d9

File tree

5 files changed

+293
-38
lines changed

5 files changed

+293
-38
lines changed
 

‎src/java.base/share/classes/java/util/FormatProcessor.java

+26-13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -30,7 +31,6 @@
3031
import java.lang.invoke.MethodType;
3132
import java.lang.StringTemplate.Processor;
3233
import java.lang.StringTemplate.Processor.Linkage;
33-
import java.util.regex.Matcher;
3434

3535
import jdk.internal.javac.PreviewFeature;
3636

@@ -218,22 +218,35 @@ public MethodHandle linkage(List<String> fragments, MethodType type) {
218218
* @throws MissingFormatArgumentException if not at end or found and not needed
219219
*/
220220
private static boolean findFormat(String fragment, boolean needed) {
221-
Matcher matcher = Formatter.FORMAT_SPECIFIER_PATTERN.matcher(fragment);
222-
String group;
223-
224-
while (matcher.find()) {
225-
group = matcher.group();
221+
int max = fragment.length();
222+
for (int i = 0; i < max;) {
223+
int n = fragment.indexOf('%', i);
224+
if (n < 0) {
225+
return false;
226+
}
226227

227-
if (!group.equals("%%") && !group.equals("%n")) {
228-
if (matcher.end() == fragment.length() && needed) {
229-
return true;
230-
}
228+
i = n + 1;
229+
if (i >= max) {
230+
return false;
231+
}
231232

232-
throw new MissingFormatArgumentException(group +
233-
" is not immediately followed by an embedded expression");
233+
char c = fragment.charAt(i);
234+
if (c == '%' || c == 'n') {
235+
i++;
236+
continue;
234237
}
238+
int off = new Formatter.FormatSpecifierParser(null, c, i, fragment, max)
239+
.parse();
240+
if (off == 0) {
241+
return false;
242+
}
243+
if (i + off == max && needed) {
244+
return true;
245+
}
246+
throw new MissingFormatArgumentException(
247+
fragment.substring(i - 1, i + off)
248+
+ " is not immediately followed by an embedded expression");
235249
}
236-
237250
return false;
238251
}
239252

‎src/java.base/share/classes/java/util/Formatter.java

+203-25
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -49,8 +50,6 @@
4950
import java.text.DecimalFormatSymbols;
5051
import java.text.NumberFormat;
5152
import java.text.spi.NumberFormatProvider;
52-
import java.util.regex.Matcher;
53-
import java.util.regex.Pattern;
5453

5554
import java.time.DateTimeException;
5655
import java.time.Instant;
@@ -2810,20 +2809,14 @@ public Formatter format(Locale l, String format, Object ... args) {
28102809
return this;
28112810
}
28122811

2813-
// %[argument_index$][flags][width][.precision][t]conversion
2814-
static final String FORMAT_SPECIFIER
2815-
= "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";
2816-
2817-
static final Pattern FORMAT_SPECIFIER_PATTERN = Pattern.compile(FORMAT_SPECIFIER);
2818-
28192812
/**
28202813
* Finds format specifiers in the format string.
28212814
*/
28222815
static List<FormatString> parse(String s) {
2816+
FormatSpecifierParser parser = null;
28232817
ArrayList<FormatString> al = new ArrayList<>();
28242818
int i = 0;
28252819
int max = s.length();
2826-
Matcher m = null; // create if needed
28272820
while (i < max) {
28282821
int n = s.indexOf('%', i);
28292822
if (n < 0) {
@@ -2846,14 +2839,16 @@ static List<FormatString> parse(String s) {
28462839
al.add(new FormatSpecifier(c));
28472840
i++;
28482841
} else {
2849-
if (m == null) {
2850-
m = FORMAT_SPECIFIER_PATTERN.matcher(s);
2851-
}
28522842
// We have already parsed a '%' at n, so we either have a
28532843
// match or the specifier at n is invalid
2854-
if (m.find(n) && m.start() == n) {
2855-
al.add(new FormatSpecifier(s, m));
2856-
i = m.end();
2844+
if (parser == null) {
2845+
parser = new FormatSpecifierParser(al, c, i, s, max);
2846+
} else {
2847+
parser.reset(c, i);
2848+
}
2849+
int off = parser.parse();
2850+
if (off > 0) {
2851+
i += off;
28572852
} else {
28582853
throw new UnknownFormatConversionException(String.valueOf(c));
28592854
}
@@ -2862,6 +2857,159 @@ static List<FormatString> parse(String s) {
28622857
return al;
28632858
}
28642859

2860+
static final class FormatSpecifierParser {
2861+
final ArrayList<FormatString> al;
2862+
final String s;
2863+
final int max;
2864+
char first;
2865+
int start;
2866+
int off;
2867+
char c;
2868+
int argSize;
2869+
int flagSize;
2870+
int widthSize;
2871+
2872+
FormatSpecifierParser(ArrayList<FormatString> al, char first, int start, String s, int max) {
2873+
this.al = al;
2874+
2875+
this.first = first;
2876+
this.c = first;
2877+
this.start = start;
2878+
this.off = start;
2879+
2880+
this.s = s;
2881+
this.max = max;
2882+
}
2883+
2884+
void reset(char first, int start) {
2885+
this.first = first;
2886+
this.c = first;
2887+
this.start = start;
2888+
this.off = start;
2889+
2890+
argSize = 0;
2891+
flagSize = 0;
2892+
widthSize = 0;
2893+
}
2894+
2895+
/**
2896+
* If a valid format specifier is found, construct a FormatString and add it to {@link #al}.
2897+
* The format specifiers for general, character, and numeric types have
2898+
* the following syntax:
2899+
*
2900+
* <blockquote><pre>
2901+
* %[argument_index$][flags][width][.precision]conversion
2902+
* </pre></blockquote>
2903+
*
2904+
* As described by the following regular expression:
2905+
*
2906+
* <blockquote><pre>
2907+
* %(\d+\$)?([-#+ 0,(\<]*)?(\d+)?(\.\d+)?([tT])?([a-zA-Z%])
2908+
* </pre></blockquote>
2909+
*
2910+
* @return the length of the format specifier. If no valid format specifier is found, 0 is returned.
2911+
*/
2912+
int parse() {
2913+
int precisionSize = 0;
2914+
2915+
// (\d+\$)?
2916+
parseArgument();
2917+
2918+
// ([-#+ 0,(\<]*)?
2919+
parseFlag();
2920+
2921+
// (\d+)?
2922+
parseWidth();
2923+
2924+
if (c == '.') {
2925+
// (\.\d+)?
2926+
precisionSize = parsePrecision();
2927+
if (precisionSize == -1) {
2928+
return 0;
2929+
}
2930+
}
2931+
2932+
// ([tT])?([a-zA-Z%])
2933+
char t = '\0', conversion = '\0';
2934+
if ((c == 't' || c == 'T') && off + 1 < max) {
2935+
char c1 = s.charAt(off + 1);
2936+
if (isConversion(c1)) {
2937+
t = c;
2938+
conversion = c1;
2939+
off += 2;
2940+
}
2941+
} else if (isConversion(c)) {
2942+
conversion = c;
2943+
++off;
2944+
} else {
2945+
return 0;
2946+
}
2947+
2948+
if (argSize + flagSize + widthSize + precisionSize + t + conversion != 0) {
2949+
if (al != null) {
2950+
FormatSpecifier formatSpecifier
2951+
= new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
2952+
al.add(formatSpecifier);
2953+
}
2954+
return off - start;
2955+
}
2956+
return 0;
2957+
}
2958+
2959+
private void parseArgument() {
2960+
// (\d+\$)?
2961+
int i = off;
2962+
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
2963+
if (i == off || c != '$') {
2964+
c = first;
2965+
return;
2966+
}
2967+
2968+
i++; // skip '$'
2969+
if (i < max) {
2970+
c = s.charAt(i);
2971+
}
2972+
2973+
argSize = i - off;
2974+
off = i;
2975+
}
2976+
2977+
private void parseFlag() {
2978+
// ([-#+ 0,(\<]*)?
2979+
int i = off;
2980+
for (; i < max && Flags.isFlag(c = s.charAt(i)); ++i); // empty body
2981+
flagSize = i - off;
2982+
off = i;
2983+
}
2984+
2985+
private void parseWidth() {
2986+
// (\d+)?
2987+
int i = off;
2988+
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
2989+
widthSize = i - off;
2990+
off = i;
2991+
}
2992+
2993+
private int parsePrecision() {
2994+
int i = ++off;
2995+
for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
2996+
if (i != off) {
2997+
int size = i - off + 1;
2998+
off = i;
2999+
return size;
3000+
}
3001+
return -1;
3002+
}
3003+
}
3004+
3005+
static boolean isConversion(char c) {
3006+
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '%';
3007+
}
3008+
3009+
private static boolean isDigit(char c) {
3010+
return c >= '0' && c <= '9';
3011+
}
3012+
28653013
interface FormatString {
28663014
int index();
28673015
void print(Formatter fmt, Object arg, Locale l) throws IOException;
@@ -2984,21 +3132,44 @@ private void conversion(char conv) {
29843132
}
29853133
}
29863134

2987-
FormatSpecifier(String s, Matcher m) {
2988-
index(s, m.start(1), m.end(1));
2989-
flags(s, m.start(2), m.end(2));
2990-
width(s, m.start(3), m.end(3));
2991-
precision(s, m.start(4), m.end(4));
2992-
2993-
int tTStart = m.start(5);
2994-
if (tTStart >= 0) {
3135+
FormatSpecifier(
3136+
String s,
3137+
int i,
3138+
int argSize,
3139+
int flagSize,
3140+
int widthSize,
3141+
int precisionSize,
3142+
char t,
3143+
char conversion
3144+
) {
3145+
int argEnd = i + argSize;
3146+
int flagEnd = argEnd + flagSize;
3147+
int widthEnd = flagEnd + widthSize;
3148+
int precisionEnd = widthEnd + precisionSize;
3149+
3150+
if (argSize > 0) {
3151+
index(s, i, argEnd);
3152+
}
3153+
if (flagSize > 0) {
3154+
flags(s, argEnd, flagEnd);
3155+
}
3156+
if (widthSize > 0) {
3157+
width(s, flagEnd, widthEnd);
3158+
}
3159+
if (precisionSize > 0) {
3160+
precision(s, widthEnd, precisionEnd);
3161+
}
3162+
if (t != '\0') {
29953163
dt = true;
2996-
if (s.charAt(tTStart) == 'T') {
3164+
if (t == 'T') {
29973165
flags = Flags.add(flags, Flags.UPPERCASE);
29983166
}
29993167
}
3000-
conversion(s.charAt(m.start(6)));
3168+
conversion(conversion);
3169+
check();
3170+
}
30013171

3172+
private void check() {
30023173
if (dt)
30033174
checkDateTime();
30043175
else if (Conversion.isGeneral(c))
@@ -4705,6 +4876,13 @@ private static int parse(char c) {
47054876
};
47064877
}
47074878

4879+
private static boolean isFlag(char c) {
4880+
return switch (c) {
4881+
case '-', '#', '+', ' ', '0', ',', '(', '<' -> true;
4882+
default -> false;
4883+
};
4884+
}
4885+
47084886
// Returns a string representation of the current {@code Flags}.
47094887
public static String toString(int f) {
47104888
StringBuilder sb = new StringBuilder();

‎test/jdk/java/lang/template/FormatterBuilder.java

+46
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
import java.util.FormatProcessor;
3232
import java.util.Objects;
3333
import java.util.Locale;
34+
import java.util.MissingFormatArgumentException;
35+
import java.util.UnknownFormatConversionException;
3436

3537
import static java.util.FormatProcessor.FMT;
3638

@@ -50,6 +52,28 @@ static void test(String a, String b) {
5052
}
5153
}
5254

55+
public interface Executable {
56+
void execute() throws Throwable;
57+
}
58+
59+
static <T extends Throwable> void assertThrows(Class<T> expectedType, Executable executable, String message) {
60+
Throwable actualException = null;
61+
try {
62+
executable.execute();
63+
} catch (Throwable e) {
64+
actualException = e;
65+
}
66+
if (actualException == null) {
67+
throw new RuntimeException("Expected " + expectedType + " to be thrown, but nothing was thrown.");
68+
}
69+
if (!expectedType.isInstance(actualException)) {
70+
throw new RuntimeException("Expected " + expectedType + " to be thrown, but was thrown " + actualException.getClass());
71+
}
72+
if (message != null && !message.equals(actualException.getMessage())) {
73+
throw new RuntimeException("Expected " + message + " to be thrown, but was thrown " + actualException.getMessage());
74+
}
75+
}
76+
5377
static void suite(FormatProcessor fmt) {
5478
Object nullObject = null;
5579
test(String.format("%b", false), fmt."%b\{false}");
@@ -911,5 +935,27 @@ static void suite(FormatProcessor fmt) {
911935
test(String.format("%-10A", -12345.6), fmt."%-10A\{-12345.6}");
912936
test(String.format("%-10A", 0.0), fmt."%-10A\{0.0}");
913937
test(String.format("%-10A", 12345.6), fmt."%-10A\{12345.6}");
938+
939+
test("aaa%false", fmt."aaa%%%b\{false}");
940+
test("aaa" + System.lineSeparator() + "false", fmt."aaa%n%b\{false}");
941+
942+
assertThrows(
943+
MissingFormatArgumentException.class,
944+
() -> fmt. "%10ba\{ false }",
945+
"Format specifier '%10b is not immediately followed by an embedded expression'");
946+
947+
assertThrows(
948+
MissingFormatArgumentException.class,
949+
() ->fmt. "%ba\{ false }",
950+
"Format specifier '%b is not immediately followed by an embedded expression'");
951+
952+
assertThrows(
953+
MissingFormatArgumentException.class,
954+
() ->fmt. "%b",
955+
"Format specifier '%b is not immediately followed by an embedded expression'");
956+
assertThrows(
957+
UnknownFormatConversionException.class,
958+
() ->fmt. "%0",
959+
"Conversion = '0'");
914960
}
915961
}

‎test/jdk/java/util/Formatter/Basic.java

+11
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.io.*;
2525
import java.util.Formatter;
2626
import java.util.Locale;
27+
import java.util.UnknownFormatConversionException;
2728

2829
public class Basic {
2930

@@ -168,6 +169,8 @@ private static void ck(String fs, String exp, String got) {
168169
}
169170

170171
public static void main(String[] args) {
172+
common();
173+
171174
BasicBoolean.test();
172175
BasicBooleanObject.test();
173176
BasicByte.test();
@@ -197,4 +200,12 @@ public static void main(String[] args) {
197200
System.out.printf("All %d tests passed", pass);
198201
}
199202
}
203+
204+
private static void common() {
205+
// non-conversion
206+
tryCatch("%12", UnknownFormatConversionException.class);
207+
tryCatch("% ", UnknownFormatConversionException.class);
208+
tryCatch("%,", UnknownFormatConversionException.class);
209+
tryCatch("%03.2", UnknownFormatConversionException.class);
210+
}
200211
}

‎test/micro/org/openjdk/bench/java/lang/StringFormat.java

+7
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.openjdk.jmh.annotations.State;
3333
import org.openjdk.jmh.annotations.Warmup;
3434

35+
import java.math.BigDecimal;
3536
import java.util.concurrent.TimeUnit;
3637

3738
/*
@@ -47,6 +48,12 @@ public class StringFormat {
4748

4849
public String s = "str";
4950
public int i = 17;
51+
public static final BigDecimal pi = new BigDecimal(Math.PI);
52+
53+
@Benchmark
54+
public String decimalFormat() {
55+
return "%010.3f".formatted(pi);
56+
}
5057

5158
@Benchmark
5259
public String stringFormat() {

0 commit comments

Comments
 (0)
Please sign in to comment.