Skip to content

Commit 32efd23

Browse files
deathy and rgiulietti
authored and committed Aug 17, 2023
8311939: Excessive allocation of Matcher.groups array
Reviewed-by: rriggs, igraves
1 parent ed585d1 commit 32efd23

File tree

3 files changed

+66
-4
lines changed

3 files changed

+66
-4
lines changed
 

‎src/java.base/share/classes/java/util/regex/Matcher.java

+2 -4
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,7 @@ public final class Matcher implements MatchResult {
247247
this.text = text;
248248

249249
// Allocate state storage
250-
int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
251-
groups = new int[parentGroupCount * 2];
250+
groups = new int[parent.capturingGroupCount * 2];
252251
locals = new int[parent.localCount];
253252
localsPos = new IntHashSet[parent.localTCNCount];
254253

@@ -422,8 +421,7 @@ public Matcher usePattern(Pattern newPattern) {
422421
namedGroups = null;
423422

424423
// Reallocate state storage
425-
int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
426-
groups = new int[parentGroupCount * 2];
424+
groups = new int[newPattern.capturingGroupCount * 2];
427425
locals = new int[newPattern.localCount];
428426
for (int i = 0; i < groups.length; i++)
429427
groups[i] = -1;

‎src/java.base/share/classes/java/util/regex/Pattern.java

+12
Original file line numberDiff line numberDiff line change
@@ -5187,6 +5187,12 @@ static class BackRef extends Node {
51875187
groupIndex = groupCount + groupCount;
51885188
}
51895189
boolean match(Matcher matcher, int i, CharSequence seq) {
5190+
// a reference to a non-existent group must never match
5191+
// group does not exist if matcher didn't allocate space for it
5192+
if (groupIndex >= matcher.groups.length) {
5193+
return false;
5194+
}
5195+
51905196
int j = matcher.groups[groupIndex];
51915197
int k = matcher.groups[groupIndex+1];
51925198

@@ -5223,6 +5229,12 @@ static class CIBackRef extends Node {
52235229
this.doUnicodeCase = doUnicodeCase;
52245230
}
52255231
boolean match(Matcher matcher, int i, CharSequence seq) {
5232+
// a reference to a non-existent group must never match
5233+
// group does not exist if matcher didn't allocate space for it
5234+
if (groupIndex >= matcher.groups.length) {
5235+
return false;
5236+
}
5237+
52265238
int j = matcher.groups[groupIndex];
52275239
int k = matcher.groups[groupIndex+1];
52285240

‎test/jdk/java/util/regex/RegExTest.java

+52
Original file line numberDiff line numberDiff line change
@@ -2041,6 +2041,58 @@ public static void backRefTest() {
20412041
check(pattern, toSupplementaries("abcdefghijkk"), true);
20422042
}
20432043

2044+
@Test
2045+
public static void ciBackRefTest() {
2046+
Pattern pattern = Pattern.compile("(?i)(a*)bc\\1");
2047+
check(pattern, "zzzaabcazzz", true);
2048+
2049+
pattern = Pattern.compile("(?i)(a*)bc\\1");
2050+
check(pattern, "zzzaabcaazzz", true);
2051+
2052+
pattern = Pattern.compile("(?i)(abc)(def)\\1");
2053+
check(pattern, "abcdefabc", true);
2054+
2055+
pattern = Pattern.compile("(?i)(abc)(def)\\3");
2056+
check(pattern, "abcdefabc", false);
2057+
2058+
for (int i = 1; i < 10; i++) {
2059+
// Make sure backref 1-9 are always accepted
2060+
pattern = Pattern.compile("(?i)abcdef\\" + i);
2061+
// and fail to match if the target group does not exist
2062+
check(pattern, "abcdef", false);
2063+
}
2064+
2065+
pattern = Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2066+
check(pattern, "abcdefghija", false);
2067+
check(pattern, "abcdefghija1", true);
2068+
2069+
pattern = Pattern.compile("(?i)(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2070+
check(pattern, "abcdefghijkk", true);
2071+
2072+
pattern = Pattern.compile("(?i)(a)bcdefghij\\11");
2073+
check(pattern, "abcdefghija1", true);
2074+
2075+
// Supplementary character tests
2076+
pattern = Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1"));
2077+
check(pattern, toSupplementaries("zzzaabcazzz"), true);
2078+
2079+
pattern = Pattern.compile("(?i)" + toSupplementaries("(a*)bc\\1"));
2080+
check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2081+
2082+
pattern = Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\1"));
2083+
check(pattern, toSupplementaries("abcdefabc"), true);
2084+
2085+
pattern = Pattern.compile("(?i)" + toSupplementaries("(abc)(def)\\3"));
2086+
check(pattern, toSupplementaries("abcdefabc"), false);
2087+
2088+
pattern = Pattern.compile("(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2089+
check(pattern, toSupplementaries("abcdefghija"), false);
2090+
check(pattern, toSupplementaries("abcdefghija1"), true);
2091+
2092+
pattern = Pattern.compile("(?i)" + toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2093+
check(pattern, toSupplementaries("abcdefghijkk"), true);
2094+
}
2095+
20442096
/**
20452097
* Unicode Technical Report #18, section 2.6 End of Line
20462098
* There is no empty line to be matched in the sequence \u000D\u000A

0 commit comments

Comments
 (0)
Please sign in to comment.