Skip to content

Commit

Permalink
8300207: Add a pre-check for the number of canonical equivalent permutations in j.u.r.Pattern
Browse files Browse the repository at this point in the history

Reviewed-by: smarks
  • Loading branch information
rgiulietti committed Jan 22, 2023
1 parent 7ced08d commit 030b071
Showing 1 changed file with 37 additions and 6 deletions.
43 changes: 37 additions & 6 deletions src/java.base/share/classes/java/util/regex/Pattern.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -916,7 +916,8 @@ public final class Pattern
* <p> There is no embedded flag character for enabling canonical
* equivalence.
*
* <p> Specifying this flag may impose a performance penalty. </p>
* <p> Specifying this flag may impose a performance penalty
* and a moderate risk of memory exhaustion.</p>
*/
public static final int CANON_EQ = 0x80;

Expand Down Expand Up @@ -1095,6 +1096,9 @@ public static Pattern compile(String regex) {
* Compiles the given regular expression into a pattern with the given
* flags.
*
* <p>Setting {@link #CANON_EQ} among the flags may impose a moderate risk
* of memory exhaustion.</p>
*
* @param regex
* The expression to be compiled
*
Expand All @@ -1112,6 +1116,10 @@ public static Pattern compile(String regex) {
*
* @throws PatternSyntaxException
* If the expression's syntax is invalid
*
* @implNote If {@link #CANON_EQ} is specified and the number of combining
* marks for any character is too large, an {@link java.lang.OutOfMemoryError}
* is thrown.
*/
public static Pattern compile(String regex, int flags) {
return new Pattern(regex, flags);
Expand Down Expand Up @@ -1145,6 +1153,13 @@ public String toString() {
* The character sequence to be matched
*
* @return A new matcher for this pattern
*
* @implNote When a {@link Pattern} is deserialized, compilation is deferred
* until a direct or indirect invocation of this method. Thus, if a
* deserialized pattern has {@link #CANON_EQ} among its flags and the number
* of combining marks for any character is too large, an
* {@link java.lang.OutOfMemoryError} is thrown,
* as in {@link #compile(String, int)}.
*/
public Matcher matcher(CharSequence input) {
if (!compiled) {
Expand Down Expand Up @@ -1618,14 +1633,30 @@ private static String[] producePermutations(String input) {
return result;
}

int length = 1;
/*
* Since
* 12! = 479_001_600 < Integer.MAX_VALUE
* 13! = 6_227_020_800 > Integer.MAX_VALUE
* the computation of n! using int arithmetic will overflow iff
* n < 0 or n > 12
*
* Here, nCodePoints! is computed in the next for-loop below.
* As nCodePoints >= 0, the computation overflows iff nCodePoints > 12.
* In that case, throw OOME to simulate length > Integer.MAX_VALUE.
*/
int nCodePoints = countCodePoints(input);
for(int x=1; x<nCodePoints; x++)
length = length * (x+1);
if (nCodePoints > 12) {
throw new OutOfMemoryError("Pattern too complex");
}

/* Compute length = nCodePoints! */
int length = 1;
for (int x = 2; x <= nCodePoints; ++x) {
length *= x;
}
String[] temp = new String[length];

int combClass[] = new int[nCodePoints];
int[] combClass = new int[nCodePoints];
for(int x=0, i=0; x<nCodePoints; x++) {
int c = Character.codePointAt(input, i);
combClass[x] = getClass(c);
Expand Down

1 comment on commit 030b071

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.