diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java index 588665a0acf69..03072638f601e 100644 --- a/src/java.base/share/classes/java/util/regex/Pattern.java +++ b/src/java.base/share/classes/java/util/regex/Pattern.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -916,7 +916,8 @@ public final class Pattern *

<p> There is no embedded flag character for enabling canonical * equivalence. * - *

<p> Specifying this flag may impose a performance penalty. </p>

+ *

<p> Specifying this flag may impose a performance penalty + * and a moderate risk of memory exhaustion.</p>

*/ public static final int CANON_EQ = 0x80; @@ -1095,6 +1096,9 @@ public static Pattern compile(String regex) { * Compiles the given regular expression into a pattern with the given * flags. * + *

<p>Setting {@link #CANON_EQ} among the flags may impose a moderate risk + * of memory exhaustion.</p>

+ * * @param regex * The expression to be compiled * @@ -1112,6 +1116,10 @@ public static Pattern compile(String regex) { * * @throws PatternSyntaxException * If the expression's syntax is invalid + * + * @implNote If {@link #CANON_EQ} is specified and the number of combining + * marks for any character is too large, an {@link java.lang.OutOfMemoryError} + * is thrown. */ public static Pattern compile(String regex, int flags) { return new Pattern(regex, flags); @@ -1145,6 +1153,13 @@ public String toString() { * The character sequence to be matched * * @return A new matcher for this pattern + * + * @implNote When a {@link Pattern} is deserialized, compilation is deferred + * until a direct or indirect invocation of this method. Thus, if a + * deserialized pattern has {@link #CANON_EQ} among its flags and the number + * of combining marks for any character is too large, an + * {@link java.lang.OutOfMemoryError} is thrown, + * as in {@link #compile(String, int)}. */ public Matcher matcher(CharSequence input) { if (!compiled) { @@ -1618,14 +1633,30 @@ private static String[] producePermutations(String input) { return result; } - int length = 1; + /* + * Since + * 12! = 479_001_600 < Integer.MAX_VALUE + * 13! = 6_227_020_800 > Integer.MAX_VALUE + * the computation of n! using int arithmetic will overflow iff + * n < 0 or n > 12 + * + * Here, nCodePoints! is computed in the next for-loop below. + * As nCodePoints >= 0, the computation overflows iff nCodePoints > 12. + * In that case, throw OOME to simulate length > Integer.MAX_VALUE. + */ int nCodePoints = countCodePoints(input); - for(int x=1; x<nCodePoints; x++) - length = length * (x+1); + if (nCodePoints > 12) { + throw new OutOfMemoryError("Pattern too complex"); + } + /* Compute length = nCodePoints! */ + int length = 1; + for (int x = 2; x <= nCodePoints; ++x) { + length *= x; + } String[] temp = new String[length]; - int combClass[] = new int[nCodePoints]; + int[] combClass = new int[nCodePoints]; for(int x=0, i=0; x