8310047: Add UTF-32 based Charsets into StandardCharsets

naotoj · naotoj · commit 00c7f914c665 · 2023-07-11T16:10:34.000Z
Reviewed-by: alanb, lancea, bpb, jpai, jlu
diff --git a/src/java.base/share/classes/java/nio/charset/Charset.java b/src/java.base/share/classes/java/nio/charset/Charset.java
@@ -168,37 +168,54 @@
  * <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th>
  *     <td>Sixteen-bit UCS Transformation Format,
  *         byte&nbsp;order identified by an optional byte-order mark</td></tr>
+ * <tr><th scope="row" style="vertical-align:top">{@code UTF-32BE}</th>
+ *     <td>Thirty-two-bit UCS Transformation Format,
+ *         big-endian byte&nbsp;order</td></tr>
+ * <tr><th scope="row" style="vertical-align:top">{@code UTF-32LE}</th>
+ *     <td>Thirty-two-bit UCS Transformation Format,
+ *         little-endian byte&nbsp;order</td></tr>
+ * <tr><th scope="row" style="vertical-align:top">{@code UTF-32}</th>
+ *     <td>Thirty-two-bit UCS Transformation Format,
+ *         byte&nbsp;order identified by an optional byte-order mark</td></tr>
  * </tbody>
  * </table></blockquote>
  *
  * <p> The {@code UTF-8} charset is specified by <a
  * href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279</i></a>; the
  * transformation format upon which it is based is specified in
- * Amendment&nbsp;2 of ISO&nbsp;10646-1 and is also described in the <a
+ * ISO&nbsp;10646-1 and is also described in the <a
  * href="http://www.unicode.org/standard/standard.html"><i>Unicode
  * Standard</i></a>.
  *
  * <p> The {@code UTF-16} charsets are specified by <a
  * href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC&nbsp;2781</i></a>; the
  * transformation formats upon which they are based are specified in
- * Amendment&nbsp;1 of ISO&nbsp;10646-1 and are also described in the <a
+ * ISO&nbsp;10646-1 and are also described in the <a
+ * href="http://www.unicode.org/standard/standard.html"><i>Unicode
+ * Standard</i></a>.
+ *
+ * <p> The {@code UTF-32} charsets are based upon transformation formats
+ * which are specified in
+ * ISO&nbsp;10646-1 and are also described in the <a
  * href="http://www.unicode.org/standard/standard.html"><i>Unicode
  * Standard</i></a>.
  *
- * <p> The {@code UTF-16} charsets use sixteen-bit quantities and are
+ * <p> The {@code UTF-16} and {@code UTF-32} charsets use sixteen-bit and thirty-two-bit
+ * quantities respectively, and are
  * therefore sensitive to byte order.  In these encodings the byte order of a
  * stream may be indicated by an initial <i>byte-order mark</i> represented by
- * the Unicode character <code>'&#92;uFEFF'</code>.  Byte-order marks are handled
+ * the Unicode character {@code U+FEFF}.  Byte-order marks are handled
  * as follows:
  *
  * <ul>
  *
- *   <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE}
+ *   <li><p> When decoding, the {@code UTF-16BE}, {@code UTF-16LE},
+ *   {@code UTF-32BE}, and {@code UTF-32LE}
  *   charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH
  *   NON-BREAKING SPACE</small>; when encoding, they do not write
  *   byte-order marks. </p></li>
  *
- *   <li><p> When decoding, the {@code UTF-16} charset interprets the
+ *   <li><p> When decoding, the {@code UTF-16} and {@code UTF-32} charsets interpret the
  *   byte-order mark at the beginning of the input stream to indicate the
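- *   byte-order of the stream but defaults to big-endian if there is no
- *   byte-order mark; when encoding, it uses big-endian byte order and writes
+ *   byte-order of the stream but default to big-endian if there is no
+ *   byte-order mark; when encoding, they use big-endian byte order and write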
diff --git a/src/java.base/share/classes/java/nio/charset/StandardCharsets.java b/src/java.base/share/classes/java/nio/charset/StandardCharsets.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,4 +74,23 @@ private StandardCharsets() {
      * optional byte-order mark.
      */
     public static final Charset UTF_16 = new sun.nio.cs.UTF_16();
+
+    /**
+     * Thirty-two-bit UCS Transformation Format, big-endian byte order.
+     * @since 22
+     */
+    public static final Charset UTF_32BE = new sun.nio.cs.UTF_32BE();
+
+    /**
+     * Thirty-two-bit UCS Transformation Format, little-endian byte order.
+     * @since 22
+     */
+    public static final Charset UTF_32LE = new sun.nio.cs.UTF_32LE();
+
+    /**
+     * Thirty-two-bit UCS Transformation Format, byte order identified by an
+     * optional byte-order mark.
+     * @since 22
+     */
+    public static final Charset UTF_32 = new sun.nio.cs.UTF_32();
 }
diff --git a/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template b/src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template
@@ -93,6 +93,9 @@ public class StandardCharsets extends CharsetProvider {
             map.put("utf-16", java.nio.charset.StandardCharsets.UTF_16);
             map.put("utf-16be", java.nio.charset.StandardCharsets.UTF_16BE);
             map.put("utf-16le", java.nio.charset.StandardCharsets.UTF_16LE);
+            map.put("utf-32", java.nio.charset.StandardCharsets.UTF_32);
+            map.put("utf-32be", java.nio.charset.StandardCharsets.UTF_32BE);
+            map.put("utf-32le", java.nio.charset.StandardCharsets.UTF_32LE);
             cache = map;
         }
         return map;
diff --git a/test/jdk/java/nio/charset/StandardCharsets/Standard.java b/test/jdk/java/nio/charset/StandardCharsets/Standard.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 4884238
+ * @bug 4884238 8310047
  * @summary Test standard charset name constants.
  * @author Mike Duigou
  * @run main Standard
@@ -41,7 +41,9 @@ public class Standard {
 
     private final static String standardCharsets[] = {
         "US-ASCII", "ISO-8859-1", "UTF-8",
-        "UTF-16BE", "UTF-16LE", "UTF-16" };
+        "UTF-16BE", "UTF-16LE", "UTF-16",
+        "UTF-32BE", "UTF-32LE", "UTF-32",
+    };
 
     public static void realMain(String[] args) {
         check(StandardCharsets.US_ASCII instanceof Charset);
@@ -50,20 +52,29 @@ public static void realMain(String[] args) {
         check(StandardCharsets.UTF_16BE instanceof Charset);
         check(StandardCharsets.UTF_16LE instanceof Charset);
         check(StandardCharsets.UTF_16 instanceof Charset);
+        check(StandardCharsets.UTF_32BE instanceof Charset);
+        check(StandardCharsets.UTF_32LE instanceof Charset);
+        check(StandardCharsets.UTF_32 instanceof Charset);
 
         check("US-ASCII".equals(StandardCharsets.US_ASCII.name()));
         check("ISO-8859-1".equals(StandardCharsets.ISO_8859_1.name()));
         check("UTF-8".equals(StandardCharsets.UTF_8.name()));
         check("UTF-16BE".equals(StandardCharsets.UTF_16BE.name()));
         check("UTF-16LE".equals(StandardCharsets.UTF_16LE.name()));
         check("UTF-16".equals(StandardCharsets.UTF_16.name()));
+        check("UTF-32BE".equals(StandardCharsets.UTF_32BE.name()));
+        check("UTF-32LE".equals(StandardCharsets.UTF_32LE.name()));
+        check("UTF-32".equals(StandardCharsets.UTF_32.name()));
 
         check(Charset.forName("US-ASCII") == StandardCharsets.US_ASCII);
         check(Charset.forName("ISO-8859-1") == StandardCharsets.ISO_8859_1);
         check(Charset.forName("UTF-8") == StandardCharsets.UTF_8);
         check(Charset.forName("UTF-16BE") == StandardCharsets.UTF_16BE);
         check(Charset.forName("UTF-16LE") == StandardCharsets.UTF_16LE);
         check(Charset.forName("UTF-16") == StandardCharsets.UTF_16);
+        check(Charset.forName("UTF-32BE") == StandardCharsets.UTF_32BE);
+        check(Charset.forName("UTF-32LE") == StandardCharsets.UTF_32LE);
+        check(Charset.forName("UTF-32") == StandardCharsets.UTF_32);
 
         Set<String> charsets = new HashSet<>();
         Field standardCharsetFields[] = StandardCharsets.class.getFields();