Skip to content

Commit 00c7f91

Browse files
committedJul 11, 2023
8310047: Add UTF-32 based Charsets into StandardCharsets
Reviewed-by: alanb, lancea, bpb, jpai, jlu
1 parent caadad4 commit 00c7f91

File tree

4 files changed

+60
-10
lines changed

4 files changed

+60
-10
lines changed
 

‎src/java.base/share/classes/java/nio/charset/Charset.java

+23-6
Original file line numberDiff line numberDiff line change
@@ -168,37 +168,54 @@
168168
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th>
169169
* <td>Sixteen-bit UCS Transformation Format,
170170
* byte&nbsp;order identified by an optional byte-order mark</td></tr>
171+
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32BE}</th>
172+
* <td>Thirty-two-bit UCS Transformation Format,
173+
* big-endian byte&nbsp;order</td></tr>
174+
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32LE}</th>
175+
* <td>Thirty-two-bit UCS Transformation Format,
176+
* little-endian byte&nbsp;order</td></tr>
177+
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32}</th>
178+
* <td>Thirty-two-bit UCS Transformation Format,
179+
* byte&nbsp;order identified by an optional byte-order mark</td></tr>
171180
* </tbody>
172181
* </table></blockquote>
173182
*
174183
* <p> The {@code UTF-8} charset is specified by <a
175184
* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279</i></a>; the
176185
* transformation format upon which it is based is specified in
177-
* Amendment&nbsp;2 of ISO&nbsp;10646-1 and is also described in the <a
186+
* ISO&nbsp;10646-1 and is also described in the <a
178187
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
179188
* Standard</i></a>.
180189
*
181190
* <p> The {@code UTF-16} charsets are specified by <a
182191
* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC&nbsp;2781</i></a>; the
183192
* transformation formats upon which they are based are specified in
184-
* Amendment&nbsp;1 of ISO&nbsp;10646-1 and are also described in the <a
193+
* ISO&nbsp;10646-1 and are also described in the <a
194+
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
195+
* Standard</i></a>.
196+
*
197+
* <p> The {@code UTF-32} charsets are based upon transformation formats
198+
* which are specified in
199+
* ISO&nbsp;10646-1 and are also described in the <a
185200
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
186201
* Standard</i></a>.
187202
*
188-
* <p> The {@code UTF-16} charsets use sixteen-bit quantities and are
203+
* <p> The {@code UTF-16} and {@code UTF-32} charsets use sixteen-bit and thirty-two-bit
204+
* quantities respectively, and are
189205
* therefore sensitive to byte order. In these encodings the byte order of a
190206
* stream may be indicated by an initial <i>byte-order mark</i> represented by
191-
* the Unicode character <code>'&#92;uFEFF'</code>. Byte-order marks are handled
207+
* the Unicode character {@code U+FEFF}. Byte-order marks are handled
192208
* as follows:
193209
*
194210
* <ul>
195211
*
196-
* <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE}
212+
* <li><p> When decoding, the {@code UTF-16BE}, {@code UTF-16LE},
213+
* {@code UTF-32BE}, and {@code UTF-32LE}
197214
* charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH
198215
* NON-BREAKING SPACE</small>; when encoding, they do not write
199216
* byte-order marks. </p></li>
200217
*
201-
* <li><p> When decoding, the {@code UTF-16} charset interprets the
218+
* <li><p> When decoding, the {@code UTF-16} and {@code UTF-32} charsets interpret the
202219
* byte-order mark at the beginning of the input stream to indicate the
203220
* byte-order of the stream but default to big-endian if there is no
204221
* byte-order mark; when encoding, they use big-endian byte order and write

‎src/java.base/share/classes/java/nio/charset/StandardCharsets.java

+20-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -74,4 +74,23 @@ private StandardCharsets() {
7474
* optional byte-order mark.
7575
*/
7676
public static final Charset UTF_16 = new sun.nio.cs.UTF_16();
77+
78+
/**
79+
* Thirty-two-bit UCS Transformation Format, big-endian byte order.
80+
* @since 22
81+
*/
82+
public static final Charset UTF_32BE = new sun.nio.cs.UTF_32BE();
83+
84+
/**
85+
* Thirty-two-bit UCS Transformation Format, little-endian byte order.
86+
* @since 22
87+
*/
88+
public static final Charset UTF_32LE = new sun.nio.cs.UTF_32LE();
89+
90+
/**
91+
* Thirty-two-bit UCS Transformation Format, byte order identified by an
92+
* optional byte-order mark.
93+
* @since 22
94+
*/
95+
public static final Charset UTF_32 = new sun.nio.cs.UTF_32();
7796
}

‎src/java.base/share/classes/sun/nio/cs/StandardCharsets.java.template

+3
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ public class StandardCharsets extends CharsetProvider {
9393
map.put("utf-16", java.nio.charset.StandardCharsets.UTF_16);
9494
map.put("utf-16be", java.nio.charset.StandardCharsets.UTF_16BE);
9595
map.put("utf-16le", java.nio.charset.StandardCharsets.UTF_16LE);
96+
map.put("utf-32", java.nio.charset.StandardCharsets.UTF_32);
97+
map.put("utf-32be", java.nio.charset.StandardCharsets.UTF_32BE);
98+
map.put("utf-32le", java.nio.charset.StandardCharsets.UTF_32LE);
9699
cache = map;
97100
}
98101
return map;

‎test/jdk/java/nio/charset/StandardCharsets/Standard.java

+14-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
2323

2424
/*
2525
* @test
26-
* @bug 4884238
26+
* @bug 4884238 8310047
2727
* @summary Test standard charset name constants.
2828
* @author Mike Duigou
2929
* @run main Standard
@@ -41,7 +41,9 @@ public class Standard {
4141

4242
private final static String standardCharsets[] = {
4343
"US-ASCII", "ISO-8859-1", "UTF-8",
44-
"UTF-16BE", "UTF-16LE", "UTF-16" };
44+
"UTF-16BE", "UTF-16LE", "UTF-16",
45+
"UTF-32BE", "UTF-32LE", "UTF-32",
46+
};
4547

4648
public static void realMain(String[] args) {
4749
check(StandardCharsets.US_ASCII instanceof Charset);
@@ -50,20 +52,29 @@ public static void realMain(String[] args) {
5052
check(StandardCharsets.UTF_16BE instanceof Charset);
5153
check(StandardCharsets.UTF_16LE instanceof Charset);
5254
check(StandardCharsets.UTF_16 instanceof Charset);
55+
check(StandardCharsets.UTF_32BE instanceof Charset);
56+
check(StandardCharsets.UTF_32LE instanceof Charset);
57+
check(StandardCharsets.UTF_32 instanceof Charset);
5358

5459
check("US-ASCII".equals(StandardCharsets.US_ASCII.name()));
5560
check("ISO-8859-1".equals(StandardCharsets.ISO_8859_1.name()));
5661
check("UTF-8".equals(StandardCharsets.UTF_8.name()));
5762
check("UTF-16BE".equals(StandardCharsets.UTF_16BE.name()));
5863
check("UTF-16LE".equals(StandardCharsets.UTF_16LE.name()));
5964
check("UTF-16".equals(StandardCharsets.UTF_16.name()));
65+
check("UTF-32BE".equals(StandardCharsets.UTF_32BE.name()));
66+
check("UTF-32LE".equals(StandardCharsets.UTF_32LE.name()));
67+
check("UTF-32".equals(StandardCharsets.UTF_32.name()));
6068

6169
check(Charset.forName("US-ASCII") == StandardCharsets.US_ASCII);
6270
check(Charset.forName("ISO-8859-1") == StandardCharsets.ISO_8859_1);
6371
check(Charset.forName("UTF-8") == StandardCharsets.UTF_8);
6472
check(Charset.forName("UTF-16BE") == StandardCharsets.UTF_16BE);
6573
check(Charset.forName("UTF-16LE") == StandardCharsets.UTF_16LE);
6674
check(Charset.forName("UTF-16") == StandardCharsets.UTF_16);
75+
check(Charset.forName("UTF-32BE") == StandardCharsets.UTF_32BE);
76+
check(Charset.forName("UTF-32LE") == StandardCharsets.UTF_32LE);
77+
check(Charset.forName("UTF-32") == StandardCharsets.UTF_32);
6778

6879
Set<String> charsets = new HashSet<>();
6980
Field standardCharsetFields[] = StandardCharsets.class.getFields();

0 commit comments

Comments
 (0)
Please sign in to comment.