Skip to content

Commit 7983194

Browse files
committed Jul 17, 2023
8312186: TestStringEncodingFails for UTF-32
Reviewed-by: mcimadamore
1 parent a83f43d commit 7983194

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed
 

‎src/java.base/share/classes/jdk/internal/foreign/StringSupport.java

+35 -6
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,12 @@
2626
package jdk.internal.foreign;
2727

2828
import java.lang.foreign.MemorySegment;
29-
import java.lang.foreign.ValueLayout;
3029
import java.nio.charset.Charset;
3130
import java.nio.charset.StandardCharsets;
3231

3332
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
3433
import static java.lang.foreign.ValueLayout.JAVA_SHORT;
34+
import static java.lang.foreign.ValueLayout.JAVA_INT;
3535

3636
/**
3737
* Miscellaneous functions to read and write strings, in various charsets.
@@ -41,6 +41,7 @@ public static String read(MemorySegment segment, long offset, Charset charset) {
4141
return switch (CharsetKind.of(charset)) {
4242
case SINGLE_BYTE -> readFast_byte(segment, offset, charset);
4343
case DOUBLE_BYTE -> readFast_short(segment, offset, charset);
44+
case QUAD_BYTE -> readFast_int(segment, offset, charset);
4445
default -> throw new UnsupportedOperationException("Unsupported charset: " + charset);
4546
};
4647
}
@@ -49,26 +50,27 @@ public static void write(MemorySegment segment, long offset, Charset charset, St
4950
switch (CharsetKind.of(charset)) {
5051
case SINGLE_BYTE -> writeFast_byte(segment, offset, charset, string);
5152
case DOUBLE_BYTE -> writeFast_short(segment, offset, charset, string);
53+
case QUAD_BYTE -> writeFast_int(segment, offset, charset, string);
5254
default -> throw new UnsupportedOperationException("Unsupported charset: " + charset);
5355
}
5456
}
5557
private static String readFast_byte(MemorySegment segment, long offset, Charset charset) {
5658
long len = strlen_byte(segment, offset);
5759
byte[] bytes = new byte[(int)len];
58-
MemorySegment.copy(segment, ValueLayout.JAVA_BYTE, offset, bytes, 0, (int)len);
60+
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len);
5961
return new String(bytes, charset);
6062
}
6163

6264
private static void writeFast_byte(MemorySegment segment, long offset, Charset charset, String string) {
6365
byte[] bytes = string.getBytes(charset);
64-
MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, offset, bytes.length);
65-
segment.set(ValueLayout.JAVA_BYTE, offset + bytes.length, (byte)0);
66+
MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length);
67+
segment.set(JAVA_BYTE, offset + bytes.length, (byte)0);
6668
}
6769

6870
private static String readFast_short(MemorySegment segment, long offset, Charset charset) {
6971
long len = strlen_short(segment, offset);
7072
byte[] bytes = new byte[(int)len];
71-
MemorySegment.copy(segment, ValueLayout.JAVA_BYTE, offset, bytes, 0, (int)len);
73+
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len);
7274
return new String(bytes, charset);
7375
}
7476

@@ -78,6 +80,19 @@ private static void writeFast_short(MemorySegment segment, long offset, Charset
7880
segment.set(JAVA_SHORT, offset + bytes.length, (short)0);
7981
}
8082

83+
private static String readFast_int(MemorySegment segment, long offset, Charset charset) {
84+
long len = strlen_int(segment, offset);
85+
byte[] bytes = new byte[(int)len];
86+
MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len);
87+
return new String(bytes, charset);
88+
}
89+
90+
private static void writeFast_int(MemorySegment segment, long offset, Charset charset, String string) {
91+
byte[] bytes = string.getBytes(charset);
92+
MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length);
93+
segment.set(JAVA_INT, offset + bytes.length, 0);
94+
}
95+
8196
private static int strlen_byte(MemorySegment segment, long start) {
8297
// iterate until overflow (String can only hold a byte[], whose length can be expressed as an int)
8398
for (int offset = 0; offset >= 0; offset++) {
@@ -100,9 +115,21 @@ private static int strlen_short(MemorySegment segment, long start) {
100115
throw new IllegalArgumentException("String too large");
101116
}
102117

118+
private static int strlen_int(MemorySegment segment, long start) {
119+
// iterate until overflow (String can only hold a byte[], whose length can be expressed as an int)
120+
for (int offset = 0; offset >= 0; offset += 4) {
121+
int curr = segment.get(JAVA_INT, start + offset);
122+
if (curr == 0) {
123+
return offset;
124+
}
125+
}
126+
throw new IllegalArgumentException("String too large");
127+
}
128+
103129
public enum CharsetKind {
104130
SINGLE_BYTE(1),
105-
DOUBLE_BYTE(2);
131+
DOUBLE_BYTE(2),
132+
QUAD_BYTE(4);
106133

107134
final int terminatorCharSize;
108135

@@ -119,6 +146,8 @@ public static CharsetKind of(Charset charset) {
119146
return CharsetKind.SINGLE_BYTE;
120147
} else if (charset == StandardCharsets.UTF_16LE || charset == StandardCharsets.UTF_16BE || charset == StandardCharsets.UTF_16) {
121148
return CharsetKind.DOUBLE_BYTE;
149+
} else if (charset == StandardCharsets.UTF_32LE || charset == StandardCharsets.UTF_32BE || charset == StandardCharsets.UTF_32) {
150+
return CharsetKind.QUAD_BYTE;
122151
} else {
123152
throw new UnsupportedOperationException("Unsupported charset: " + charset);
124153
}

‎test/jdk/java/foreign/TestStringEncoding.java

+3
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,9 @@ public void testStrings(String testString) throws ReflectiveOperationException {
5151
if (charset == StandardCharsets.UTF_16) {
5252
terminatorSize -= 2; // drop BOM
5353
}
54+
// Note that the JDK's UTF_32 encoder doesn't add a BOM.
55+
// This is legal under the Unicode standard, and means the byte order is BE.
56+
// See: https://unicode.org/faq/utf_bom.html#gen7
5457

5558
int expectedByteLength =
5659
testString.getBytes(charset).length +

0 commit comments

Comments
 (0)
Please sign in to comment.