Skip to content

Commit 9a9cfbe

Browse files
committed Feb 26, 2024
8325340: Add ASCII fast-path to Data-/ObjectInputStream.readUTF
Reviewed-by: rgiulietti, bpb, rriggs
1 parent 3780ad3 commit 9a9cfbe

File tree

4 files changed

+362
-24
lines changed

4 files changed

+362
-24
lines changed
 

‎src/java.base/share/classes/java/io/DataInputStream.java

+4-4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -571,10 +571,10 @@ public final String readUTF() throws IOException {
571571
* valid modified UTF-8 encoding of a Unicode string.
572572
* @see java.io.DataInputStream#readUnsignedShort()
573573
*/
574-
public static final String readUTF(DataInput in) throws IOException {
574+
public static String readUTF(DataInput in) throws IOException {
575575
int utflen = in.readUnsignedShort();
576-
byte[] bytearr = null;
577-
char[] chararr = null;
576+
byte[] bytearr;
577+
char[] chararr;
578578
if (in instanceof DataInputStream dis) {
579579
if (dis.bytearr.length < utflen) {
580580
dis.bytearr = new byte[utflen*2];

‎src/java.base/share/classes/java/io/ObjectInputStream.java

+39-14
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
import java.lang.reflect.InvocationHandler;
3434
import java.lang.reflect.Modifier;
3535
import java.lang.reflect.Proxy;
36+
import java.nio.charset.StandardCharsets;
3637
import java.security.AccessControlContext;
3738
import java.security.AccessController;
3839
import java.security.PrivilegedAction;
@@ -42,6 +43,7 @@
4243
import java.util.Map;
4344
import java.util.Objects;
4445

46+
import jdk.internal.access.JavaLangAccess;
4547
import jdk.internal.access.SharedSecrets;
4648
import jdk.internal.event.DeserializationEvent;
4749
import jdk.internal.misc.Unsafe;
@@ -2995,6 +2997,8 @@ private class BlockDataInputStream
29952997
private static final int CHAR_BUF_SIZE = 256;
29962998
/** readBlockHeader() return value indicating header read may block */
29972999
private static final int HEADER_BLOCKED = -2;
3000+
/** access to internal methods to count ASCII and inflate latin1/ASCII bytes to char */
3001+
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
29983002

29993003
/** buffer for reading general/block data */
30003004
private final byte[] buf = new byte[MAX_BLOCK_SIZE];
@@ -3671,23 +3675,47 @@ String readLongUTF() throws IOException {
36713675
* utflen bytes.
36723676
*/
36733677
private String readUTFBody(long utflen) throws IOException {
3678+
if (!blkmode) {
3679+
end = pos = 0;
3680+
}
3681+
36743682
StringBuilder sbuf;
36753683
if (utflen > 0 && utflen < Integer.MAX_VALUE) {
3684+
// Scan for leading ASCII chars
3685+
int avail = end - pos;
3686+
int ascii = JLA.countPositives(buf, pos, Math.min(avail, (int)utflen));
3687+
if (ascii == utflen) {
3688+
// Complete match, consume the buf[pos ... pos + ascii] range and return.
3689+
// Modified UTF-8 and ISO-8859-1 are both ASCII-compatible encodings,
3690+
// thus we can treat the range as ISO-8859-1 and avoid a redundant scan
3691+
// in the String constructor
3692+
String utf = new String(buf, pos, ascii, StandardCharsets.ISO_8859_1);
3693+
pos += ascii;
3694+
return utf;
3695+
}
3696+
// Avoid allocating a StringBuilder if there's enough data in buf and
3697+
// cbuf is large enough
3698+
if (avail >= utflen && utflen <= CHAR_BUF_SIZE) {
3699+
JLA.inflateBytesToChars(buf, pos, cbuf, 0, ascii);
3700+
pos += ascii;
3701+
int cbufPos = readUTFSpan(ascii, utflen - ascii);
3702+
return new String(cbuf, 0, cbufPos);
3703+
}
36763704
// a reasonable initial capacity based on the UTF length
36773705
int initialCapacity = Math.min((int)utflen, 0xFFFF);
36783706
sbuf = new StringBuilder(initialCapacity);
36793707
} else {
36803708
sbuf = new StringBuilder();
36813709
}
36823710

3683-
if (!blkmode) {
3684-
end = pos = 0;
3685-
}
3686-
36873711
while (utflen > 0) {
36883712
int avail = end - pos;
36893713
if (avail >= 3 || (long) avail == utflen) {
3690-
utflen -= readUTFSpan(sbuf, utflen);
3714+
int cbufPos = readUTFSpan(0, utflen);
3715+
// pos has advanced: adjust utflen by the difference in
3716+
// available bytes
3717+
utflen -= avail - (end - pos);
3718+
sbuf.append(cbuf, 0, cbufPos);
36913719
} else {
36923720
if (blkmode) {
36933721
// near block boundary, read one byte at a time
@@ -3709,18 +3737,17 @@ private String readUTFBody(long utflen) throws IOException {
37093737

37103738
/**
37113739
* Reads span of UTF-encoded characters out of internal buffer
3712-
* (starting at offset pos and ending at or before offset end),
3713-
* consuming no more than utflen bytes. Appends read characters to
3714-
* sbuf. Returns the number of bytes consumed.
3740+
* (starting at offset pos), consuming no more than utflen bytes.
3741+
* Appends read characters to cbuf starting at index cpos. Returns the current position
3742+
* in cbuf.
37153743
*/
3716-
private long readUTFSpan(StringBuilder sbuf, long utflen)
3744+
private int readUTFSpan(int cpos, long utflen)
37173745
throws IOException
37183746
{
3719-
int cpos = 0;
37203747
int start = pos;
37213748
int avail = Math.min(end - pos, CHAR_BUF_SIZE);
37223749
// stop short of last char unless all of utf bytes in buffer
3723-
int stop = pos + ((utflen > avail) ? avail - 2 : (int) utflen);
3750+
int stop = start + ((utflen > avail) ? avail - 2 : (int) utflen);
37243751
boolean outOfBounds = false;
37253752

37263753
try {
@@ -3765,9 +3792,7 @@ private long readUTFSpan(StringBuilder sbuf, long utflen)
37653792
throw new UTFDataFormatException();
37663793
}
37673794
}
3768-
3769-
sbuf.append(cbuf, 0, cpos);
3770-
return pos - start;
3795+
return cpos;
37713796
}
37723797

37733798
/**

‎test/micro/org/openjdk/bench/java/io/DataInputStreamTest.java

+133-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
2-
* Copyright (c) 2020, 2022, Red Hat Inc. All rights reserved.
2+
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
3+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -27,7 +28,10 @@
2728
import org.openjdk.jmh.infra.Blackhole;
2829

2930
import java.io.ByteArrayInputStream;
31+
import java.io.ByteArrayOutputStream;
3032
import java.io.DataInputStream;
33+
import java.io.DataOutputStream;
34+
import java.io.IOException;
3135
import java.util.concurrent.ThreadLocalRandom;
3236
import java.util.concurrent.TimeUnit;
3337

@@ -38,22 +42,85 @@
3842
@Warmup(iterations = 2, time = 2)
3943
@State(Scope.Thread)
4044
public class DataInputStreamTest {
41-
private final int size = 1024;
45+
private static final int SIZE = 1024;
4246

4347
private ByteArrayInputStream bais;
48+
private ByteArrayInputStream utfDataAsciiMixed;
49+
private ByteArrayInputStream utfDataMixed;
50+
51+
private ByteArrayInputStream utfDataAsciiSmall;
52+
private ByteArrayInputStream utfDataSmall;
53+
54+
private ByteArrayInputStream utfDataAsciiLarge;
55+
private ByteArrayInputStream utfDataLarge;
56+
57+
private static final int REPEATS = 20;
4458

4559
@Setup(Level.Iteration)
46-
public void setup() {
47-
byte[] bytes = new byte[size];
60+
public void setup() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException {
61+
byte[] bytes = new byte[SIZE];
4862
ThreadLocalRandom.current().nextBytes(bytes);
4963
bais = new ByteArrayInputStream(bytes);
64+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
65+
DataOutputStream dataOut = new DataOutputStream(baos);
66+
for (int i = 0; i < REPEATS; i++) {
67+
dataOut.writeUTF("small");
68+
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
69+
}
70+
dataOut.flush();
71+
utfDataAsciiMixed = new ByteArrayInputStream(baos.toByteArray());
72+
73+
baos = new ByteArrayOutputStream();
74+
dataOut = new DataOutputStream(baos);
75+
for (int i = 0; i < REPEATS; i++) {
76+
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
77+
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
78+
}
79+
dataOut.flush();
80+
utfDataAsciiLarge = new ByteArrayInputStream(baos.toByteArray());
81+
82+
baos = new ByteArrayOutputStream();
83+
dataOut = new DataOutputStream(baos);
84+
for (int i = 0; i < REPEATS; i++) {
85+
dataOut.writeUTF("smol");
86+
dataOut.writeUTF("smally");
87+
}
88+
dataOut.flush();
89+
utfDataAsciiSmall = new ByteArrayInputStream(baos.toByteArray());
90+
91+
baos = new ByteArrayOutputStream();
92+
dataOut = new DataOutputStream(baos);
93+
for (int i = 0; i < REPEATS; i++) {
94+
dataOut.writeUTF("sm\u00FFll");
95+
dataOut.writeUTF("slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics");
96+
}
97+
dataOut.flush();
98+
utfDataMixed = new ByteArrayInputStream(baos.toByteArray());
99+
100+
baos = new ByteArrayOutputStream();
101+
dataOut = new DataOutputStream(baos);
102+
for (int i = 0; i < REPEATS; i++) {
103+
dataOut.writeUTF("sm\u00F3l");
104+
dataOut.writeUTF("small\u0132");
105+
}
106+
dataOut.flush();
107+
utfDataSmall = new ByteArrayInputStream(baos.toByteArray());
108+
109+
baos = new ByteArrayOutputStream();
110+
dataOut = new DataOutputStream(baos);
111+
for (int i = 0; i < REPEATS; i++) {
112+
dataOut.writeUTF("slightly longer string that is more likely to trigg\u0131r use of simd intrinsics");
113+
dataOut.writeUTF("slightly longer string th\u0131t is more likely to trigger use of simd intrinsics");
114+
}
115+
dataOut.flush();
116+
utfDataLarge = new ByteArrayInputStream(baos.toByteArray());
50117
}
51118

52119
@Benchmark
53120
public void readChar(Blackhole bh) throws Exception {
54121
bais.reset();
55122
DataInputStream dis = new DataInputStream(bais);
56-
for (int i = 0; i < size / 2; i++) {
123+
for (int i = 0; i < SIZE / 2; i++) {
57124
bh.consume(dis.readChar());
58125
}
59126
}
@@ -62,8 +129,68 @@ public void readChar(Blackhole bh) throws Exception {
62129
public void readInt(Blackhole bh) throws Exception {
63130
bais.reset();
64131
DataInputStream dis = new DataInputStream(bais);
65-
for (int i = 0; i < size / 4; i++) {
132+
for (int i = 0; i < SIZE / 4; i++) {
66133
bh.consume(dis.readInt());
67134
}
68135
}
136+
137+
@Benchmark
138+
public void readUTFAsciiMixed(Blackhole bh) throws Exception {
139+
utfDataAsciiMixed.reset();
140+
DataInputStream dis = new DataInputStream(utfDataAsciiMixed);
141+
for (int i = 0; i < REPEATS; i++) {
142+
bh.consume(dis.readUTF());
143+
bh.consume(dis.readUTF());
144+
}
145+
}
146+
147+
@Benchmark
148+
public void readUTFAsciiSmall(Blackhole bh) throws Exception {
149+
utfDataAsciiSmall.reset();
150+
DataInputStream dis = new DataInputStream(utfDataAsciiSmall);
151+
for (int i = 0; i < REPEATS; i++) {
152+
bh.consume(dis.readUTF());
153+
bh.consume(dis.readUTF());
154+
}
155+
}
156+
157+
@Benchmark
158+
public void readUTFAsciiLarge(Blackhole bh) throws Exception {
159+
utfDataAsciiLarge.reset();
160+
DataInputStream dis = new DataInputStream(utfDataAsciiLarge);
161+
for (int i = 0; i < REPEATS; i++) {
162+
bh.consume(dis.readUTF());
163+
bh.consume(dis.readUTF());
164+
}
165+
}
166+
167+
@Benchmark
168+
public void readUTFMixed(Blackhole bh) throws Exception {
169+
utfDataMixed.reset();
170+
DataInputStream dis = new DataInputStream(utfDataMixed);
171+
for (int i = 0; i < REPEATS; i++) {
172+
bh.consume(dis.readUTF());
173+
bh.consume(dis.readUTF());
174+
}
175+
}
176+
177+
@Benchmark
178+
public void readUTFSmall(Blackhole bh) throws Exception {
179+
utfDataSmall.reset();
180+
DataInputStream dis = new DataInputStream(utfDataSmall);
181+
for (int i = 0; i < REPEATS; i++) {
182+
bh.consume(dis.readUTF());
183+
bh.consume(dis.readUTF());
184+
}
185+
}
186+
187+
@Benchmark
188+
public void readUTFLarge(Blackhole bh) throws Exception {
189+
utfDataLarge.reset();
190+
DataInputStream dis = new DataInputStream(utfDataLarge);
191+
for (int i = 0; i < REPEATS; i++) {
192+
bh.consume(dis.readUTF());
193+
bh.consume(dis.readUTF());
194+
}
195+
}
69196
}
‎test/micro/org/openjdk/bench/java/io/ObjectInputStreamTest.java

+186-0
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,186 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package org.openjdk.bench.java.io;
25+
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Level;
30+
import org.openjdk.jmh.annotations.Measurement;
31+
import org.openjdk.jmh.annotations.Mode;
32+
import org.openjdk.jmh.annotations.OutputTimeUnit;
33+
import org.openjdk.jmh.annotations.Scope;
34+
import org.openjdk.jmh.annotations.Setup;
35+
import org.openjdk.jmh.annotations.State;
36+
import org.openjdk.jmh.annotations.Warmup;
37+
import org.openjdk.jmh.infra.Blackhole;
38+
39+
import java.io.ByteArrayInputStream;
40+
import java.io.ByteArrayOutputStream;
41+
import java.io.DataInputStream;
42+
import java.io.DataOutputStream;
43+
import java.io.IOException;
44+
import java.io.ObjectInputStream;
45+
import java.io.ObjectOutputStream;
46+
import java.util.concurrent.ThreadLocalRandom;
47+
import java.util.concurrent.TimeUnit;
48+
49+
@BenchmarkMode(Mode.AverageTime)
50+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
51+
@Fork(value = 3, warmups = 0)
52+
@Measurement(iterations = 5, time = 1)
53+
@Warmup(iterations = 2, time = 2)
54+
@State(Scope.Thread)
55+
public class ObjectInputStreamTest {
56+
private ByteArrayInputStream utfDataAsciiMixed;
57+
private ByteArrayInputStream utfDataMixed;
58+
59+
private ByteArrayInputStream utfDataAsciiSmall;
60+
private ByteArrayInputStream utfDataSmall;
61+
62+
private ByteArrayInputStream utfDataAsciiLarge;
63+
private ByteArrayInputStream utfDataLarge;
64+
65+
// Overhead of creating an ObjectInputStream is significant, need to increase the number of data elements
66+
// to balance work
67+
private static final int REPEATS = 20;
68+
69+
70+
@Setup(Level.Iteration)
71+
public void setup() throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException {
72+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
73+
ObjectOutputStream dataOut = new ObjectOutputStream(baos);
74+
for (int i = 0; i < REPEATS; i++) {
75+
dataOut.writeUTF("small");
76+
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
77+
}
78+
dataOut.flush();
79+
utfDataAsciiMixed = new ByteArrayInputStream(baos.toByteArray());
80+
81+
baos = new ByteArrayOutputStream();
82+
dataOut = new ObjectOutputStream(baos);
83+
for (int i = 0; i < REPEATS; i++) {
84+
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
85+
dataOut.writeUTF("slightly longer string that is more likely to trigger use of simd intrinsics");
86+
}
87+
dataOut.flush();
88+
utfDataAsciiLarge = new ByteArrayInputStream(baos.toByteArray());
89+
90+
baos = new ByteArrayOutputStream();
91+
dataOut = new ObjectOutputStream(baos);
92+
for (int i = 0; i < REPEATS; i++) {
93+
dataOut.writeUTF("smol");
94+
dataOut.writeUTF("smally");
95+
}
96+
dataOut.flush();
97+
utfDataAsciiSmall = new ByteArrayInputStream(baos.toByteArray());
98+
99+
baos = new ByteArrayOutputStream();
100+
dataOut = new ObjectOutputStream(baos);
101+
for (int i = 0; i < REPEATS; i++) {
102+
dataOut.writeUTF("sm\u00FFll");
103+
dataOut.writeUTF("slightly longer string th\u01F3t is more likely to trigger use of simd intrinsics");
104+
}
105+
dataOut.flush();
106+
utfDataMixed = new ByteArrayInputStream(baos.toByteArray());
107+
108+
baos = new ByteArrayOutputStream();
109+
dataOut = new ObjectOutputStream(baos);
110+
for (int i = 0; i < REPEATS; i++) {
111+
dataOut.writeUTF("sm\u00F3l");
112+
dataOut.writeUTF("small\u0132");
113+
}
114+
dataOut.flush();
115+
utfDataSmall = new ByteArrayInputStream(baos.toByteArray());
116+
117+
baos = new ByteArrayOutputStream();
118+
dataOut = new ObjectOutputStream(baos);
119+
for (int i = 0; i < REPEATS; i++) {
120+
dataOut.writeUTF("slightly longer string that is more likely to trigg\u0131r use of simd intrinsics");
121+
dataOut.writeUTF("slightly longer string th\u0131t is more likely to trigger use of simd intrinsics");
122+
}
123+
dataOut.flush();
124+
utfDataLarge = new ByteArrayInputStream(baos.toByteArray());
125+
}
126+
127+
@Benchmark
128+
public void readUTFAsciiMixed(Blackhole bh) throws Exception {
129+
utfDataAsciiMixed.reset();
130+
ObjectInputStream ois = new ObjectInputStream(utfDataAsciiMixed);
131+
for (int i = 0; i < REPEATS; i++) {
132+
bh.consume(ois.readUTF());
133+
bh.consume(ois.readUTF());
134+
}
135+
}
136+
137+
@Benchmark
138+
public void readUTFAsciiSmall(Blackhole bh) throws Exception {
139+
utfDataAsciiSmall.reset();
140+
ObjectInputStream ois = new ObjectInputStream(utfDataAsciiSmall);
141+
for (int i = 0; i < REPEATS; i++) {
142+
bh.consume(ois.readUTF());
143+
bh.consume(ois.readUTF());
144+
}
145+
}
146+
147+
@Benchmark
148+
public void readUTFAsciiLarge(Blackhole bh) throws Exception {
149+
utfDataAsciiLarge.reset();
150+
ObjectInputStream ois = new ObjectInputStream(utfDataAsciiLarge);
151+
for (int i = 0; i < REPEATS; i++) {
152+
bh.consume(ois.readUTF());
153+
bh.consume(ois.readUTF());
154+
}
155+
}
156+
157+
@Benchmark
158+
public void readUTFMixed(Blackhole bh) throws Exception {
159+
utfDataMixed.reset();
160+
ObjectInputStream ois = new ObjectInputStream(utfDataMixed);
161+
for (int i = 0; i < REPEATS; i++) {
162+
bh.consume(ois.readUTF());
163+
bh.consume(ois.readUTF());
164+
}
165+
}
166+
167+
@Benchmark
168+
public void readUTFSmall(Blackhole bh) throws Exception {
169+
utfDataSmall.reset();
170+
ObjectInputStream ois = new ObjectInputStream(utfDataSmall);
171+
for (int i = 0; i < REPEATS; i++) {
172+
bh.consume(ois.readUTF());
173+
bh.consume(ois.readUTF());
174+
}
175+
}
176+
177+
@Benchmark
178+
public void readUTFLarge(Blackhole bh) throws Exception {
179+
utfDataLarge.reset();
180+
ObjectInputStream ois = new ObjectInputStream(utfDataLarge);
181+
for (int i = 0; i < REPEATS; i++) {
182+
bh.consume(ois.readUTF());
183+
bh.consume(ois.readUTF());
184+
}
185+
}
186+
}

0 commit comments

Comments
 (0)
Please sign in to comment.