diff --git a/make/data/charsetmapping/charsets b/make/data/charsetmapping/charsets index 5932645bfbdc0..c88535cc6fc00 100644 --- a/make/data/charsetmapping/charsets +++ b/make/data/charsetmapping/charsets @@ -1,5 +1,5 @@ # -# Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -1006,6 +1006,22 @@ charset x-IBM935 IBM935 alias ibm-935 alias 935 +charset x-IBM836 IBM836 # EBCDIC SBCS-only, single-byte part of IBM935 + package sun.nio.cs.ext + type template + alias cp836 + alias ibm836 + alias 836 + alias ibm-836 + +charset x-IBM837 IBM837 # EBCDIC DBCS-only Simplified Chinese + package sun.nio.cs.ext + type template + alias cp837 + alias ibm837 + alias 837 + alias ibm-837 + charset x-IBM937 IBM937 package sun.nio.cs.ext type ebcdic @@ -1017,6 +1033,14 @@ charset x-IBM937 IBM937 alias ibm-937 alias 937 +charset x-IBM835 IBM835 # EBCDIC DBCS-only Traditional Chinese + package sun.nio.cs.ext + type template + alias cp835 + alias ibm835 + alias 835 + alias ibm-835 + charset x-IBM856 IBM856 package sun.nio.cs.ext type sbcs @@ -1502,6 +1526,14 @@ charset x-IBM939 IBM939 alias ibm-939 alias 939 +charset x-IBM1027 IBM1027 # EBCDIC SBCS-only, single-byte part of IBM939 + package sun.nio.cs.ext + type template + alias cp1027 + alias ibm1027 + alias 1027 + alias ibm-1027 + charset x-IBM933 IBM933 package sun.nio.cs.ext type ebcdic diff --git a/src/java.base/share/classes/sun/nio/cs/DoubleByte.java b/src/java.base/share/classes/sun/nio/cs/DoubleByte.java index 0fb8a82f74ff3..253bd55e96b97 100644 --- a/src/java.base/share/classes/sun/nio/cs/DoubleByte.java +++ b/src/java.base/share/classes/sun/nio/cs/DoubleByte.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -287,6 +287,18 @@ public char decodeDouble(int b1, int b2) { return UNMAPPABLE_DECODING; return b2c[b1][b2 - b2Min]; } + + public static char[] toSB(char[] b2cSB_DB, boolean isEBCDIC) { /* Returns a new 256-entry table holding the two 128-entry halves of b2cSB_DB in swapped order; presumably this matches the (byte value plus 128) indexing of SingleByte.Decoder - TODO confirm. */ + char[] b2cSB = new char[0x100]; + System.arraycopy(b2cSB_DB, 0, b2cSB, 128, 128); + System.arraycopy(b2cSB_DB, 128, b2cSB, 0, 128); + if (isEBCDIC) { /* slots 0x8e and 0x8f (source entries 0x0e and 0x0f after the swap) are overwritten with U+000E and U+000F */ + b2cSB[0x8e] = 0xe; + b2cSB[0x8f] = 0xf; + } + return b2cSB; + } + } // IBM_EBCDIC_DBCS @@ -1117,6 +1129,27 @@ public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { } } + public static class Encoder_SB extends Encoder { /* Encoder that only yields values below 0x100; any larger result from the supplied c2b tables is reported as UNMAPPABLE_ENCODING. */ + private boolean isEBCDIC = false; + + public Encoder_SB(Charset cs, byte[] repl, + char[] c2b, char[] c2bIndex, + boolean isEBCDIC) { + super(cs, 1.0f, 1.0f, repl, c2b, c2bIndex, false); + this.isEBCDIC = isEBCDIC; + } + + public int encodeChar(char ch) { + int bb = super.encodeChar(ch); + if (bb == UNMAPPABLE_ENCODING + && isEBCDIC + && (ch == 0x0e || ch == 0x0f)) { /* in EBCDIC mode, U+000E and U+000F that the table cannot map are returned as their own code value */ + return (int) ch; + } + return bb < 0x100 ? bb : UNMAPPABLE_ENCODING; + } + } + // EUC_SIMPLE public static class Encoder_EUC_SIM extends Encoder { public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex, diff --git a/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM1027.java.template b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM1027.java.template new file mode 100644 index 0000000000000..d9bb2a056e9f2 --- /dev/null +++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM1027.java.template @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + */ + +package $PACKAGE$; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import sun.nio.cs.DoubleByte; +import sun.nio.cs.SingleByte; + +public class IBM1027 extends Charset /* Charset x-IBM1027: a single-byte coder built on the IBM939 mapping tables. */ +{ + public IBM1027() { + super("x-IBM1027", $ALIASES$); + } + + public boolean contains(Charset cs) { /* only another IBM1027 instance counts as contained */ + return (cs instanceof IBM1027); + } + + private static char[] b2cSB = /* decode table, derived from IBM939.DecodeHolder.b2cSB by DoubleByte.Decoder.toSB in EBCDIC mode */ + DoubleByte.Decoder.toSB(IBM939.DecodeHolder.b2cSB, true); + + public CharsetDecoder newDecoder() { + return new SingleByte.Decoder(this, b2cSB); + } + + public CharsetEncoder newEncoder() { /* replacement byte is 0x6F (presumably the EBCDIC question mark - confirm); reuses the IBM939 c2b and c2bIndex tables in EBCDIC mode */ + return new DoubleByte.Encoder_SB ( + this, new byte[] { 0x6F }, IBM939.EncodeHolder.c2b, + IBM939.EncodeHolder.c2bIndex, true); + } + +} diff --git a/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM835.java.template b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM835.java.template new file mode 100644 index 0000000000000..c39f3605a5ce0 --- /dev/null +++ 
b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM835.java.template @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + */ + +package $PACKAGE$; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import sun.nio.cs.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + +// EBCDIC DBCS-only Traditional Chinese; decoding and encoding reuse the IBM937 tables +public class IBM835 extends Charset +{ + public IBM835() { + super("x-IBM835", $ALIASES$); + } + + public boolean contains(Charset cs) { /* only another IBM835 instance counts as contained */ + return (cs instanceof IBM835); + } + + public CharsetDecoder newDecoder() { /* DoubleByte.Decoder_DBCSONLY over the IBM937 b2c table */ + return new DoubleByte.Decoder_DBCSONLY( + this, IBM937.DecodeHolder.b2c, null, 0x40, 0xfe); // hardcode the b2min/max + } + + public CharsetEncoder newEncoder() { + return new Encoder(this); + } + + protected static class Encoder extends DoubleByte.Encoder_DBCSONLY { /* Encoder over the IBM937 c2b tables with replacement bytes 0xfe 0xfe, extended by explicit fallback mappings in encodeChar. */ + public Encoder(Charset cs) { + super(cs, new byte[] {(byte)0xfe, (byte)0xfe}, + IBM937.EncodeHolder.c2b, IBM937.EncodeHolder.c2bIndex, false); + } + + public int encodeChar(char ch) { /* the fallbacks are consulted only when the superclass reports UNMAPPABLE_ENCODING */ + int bb = super.encodeChar(ch); + if (bb == UNMAPPABLE_ENCODING) { + // Cp835 has 5 additional non-roundtrip char->bytes mappings + if (ch == '\u2223') { + return 0x424f; + } else if (ch == '\u00af') { + return 0x42a1; + } else if (ch == '\uff5e') { + return 0x43a1; + } else if (ch == '\u2013') { + return 0x444a; + } else if (ch == '\u5f5d') { + return 0x6560; + } + } + return bb; + } + + public boolean isLegalReplacement(byte[] repl) { /* accepts the two-byte 0xfe 0xfe replacement outright; anything else goes to the superclass check */ + if (repl.length == 2 && + repl[0] == (byte)0xfe && repl[1] == (byte)0xfe) + return true; + return super.isLegalReplacement(repl); + } + + } +} diff --git a/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM836.java.template b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM836.java.template new file mode 100644 index 0000000000000..bbff3cda84306 --- /dev/null +++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM836.java.template @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + */ + +package $PACKAGE$; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import sun.nio.cs.DoubleByte; +import sun.nio.cs.SingleByte; + +public class IBM836 extends Charset /* Charset x-IBM836: a single-byte coder built on the IBM935 mapping tables. */ +{ + public IBM836() { + super("x-IBM836", $ALIASES$); + } + + public boolean contains(Charset cs) { /* only another IBM836 instance counts as contained */ + return (cs instanceof IBM836); + } + + private static char[] b2cSB = /* decode table, derived from IBM935.DecodeHolder.b2cSB by DoubleByte.Decoder.toSB in EBCDIC mode */ + DoubleByte.Decoder.toSB(IBM935.DecodeHolder.b2cSB, true); + + public CharsetDecoder newDecoder() { + return new SingleByte.Decoder(this, b2cSB); + } + + public CharsetEncoder newEncoder() { /* replacement byte is 0x6F (presumably the EBCDIC question mark - confirm); reuses the IBM935 c2b and c2bIndex tables in EBCDIC mode */ + return new DoubleByte.Encoder_SB ( + this, new byte[] { 0x6F }, IBM935.EncodeHolder.c2b, + IBM935.EncodeHolder.c2bIndex, true); + } + +} diff --git a/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM837.java.template b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM837.java.template new file mode 100644 index 0000000000000..08b86f1dfe2fb --- /dev/null +++ b/src/jdk.charsets/share/classes/sun/nio/cs/ext/IBM837.java.template @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + */ + +package $PACKAGE$; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import sun.nio.cs.DoubleByte; +import static sun.nio.cs.CharsetMapping.*; + +// EBCDIC DBCS-only Simplified Chinese; decoding and encoding reuse the IBM935 tables +public class IBM837 extends Charset +{ + public IBM837() { + super("x-IBM837", $ALIASES$); + } + + public boolean contains(Charset cs) { /* only another IBM837 instance counts as contained */ + return (cs instanceof IBM837); + } + + public CharsetDecoder newDecoder() { /* DoubleByte.Decoder_DBCSONLY over the IBM935 b2c table */ + return new DoubleByte.Decoder_DBCSONLY( + this, IBM935.DecodeHolder.b2c, null, 0x40, 0xfe); // hardcode the b2min/max + } + + public CharsetEncoder newEncoder() { + return new Encoder(this); + } + + protected static class Encoder extends DoubleByte.Encoder_DBCSONLY { /* Encoder over the IBM935 c2b tables with replacement bytes 0xfe 0xfe, extended by explicit fallback mappings in encodeChar. */ + public Encoder(Charset cs) { + super(cs, new byte[] {(byte)0xfe, (byte)0xfe}, + IBM935.EncodeHolder.c2b, IBM935.EncodeHolder.c2bIndex, false); + } + + public int encodeChar(char ch) { /* the fallbacks are consulted only when the superclass reports UNMAPPABLE_ENCODING */ + int bb = super.encodeChar(ch); + if (bb == UNMAPPABLE_ENCODING) { + // Cp837 has 2 additional non-roundtrip char->bytes mappings + if (ch == '\u00b7') { + return 0x4345; + } else if (ch == '\u2014') { + return 0x444a; + } + } + return bb; + } + + public boolean isLegalReplacement(byte[] repl) { /* accepts the two-byte 0xfe 0xfe replacement outright; anything else goes to the superclass check */ + if (repl.length == 2 && + repl[0] == (byte)0xfe && repl[1] == (byte)0xfe) + return true; + return super.isLegalReplacement(repl); + } + + } +}