Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8284842: Update Unicode Data Files to Version 15.0.0 #10839

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
352 changes: 242 additions & 110 deletions src/java.base/share/classes/java/lang/Character.java

Large diffs are not rendered by default.

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -25,7 +25,7 @@

/*
*******************************************************************************
* Copyright (C) 2009-2014, International Business Machines
* Copyright (C) 2009-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
Expand Down Expand Up @@ -556,8 +556,8 @@ public String getDecomposition(int c) {
public static final int MIN_YES_YES_WITH_CC=0xfe02;
public static final int JAMO_VT=0xfe00;
public static final int MIN_NORMAL_MAYBE_YES=0xfc00;
public static final int JAMO_L=2; // offset=1 hasCompBoundaryAfter=FALSE
public static final int INERT=1; // offset=0 hasCompBoundaryAfter=TRUE
public static final int JAMO_L=2; // offset=1 hasCompBoundaryAfter=false
public static final int INERT=1; // offset=0 hasCompBoundaryAfter=true

// norm16 bit 0 is comp-boundary-after.
public static final int HAS_COMP_BOUNDARY_AFTER=1;
Expand Down
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -24,7 +24,7 @@
*/
/*
*******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* Copyright (C) 1996-2015, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
Expand Down Expand Up @@ -246,8 +246,8 @@ public static boolean isUnprintable(int c) {
* Escape unprintable characters using <backslash>uxxxx notation
* for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
* above. If the character is printable ASCII, then do nothing
* and return FALSE. Otherwise, append the escaped notation and
* return TRUE.
* and return false. Otherwise, append the escaped notation and
* return true.
*/
public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
try {
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Expand Up @@ -117,7 +117,7 @@ static void setTrailingWSStart(BidiBase bidiBase)
are already set to paragraph level.
Setting trailingWSStart to pBidi->length will avoid changing the
level of B chars from 0 to paraLevel in getLevels when
orderParagraphsLTR==TRUE
orderParagraphsLTR==true
*/
if (dirProps[start - 1] == BidiBase.B) {
bidiBase.trailingWSStart = start; /* currently == bidiBase.length */
Expand Down
Expand Up @@ -54,7 +54,7 @@ public final class VersionInfo
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final String ICU_DATA_VERSION_PATH = "70b";
public static final String ICU_DATA_VERSION_PATH = "72b";

// public methods ------------------------------------------------------

Expand Down
Expand Up @@ -35,8 +35,8 @@ public final class Grapheme {
* <p>
* See Unicode Standard Annex #29 Unicode Text Segmentation for the specification
* for the extended grapheme cluster boundary rules. The following implementation
* is based on the annex for Unicode version 14.0.
* (http://www.unicode.org/reports/tr29/tr29-38.html)
* is based on the annex for Unicode version 15.0.
* (http://www.unicode.org/reports/tr29/tr29-40.html)
*
* @param src the {@code CharSequence} to be scanned
* @param off offset to start looking for the next boundary in the src
Expand Down Expand Up @@ -277,6 +277,7 @@ static int getType(int cp) {
case 0x11A88:
case 0x11A89:
case 0x11D46:
case 0x11F02:
return PREPEND;
}
}
Expand Down
21 changes: 14 additions & 7 deletions src/java.base/share/data/unicodedata/Blocks.txt
@@ -1,10 +1,10 @@
# Blocks-14.0.0.txt
# Date: 2021-01-22, 23:29:00 GMT [KW]
# Copyright (c) 2021 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# Blocks-15.0.0.txt
# Date: 2022-01-28, 20:58:00 GMT [KW]
# Copyright (c) 2022 Unicode, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
# For documentation, see https://www.unicode.org/reports/tr44/
#
# Format:
# Start Code..End Code; Block Name
Expand All @@ -15,7 +15,7 @@
# and underbars are ignored.
# For example, "Latin Extended-A" and "latin extended a" are equivalent.
# For more information on the comparison of property values,
# see UAX #44: http://www.unicode.org/reports/tr44/
# see UAX #44: https://www.unicode.org/reports/tr44/
#
# All block ranges start with a value where (cp MOD 16) = 0,
# and end with a value where (cp MOD 16) = 15. In other words,
Expand Down Expand Up @@ -241,6 +241,7 @@ FFF0..FFFF; Specials
10D00..10D3F; Hanifi Rohingya
10E60..10E7F; Rumi Numeral Symbols
10E80..10EBF; Yezidi
10EC0..10EFF; Arabic Extended-C
10F00..10F2F; Old Sogdian
10F30..10F6F; Sogdian
10F70..10FAF; Old Uyghur
Expand Down Expand Up @@ -272,19 +273,21 @@ FFF0..FFFF; Specials
11A50..11AAF; Soyombo
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
11AC0..11AFF; Pau Cin Hau
11B00..11B5F; Devanagari Extended-A
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
11D00..11D5F; Masaram Gondi
11D60..11DAF; Gunjala Gondi
11EE0..11EFF; Makasar
11F00..11F5F; Kawi
11FB0..11FBF; Lisu Supplement
11FC0..11FFF; Tamil Supplement
12000..123FF; Cuneiform
12400..1247F; Cuneiform Numbers and Punctuation
12480..1254F; Early Dynastic Cuneiform
12F90..12FFF; Cypro-Minoan
13000..1342F; Egyptian Hieroglyphs
13430..1343F; Egyptian Hieroglyph Format Controls
13430..1345F; Egyptian Hieroglyph Format Controls
14400..1467F; Anatolian Hieroglyphs
16800..16A3F; Bamum Supplement
16A40..16A6F; Mro
Expand All @@ -309,16 +312,19 @@ FFF0..FFFF; Specials
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D2C0..1D2DF; Kaktovik Numerals
1D2E0..1D2FF; Mayan Numerals
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1D800..1DAAF; Sutton SignWriting
1DF00..1DFFF; Latin Extended-G
1E000..1E02F; Glagolitic Supplement
1E030..1E08F; Cyrillic Extended-D
1E100..1E14F; Nyiakeng Puachue Hmong
1E290..1E2BF; Toto
1E2C0..1E2FF; Wancho
1E4D0..1E4FF; Nag Mundari
1E7E0..1E7FF; Ethiopic Extended-B
1E800..1E8DF; Mende Kikakui
1E900..1E95F; Adlam
Expand Down Expand Up @@ -348,6 +354,7 @@ FFF0..FFFF; Specials
2CEB0..2EBEF; CJK Unified Ideographs Extension F
2F800..2FA1F; CJK Compatibility Ideographs Supplement
30000..3134F; CJK Unified Ideographs Extension G
31350..323AF; CJK Unified Ideographs Extension H
E0000..E007F; Tags
E0100..E01EF; Variation Selectors Supplement
F0000..FFFFF; Supplementary Private Use Area-A
Expand Down