Skip to content

Commit 033cced

Browse files
committed Nov 29, 2023
8320368: Per-CPU optimization of Klass range reservation
Reviewed-by: rkennke, rehn
1 parent 48960df commit 033cced

15 files changed

+588
-106
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/*
2+
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
3+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*
24+
*/
25+
26+
#include "precompiled.hpp"
27+
#include "asm/assembler.hpp"
28+
#include "logging/log.hpp"
29+
#include "oops/compressedKlass.hpp"
30+
#include "memory/metaspace.hpp"
31+
#include "runtime/os.hpp"
32+
#include "utilities/globalDefinitions.hpp"
33+
34+
// Helper function; reserve at an address that is compatible with EOR
35+
static char* reserve_at_eor_compatible_address(size_t size, bool aslr) {
36+
char* result = nullptr;
37+
38+
log_debug(metaspace, map)("Trying to reserve at an EOR-compatible address");
39+
40+
// We need immediates that are 32-bit aligned, since they should not intersect nKlass
41+
// bits. They should not be larger than the addressable space either, but we still
42+
// lack a good abstraction for that (see JDK-8320584), therefore we assume and hard-code
43+
// 2^48 as a reasonable higher ceiling.
44+
static const uint16_t immediates[] = {
45+
0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0007, 0x0008, 0x000c, 0x000e,
46+
0x000f, 0x0010, 0x0018, 0x001c, 0x001e, 0x001f, 0x0020, 0x0030, 0x0038,
47+
0x003c, 0x003e, 0x003f, 0x0040, 0x0060, 0x0070, 0x0078, 0x007c, 0x007e,
48+
0x007f, 0x0080, 0x00c0, 0x00e0, 0x00f0, 0x00f8, 0x00fc, 0x00fe, 0x00ff,
49+
0x0100, 0x0180, 0x01c0, 0x01e0, 0x01f0, 0x01f8, 0x01fc, 0x01fe, 0x01ff,
50+
0x0200, 0x0300, 0x0380, 0x03c0, 0x03e0, 0x03f0, 0x03f8, 0x03fc, 0x03fe,
51+
0x03ff, 0x0400, 0x0600, 0x0700, 0x0780, 0x07c0, 0x07e0, 0x07f0, 0x07f8,
52+
0x07fc, 0x07fe, 0x07ff, 0x0800, 0x0c00, 0x0e00, 0x0f00, 0x0f80, 0x0fc0,
53+
0x0fe0, 0x0ff0, 0x0ff8, 0x0ffc, 0x0ffe, 0x0fff, 0x1000, 0x1800, 0x1c00,
54+
0x1e00, 0x1f00, 0x1f80, 0x1fc0, 0x1fe0, 0x1ff0, 0x1ff8, 0x1ffc, 0x1ffe,
55+
0x1fff, 0x2000, 0x3000, 0x3800, 0x3c00, 0x3e00, 0x3f00, 0x3f80, 0x3fc0,
56+
0x3fe0, 0x3ff0, 0x3ff8, 0x3ffc, 0x3ffe, 0x3fff, 0x4000, 0x6000, 0x7000,
57+
0x7800, 0x7c00, 0x7e00, 0x7f00, 0x7f80, 0x7fc0, 0x7fe0, 0x7ff0, 0x7ff8,
58+
0x7ffc, 0x7ffe, 0x7fff
59+
};
60+
static constexpr int num_immediates = sizeof(immediates) / sizeof(immediates[0]);
61+
const int start_index = aslr ? os::random() : 0;
62+
constexpr int max_tries = 64;
63+
for (int ntry = 0; result == nullptr && ntry < max_tries; ntry ++) {
64+
// As in os::attempt_reserve_memory_between, we alternate between higher and lower
65+
// addresses; this maximizes the chance of early success if part of the address space
66+
// is not accessible (e.g. 39-bit address space).
67+
const int alt_index = (ntry & 1) ? 0 : num_immediates / 2;
68+
const int index = (start_index + ntry + alt_index) % num_immediates;
69+
const uint64_t immediate = ((uint64_t)immediates[index]) << 32;
70+
assert(immediate > 0 && Assembler::operand_valid_for_logical_immediate(/*is32*/false, immediate),
71+
"Invalid immediate %d " UINT64_FORMAT, index, immediate);
72+
result = os::attempt_reserve_memory_at((char*)immediate, size, false);
73+
if (result == nullptr) {
74+
log_trace(metaspace, map)("Failed to attach at " UINT64_FORMAT_X, immediate);
75+
}
76+
}
77+
if (result == nullptr) {
78+
log_debug(metaspace, map)("Failed to reserve at any EOR-compatible address");
79+
}
80+
return result;
81+
}
82+
char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) {
83+
84+
char* result = nullptr;
85+
86+
// Optimize for base=0 shift=0
87+
if (optimize_for_zero_base) {
88+
result = reserve_address_space_for_unscaled_encoding(size, aslr);
89+
}
90+
91+
// If this fails, we don't bother aiming for zero-based encoding (base=0 shift>0), since it has no
92+
// advantages over EOR or movk mode.
93+
94+
// EOR-compatible reservation
95+
if (result == nullptr) {
96+
result = reserve_at_eor_compatible_address(size, aslr);
97+
}
98+
99+
// Movk-compatible reservation via probing.
100+
if (result == nullptr) {
101+
result = reserve_address_space_for_16bit_move(size, aslr);
102+
}
103+
104+
// Movk-compatible reservation via overallocation.
105+
// If that failed, attempt to allocate at any 4G-aligned address. Let the system decide where. For ASLR,
106+
// we now rely on the system.
107+
// Compared with the probing done above, this has two disadvantages:
108+
// - on a kernel with 52-bit address space we may get an address that has bits set between [48, 52).
109+
// In that case, we may need two movk moves (not yet implemented).
110+
// - this technique leads to temporary over-reservation of address space; it will spike the vsize of
111+
// the process. Therefore it may fail if a vsize limit is in place (e.g. ulimit -v).
112+
if (result == nullptr) {
113+
constexpr size_t alignment = nth_bit(32);
114+
log_debug(metaspace, map)("Trying to reserve at a 32-bit-aligned address");
115+
result = os::reserve_memory_aligned(size, alignment, false);
116+
}
117+
118+
return result;
119+
}
120+
121+
// Sets up the narrow Klass encoding (_base, _shift, _range) for the Klass range
// [addr, addr + len) on aarch64.
//
// addr: start of the Klass range.
// len:  length of the Klass range in bytes; must not exceed 2^32.
void CompressedKlassPointers::initialize(address addr, size_t len) {
  constexpr uintptr_t unscaled_max = nth_bit(32);
  assert(len <= unscaled_max, "Klass range larger than 32 bits?");

  address const end = addr + len;

  // aarch64 never shifts.
  _shift = 0;

  // Only two encodings are used here: if the whole range ends at or below the 32-bit
  // limit the base is null, otherwise the base is the range start. Zero-based encoding
  // (base=0 shift>0) is not bothered with on aarch64.
  if (end <= (address)unscaled_max) {
    _base = nullptr;
  } else {
    _base = addr;
  }

  // The encoding range extends from the chosen base to the end of the Klass range.
  _range = end - _base;
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
3+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*
24+
*/
25+
26+
#include "precompiled.hpp"
27+
#include "oops/compressedKlass.hpp"
28+
#include "utilities/globalDefinitions.hpp"
29+
30+
char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) {
31+
32+
char* result = nullptr;
33+
34+
// Optimize for base=0 shift=0; failing that, for base=0 shift>0
35+
if (optimize_for_zero_base) {
36+
result = reserve_address_space_for_unscaled_encoding(size, aslr);
37+
if (result == nullptr) {
38+
result = reserve_address_space_for_zerobased_encoding(size, aslr);
39+
}
40+
}
41+
42+
// Optimize for a single 16-bit move: a base that has only bits set in its third quadrant [32..48).
43+
if (result == nullptr) {
44+
result = reserve_address_space_for_16bit_move(size, aslr);
45+
}
46+
47+
return result;
48+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
3+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*
24+
*/
25+
26+
#include "precompiled.hpp"
27+
#include "oops/compressedKlass.hpp"
28+
#include "utilities/globalDefinitions.hpp"
29+
30+
char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) {
31+
32+
char* result = nullptr;
33+
34+
// RiscV loads a 64-bit immediate in up to four separate steps, splitting it into four different sections
35+
// (two 32-bit sections, each split into two subsections of 20/12 bits).
36+
//
37+
// 63 ....... 44 43 ... 32 31 ....... 12 11 ... 0
38+
// D C B A
39+
//
40+
// A "good" base is, in this order:
41+
// 1) only bits in A; this would be an address < 4KB, which is unrealistic on normal Linux boxes since
42+
// the typical default for vm.mmap_min_address is 64KB. We ignore that.
43+
// 2) only bits in B: a 12-bit-aligned address below 4GB. 12 bit = 4KB, but since mmap reserves at
44+
// page boundaries, we can ignore the alignment.
45+
// 3) only bits in C: a 4GB-aligned address that is lower than 16TB.
46+
// 4) only bits in D: a 16TB-aligned address.
47+
48+
// First, attempt to allocate < 4GB. We do this unconditionally:
49+
// - if can_optimize_for_zero_base, a <4GB mapping start would allow us to run unscaled (base = 0, shift = 0)
50+
// - if !can_optimize_for_zero_base, a <4GB mapping start is still good, the resulting immediate can be encoded
51+
// with one instruction (2)
52+
result = reserve_address_space_for_unscaled_encoding(size, aslr);
53+
54+
// Failing that, attempt to reserve for base=zero shift>0
55+
if (result == nullptr && optimize_for_zero_base) {
56+
result = reserve_address_space_for_zerobased_encoding(size, aslr);
57+
}
58+
59+
// Failing that, optimize for case (3) - a base with only bits set between [33-44)
60+
if (result == nullptr) {
61+
const uintptr_t from = nth_bit(32 + (optimize_for_zero_base ? LogKlassAlignmentInBytes : 0));
62+
constexpr uintptr_t to = nth_bit(44);
63+
constexpr size_t alignment = nth_bit(32);
64+
result = reserve_address_space_X(from, to, size, alignment, aslr);
65+
}
66+
67+
// Failing that, optimize for case (4) - a base with only bits set between [44-64)
68+
if (result == nullptr) {
69+
constexpr uintptr_t from = nth_bit(44);
70+
constexpr uintptr_t to = UINT64_MAX;
71+
constexpr size_t alignment = nth_bit(44);
72+
result = reserve_address_space_X(from, to, size, alignment, aslr);
73+
}
74+
75+
return result;
76+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
3+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*
24+
*/
25+
26+
#include "precompiled.hpp"
27+
#include "oops/compressedKlass.hpp"
28+
#include "utilities/globalDefinitions.hpp"
29+
30+
char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) {
31+
32+
char* result = nullptr;
33+
34+
uintptr_t tried_below = 0;
35+
36+
// First, attempt to allocate < 4GB. We do this unconditionally:
37+
// - if optimize_for_zero_base, a <4GB mapping start allows us to use base=0 shift=0
38+
// - if !optimize_for_zero_base, a <4GB mapping start allows us to use algfi
39+
result = reserve_address_space_for_unscaled_encoding(size, aslr);
40+
41+
// Failing that, try optimized for base=0 shift>0
42+
if (result == nullptr && optimize_for_zero_base) {
43+
result = reserve_address_space_for_zerobased_encoding(size, aslr);
44+
}
45+
46+
// Failing that, aim for a base that is 4G-aligned; such a base can be set with aih.
47+
if (result == nullptr) {
48+
result = reserve_address_space_for_16bit_move(size, aslr);
49+
}
50+
51+
return result;
52+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
3+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*
24+
*/
25+
26+
#include "precompiled.hpp"
27+
28+
#ifdef _LP64
29+
30+
#include "oops/compressedKlass.hpp"
31+
#include "utilities/globalDefinitions.hpp"
32+
33+
char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) {
34+
35+
char* result = nullptr;
36+
37+
// Optimize for unscaled encoding; failing that, for zero-based encoding:
38+
if (optimize_for_zero_base) {
39+
result = reserve_address_space_for_unscaled_encoding(size, aslr);
40+
if (result == nullptr) {
41+
result = reserve_address_space_for_zerobased_encoding(size, aslr);
42+
}
43+
} // end: low-address reservation
44+
45+
// Nothing more to optimize for on x64. If base != 0, we will always emit the full 64-bit immediate.
46+
return result;
47+
}
48+
49+
#endif // _LP64

‎src/hotspot/os/posix/os_posix.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,10 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in
343343
}
344344

345345
static size_t calculate_aligned_extra_size(size_t size, size_t alignment) {
346-
assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
346+
assert(is_aligned(alignment, os::vm_allocation_granularity()),
347347
"Alignment must be a multiple of allocation granularity (page size)");
348-
assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
348+
assert(is_aligned(size, os::vm_allocation_granularity()),
349+
"Size must be a multiple of allocation granularity (page size)");
349350

350351
size_t extra_size = size + alignment;
351352
assert(extra_size >= size, "overflow, size is too large to allow alignment");

‎src/hotspot/os/windows/os_windows.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -3331,9 +3331,10 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in
33313331
// virtual space to get requested alignment, like posix-like os's.
33323332
// Windows prevents multiple thread from remapping over each other so this loop is thread-safe.
33333333
static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
3334-
assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
3335-
"Alignment must be a multiple of allocation granularity (page size)");
3336-
assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
3334+
assert(is_aligned(alignment, os::vm_allocation_granularity()),
3335+
"Alignment must be a multiple of allocation granularity (page size)");
3336+
assert(is_aligned(size, os::vm_allocation_granularity()),
3337+
"Size must be a multiple of allocation granularity (page size)");
33373338

33383339
size_t extra_size = size + alignment;
33393340
assert(extra_size >= size, "overflow, size is too large to allow alignment");

0 commit comments

Comments
 (0)
Please sign in to comment.