Skip to content

Commit 21c3353

Browse files
committed Oct 17, 2024
Allow virtual threads to unmount when blocked on Object.wait()
1 parent 54813cd commit 21c3353

36 files changed

+817
-119
lines changed
 

‎src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp

+11-4
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
155155
// extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
156156
// AbstractInterpreter::layout_activation
157157

158+
// The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
159+
// after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
160+
// We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
161+
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
162+
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)
163+
158164
assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
159165
assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
160166
assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
161-
assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
167+
assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
162168
assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
163169
assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), "");
164170
}
@@ -219,7 +225,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
219225
// If caller is interpreted it already made room for the callee arguments
220226
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
221227
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
222-
const int locals = hf.interpreter_frame_method()->max_locals();
223228
intptr_t* frame_sp = caller.unextended_sp() - fsize;
224229
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
225230
if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -258,7 +263,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
258263
// we need to recreate a "real" frame pointer, pointing into the stack
259264
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
260265
} else {
261-
fp = FKind::stub
266+
fp = FKind::stub || FKind::native
262267
? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address.
263268
: *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame.
264269
}
@@ -329,7 +334,9 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
329334
assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");
330335

331336
// Make sure that extended_sp is kept relativized.
332-
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
337+
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
338+
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()
339+
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
333340
}
334341

335342
#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP

‎src/hotspot/cpu/aarch64/frame_aarch64.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
7373
sender_sp_offset = 2,
7474

7575
// Interpreter frames
76-
interpreter_frame_oop_temp_offset = 3, // for native calls only
76+
interpreter_frame_result_handler_offset = 3, // for native calls only
77+
interpreter_frame_oop_temp_offset = 2, // for native calls only
7778

7879
interpreter_frame_sender_sp_offset = -1,
7980
// outgoing sp before a call to an invoked method

‎src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp

+24-8
Original file line numberDiff line numberDiff line change
@@ -1740,11 +1740,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
17401740
}
17411741

17421742
// Change state to native (we save the return address in the thread, since it might not
1743-
// be pushed on the stack when we do a stack traversal).
1744-
// We use the same pc/oopMap repeatedly when we call out
1743+
// be pushed on the stack when we do a stack traversal). It is enough that the pc()
1744+
// points into the right code segment. It does not have to be the correct return pc.
1745+
// We use the same pc/oopMap repeatedly when we call out.
17451746

17461747
Label native_return;
1747-
__ set_last_Java_frame(sp, noreg, native_return, rscratch1);
1748+
if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
1749+
// For convenience we use the pc we want to resume to in case of preemption on Object.wait.
1750+
__ set_last_Java_frame(sp, noreg, native_return, rscratch1);
1751+
} else {
1752+
intptr_t the_pc = (intptr_t) __ pc();
1753+
oop_maps->add_gc_map(the_pc - start, map);
1754+
1755+
__ set_last_Java_frame(sp, noreg, __ pc(), rscratch1);
1756+
}
17481757

17491758
Label dtrace_method_entry, dtrace_method_entry_done;
17501759
if (DTraceMethodProbes) {
@@ -1847,11 +1856,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
18471856

18481857
__ rt_call(native_func);
18491858

1850-
__ bind(native_return);
1851-
1852-
intptr_t return_pc = (intptr_t) __ pc();
1853-
oop_maps->add_gc_map(return_pc - start, map);
1854-
18551859
// Verify or restore cpu control state after JNI call
18561860
__ restore_cpu_control_state_after_jni(rscratch1, rscratch2);
18571861

@@ -1910,6 +1914,18 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
19101914
__ stlrw(rscratch1, rscratch2);
19111915
__ bind(after_transition);
19121916

1917+
if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
1918+
// Check preemption for Object.wait()
1919+
__ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
1920+
__ cbz(rscratch1, native_return);
1921+
__ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
1922+
__ br(rscratch1);
1923+
__ bind(native_return);
1924+
1925+
intptr_t the_pc = (intptr_t) __ pc();
1926+
oop_maps->add_gc_map(the_pc - start, map);
1927+
}
1928+
19131929
Label reguard;
19141930
Label reguard_done;
19151931
__ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));

‎src/hotspot/cpu/aarch64/stackChunkFrameStream_aarch64.inline.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ inline int StackChunkFrameStream<frame_kind>::interpreter_frame_num_oops() const
116116
f.interpreted_frame_oop_map(&mask);
117117
return mask.num_oops()
118118
+ 1 // for the mirror oop
119+
+ (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot
119120
+ pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(),
120121
(intptr_t*)f.interpreter_frame_monitor_end())/BasicObjectLock::size();
121122
}

‎src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp

+27-4
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
13481348
// result handler is in r0
13491349
// set result handler
13501350
__ mov(result_handler, r0);
1351+
__ str(r0, Address(rfp, frame::interpreter_frame_result_handler_offset * wordSize));
1352+
13511353
// pass mirror handle if static call
13521354
{
13531355
Label L;
@@ -1383,9 +1385,10 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
13831385
// pass JNIEnv
13841386
__ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset()));
13851387

1386-
// Set the last Java PC in the frame anchor to be the return address from
1387-
// the call to the native method: this will allow the debugger to
1388-
// generate an accurate stack trace.
1388+
// It is enough that the pc() points into the right code
1389+
// segment. It does not have to be the correct return pc.
1390+
// For convenience we use the pc we want to resume to in
1391+
// case of preemption on Object.wait.
13891392
Label native_return;
13901393
__ set_last_Java_frame(esp, rfp, native_return, rscratch1);
13911394

@@ -1406,9 +1409,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
14061409
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
14071410
__ stlrw(rscratch1, rscratch2);
14081411

1412+
__ push_cont_fastpath();
1413+
14091414
// Call the native method.
14101415
__ blr(r10);
1411-
__ bind(native_return);
1416+
1417+
__ pop_cont_fastpath();
1418+
14121419
__ get_method(rmethod);
14131420
// result potentially in r0 or v0
14141421

@@ -1466,6 +1473,21 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
14661473
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
14671474
__ stlrw(rscratch1, rscratch2);
14681475

1476+
if (LockingMode != LM_LEGACY) {
1477+
// Check preemption for Object.wait()
1478+
Label not_preempted;
1479+
__ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
1480+
__ cbz(rscratch1, not_preempted);
1481+
__ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
1482+
__ br(rscratch1);
1483+
__ bind(native_return);
1484+
__ restore_after_resume(true /* is_native */);
1485+
__ bind(not_preempted);
1486+
} else {
1487+
// any pc will do so just use this one for LM_LEGACY to keep code together.
1488+
__ bind(native_return);
1489+
}
1490+
14691491
// reset_last_Java_frame
14701492
__ reset_last_Java_frame(true);
14711493

@@ -1484,6 +1506,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
14841506
{
14851507
Label no_oop;
14861508
__ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
1509+
__ ldr(result_handler, Address(rfp, frame::interpreter_frame_result_handler_offset*wordSize));
14871510
__ cmp(t, result_handler);
14881511
__ br(Assembler::NE, no_oop);
14891512
// Unbox oop result, e.g. JNIHandles::resolve result.

‎src/hotspot/cpu/riscv/continuationFreezeThaw_riscv.inline.hpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,16 @@ inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, co
153153
// extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
154154
// AbstractInterpreter::layout_activation
155155

156+
// The interpreter native wrapper code adds space in the stack equal to size_of_parameters()
157+
// after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter).
158+
// We adjust by this size since otherwise the saved last sp will be less than the extended_sp.
159+
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
160+
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;)
161+
156162
assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
157163
assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
158164
assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
159-
assert(hf.unextended_sp() > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
165+
assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
160166
assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
161167
#ifdef ASSERT
162168
if (f.interpreter_frame_method()->max_locals() > 0) {
@@ -222,7 +228,6 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
222228
// If caller is interpreted it already made room for the callee arguments
223229
int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
224230
const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
225-
const int locals = hf.interpreter_frame_method()->max_locals();
226231
intptr_t* frame_sp = caller.unextended_sp() - fsize;
227232
intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
228233
if ((intptr_t)fp % frame::frame_alignment != 0) {
@@ -261,7 +266,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
261266
// we need to recreate a "real" frame pointer, pointing into the stack
262267
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
263268
} else {
264-
fp = FKind::stub
269+
fp = FKind::stub || FKind::native
265270
// fp always points to the address above the pushed return pc. We need correct address.
266271
? frame_sp + fsize - frame::sender_sp_offset
267272
// we need to re-read fp because it may be an oop and we might have fixed the frame.
@@ -334,8 +339,11 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
334339
// Make sure that monitor_block_top is still relativized.
335340
assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");
336341

342+
DEBUG_ONLY(Method* m = hf.interpreter_frame_method();)
343+
DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata()
344+
337345
// Make sure that extended_sp is kept relativized.
338-
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
346+
assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, "");
339347
}
340348

341349
#endif // CPU_RISCV_CONTINUATIONFREEZETHAW_RISCV_INLINE_HPP

‎src/hotspot/cpu/riscv/frame_riscv.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@
111111
sender_sp_offset = 0,
112112

113113
// Interpreter frames
114-
interpreter_frame_oop_temp_offset = 1, // for native calls only
114+
interpreter_frame_result_handler_offset = 1, // for native calls only
115+
interpreter_frame_oop_temp_offset = 0, // for native calls only
115116

116117
interpreter_frame_sender_sp_offset = -3,
117118
// outgoing sp before a call to an invoked method

‎src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp

+24-8
Original file line numberDiff line numberDiff line change
@@ -1639,11 +1639,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
16391639
}
16401640

16411641
// Change state to native (we save the return address in the thread, since it might not
1642-
// be pushed on the stack when we do a stack traversal).
1643-
// We use the same pc/oopMap repeatedly when we call out
1642+
// be pushed on the stack when we do a stack traversal). It is enough that the pc()
1643+
// points into the right code segment. It does not have to be the correct return pc.
1644+
// We use the same pc/oopMap repeatedly when we call out.
16441645

16451646
Label native_return;
1646-
__ set_last_Java_frame(sp, noreg, native_return, t0);
1647+
if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
1648+
// For convenience we use the pc we want to resume to in case of preemption on Object.wait.
1649+
__ set_last_Java_frame(sp, noreg, native_return, t0);
1650+
} else {
1651+
intptr_t the_pc = (intptr_t) __ pc();
1652+
oop_maps->add_gc_map(the_pc - start, map);
1653+
1654+
__ set_last_Java_frame(sp, noreg, __ pc(), t0);
1655+
}
16471656

16481657
Label dtrace_method_entry, dtrace_method_entry_done;
16491658
if (DTraceMethodProbes) {
@@ -1745,11 +1754,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
17451754

17461755
__ rt_call(native_func);
17471756

1748-
__ bind(native_return);
1749-
1750-
intptr_t return_pc = (intptr_t) __ pc();
1751-
oop_maps->add_gc_map(return_pc - start, map);
1752-
17531757
// Verify or restore cpu control state after JNI call
17541758
__ restore_cpu_control_state_after_jni(t0);
17551759

@@ -1800,6 +1804,18 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
18001804
__ sw(t0, Address(t1));
18011805
__ bind(after_transition);
18021806

1807+
if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
1808+
// Check preemption for Object.wait()
1809+
__ ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset()));
1810+
__ beqz(t0, native_return);
1811+
__ sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset()));
1812+
__ jr(t0);
1813+
__ bind(native_return);
1814+
1815+
intptr_t the_pc = (intptr_t) __ pc();
1816+
oop_maps->add_gc_map(the_pc - start, map);
1817+
}
1818+
18031819
Label reguard;
18041820
Label reguard_done;
18051821
__ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset()));

‎src/hotspot/cpu/riscv/stackChunkFrameStream_riscv.inline.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ inline int StackChunkFrameStream<frame_kind>::interpreter_frame_num_oops() const
114114
f.interpreted_frame_oop_map(&mask);
115115
return mask.num_oops()
116116
+ 1 // for the mirror oop
117+
+ (f.interpreter_frame_method()->is_native() ? 1 : 0) // temp oop slot
117118
+ pointer_delta_as_int((intptr_t*)f.interpreter_frame_monitor_begin(),
118119
(intptr_t*)f.interpreter_frame_monitor_end()) / BasicObjectLock::size();
119120
}

‎src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp

+25-1
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
11251125
// result handler is in x10
11261126
// set result handler
11271127
__ mv(result_handler, x10);
1128+
__ sd(x10, Address(fp, frame::interpreter_frame_result_handler_offset * wordSize));
1129+
11281130
// pass mirror handle if static call
11291131
{
11301132
Label L;
@@ -1163,6 +1165,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
11631165

11641166
// It is enough that the pc() points into the right code
11651167
// segment. It does not have to be the correct return pc.
1168+
// For convenience we use the pc we want to resume to in
1169+
// case of preemption on Object.wait.
11661170
Label native_return;
11671171
__ set_last_Java_frame(esp, fp, native_return, x30);
11681172

@@ -1184,9 +1188,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
11841188
__ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
11851189
__ sw(t0, Address(t1));
11861190

1191+
__ push_cont_fastpath();
1192+
11871193
// Call the native method.
11881194
__ jalr(x28);
1189-
__ bind(native_return);
1195+
1196+
__ pop_cont_fastpath();
1197+
11901198
__ get_method(xmethod);
11911199
// result potentially in x10 or f10
11921200

@@ -1252,6 +1260,21 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
12521260
__ mv(t0, _thread_in_Java);
12531261
__ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
12541262

1263+
if (LockingMode != LM_LEGACY) {
1264+
// Check preemption for Object.wait()
1265+
Label not_preempted;
1266+
__ ld(t0, Address(xthread, JavaThread::preempt_alternate_return_offset()));
1267+
__ beqz(t0, not_preempted);
1268+
__ sd(zr, Address(xthread, JavaThread::preempt_alternate_return_offset()));
1269+
__ jr(t0);
1270+
__ bind(native_return);
1271+
__ restore_after_resume(true /* is_native */);
1272+
__ bind(not_preempted);
1273+
} else {
1274+
// any pc will do so just use this one for LM_LEGACY to keep code together.
1275+
__ bind(native_return);
1276+
}
1277+
12551278
// reset_last_Java_frame
12561279
__ reset_last_Java_frame(true);
12571280

@@ -1270,6 +1293,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
12701293
{
12711294
Label no_oop;
12721295
__ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
1296+
__ ld(result_handler, Address(fp, frame::interpreter_frame_result_handler_offset * wordSize));
12731297
__ bne(t, result_handler, no_oop);
12741298
// Unbox oop result, e.g. JNIHandles::resolve result.
12751299
__ pop(ltos);

0 commit comments

Comments
 (0)
Please sign in to comment.