Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8293994: [lworld] Deoptimization from nmethod entry barrier breaks scalarized calling convention #773

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 23 additions & 26 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
@@ -1770,33 +1770,14 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
__ bind(L_skip_barrier);
}

if (C->max_vector_size() > 0) {
__ reinitialize_ptrue();
}

__ verified_entry(C, 0);
__ bind(*_verified_entry);

if (C->stub_function() == NULL) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
// Dummy labels for just measuring the code size
Label dummy_slow_path;
Label dummy_continuation;
Label dummy_guard;
Label* slow_path = &dummy_slow_path;
Label* continuation = &dummy_continuation;
Label* guard = &dummy_guard;
if (!Compile::current()->output()->in_scratch_emit_size()) {
// Use real labels from actual stub when not emitting code for the purpose of measuring its size
C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
slow_path = &stub->slow_path();
continuation = &stub->continuation();
guard = &stub->guard();
}
// In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
}
__ entry_barrier();
}

if (!Compile::current()->output()->in_scratch_emit_size()) {
__ bind(*_verified_entry);
}

if (VerifyStackAtCalls) {
@@ -2193,7 +2174,7 @@ void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const

void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
MacroAssembler _masm(&cbuf);
C2_MacroAssembler _masm(&cbuf);

if (!_verified) {
Label skip;
@@ -2203,12 +2184,28 @@ void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
__ bind(skip);

} else {
// insert a nop at the start of the prolog so we can patch in a
// branch if we need to invalidate the method later
__ nop();

// TODO 8284443 Avoid creation of temporary frame
if (ra_->C->stub_function() == NULL) {
__ verified_entry(ra_->C, 0);
__ entry_barrier();
int framesize = ra_->C->output()->frame_slots() << LogBytesPerInt;
__ remove_frame(framesize, false);
}
// Unpack inline type args passed as oop and then jump to
// the verified entry point (skipping the unverified entry).
int sp_inc = __ unpack_inline_args(ra_->C, _receiver_only);
// Emit code for verified entry and save increment for stack repair on return
__ verified_entry(ra_->C, sp_inc);
__ b(*_verified_entry);
if (Compile::current()->output()->in_scratch_emit_size()) {
Label dummy_verified_entry;
__ b(dummy_verified_entry);
} else {
__ b(*_verified_entry);
}
}
}

45 changes: 18 additions & 27 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -303,37 +303,32 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
cmp_klass(receiver, iCache, rscratch1);
}

void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_inc, bool needs_stack_repair) {
void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_offset_for_orig_pc, int sp_inc, bool has_scalarized_args, bool needs_stack_repair) {
MacroAssembler::build_frame(frame_size_in_bytes);

if (needs_stack_repair) {
save_stack_increment(sp_inc, frame_size_in_bytes);
}
}

void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_inline_entry_label) {
if (has_scalarized_args) {
// Initialize orig_pc to detect deoptimization during buffering in the entry points
str(zr, Address(sp, sp_offset_for_orig_pc - frame_size_in_bytes));
}
if (!needs_stack_repair && verified_inline_entry_label != NULL) {
bind(*verified_inline_entry_label);
str(zr, Address(sp, sp_offset_for_orig_pc));
}
}

void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_inline_entry_label) {
// Make sure there is enough stack space for this method's activation.
// Note that we do this before creating a frame.
assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
generate_stack_overflow_check(bang_size_in_bytes);

build_frame_helper(frame_size_in_bytes, 0, needs_stack_repair);
build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, has_scalarized_args, needs_stack_repair);

// Insert nmethod entry barrier into frame.
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);

if (needs_stack_repair && verified_inline_entry_label != NULL) {
// Jump here from the scalarized entry points that require additional stack space
// for packing scalarized arguments and therefore already created the frame.
if (verified_inline_entry_label != NULL) {
// Jump here from the scalarized entry points that already created the frame.
bind(*verified_inline_entry_label);
}
}
@@ -365,20 +360,12 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f
int args_passed = sig->length();
int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);

// Check if we need to extend the stack for packing
int sp_inc = 0;
if (args_on_stack > args_on_stack_cc) {
sp_inc = extend_stack_for_inline_args(args_on_stack);
}

// Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC.
build_frame_helper(frame_size_in_bytes, sp_inc, ces->c1_needs_stack_repair());

// Initialize orig_pc to detect deoptimization during buffering in below runtime call
str(zr, Address(sp, sp_offset_for_orig_pc));
build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, 0, true, ces->c1_needs_stack_repair());

// The runtime call might safepoint, make sure nmethod entry barrier is executed
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
// C1 code is not hot enough to micro optimize the nmethod entry barrier with an out-of-line stub
bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);

// FIXME -- call runtime only if we cannot in-line allocate all the incoming inline type args.
@@ -397,16 +384,20 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f
// Remove the temp frame
MacroAssembler::remove_frame(frame_size_in_bytes);

// Check if we need to extend the stack for packing
int sp_inc = 0;
if (args_on_stack > args_on_stack_cc) {
sp_inc = extend_stack_for_inline_args(args_on_stack);
}

shuffle_inline_args(true, is_inline_ro_entry, sig_cc,
args_passed_cc, args_on_stack_cc, regs_cc, // from
args_passed, args_on_stack, regs, // to
sp_inc, val_array);

if (ces->c1_needs_stack_repair()) {
// Create the real frame. Below jump will then skip over the stack banging and frame
// setup code in the verified_inline_entry (which has a different real_frame_size).
build_frame_helper(frame_size_in_bytes, sp_inc, true);
}
// Create the real frame. Below jump will then skip over the stack banging and frame
// setup code in the verified_inline_entry (which has a different real_frame_size).
build_frame_helper(frame_size_in_bytes, sp_offset_for_orig_pc, sp_inc, true, ces->c1_needs_stack_repair());

b(verified_inline_entry_label);
return rt_call_offset;
22 changes: 22 additions & 0 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -45,6 +45,28 @@

typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);

void C2_MacroAssembler::entry_barrier() {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
// Dummy labels for just measuring the code size
Label dummy_slow_path;
Label dummy_continuation;
Label dummy_guard;
Label* slow_path = &dummy_slow_path;
Label* continuation = &dummy_continuation;
Label* guard = &dummy_guard;
if (!Compile::current()->output()->in_scratch_emit_size()) {
// Use real labels from actual stub when not emitting code for the purpose of measuring its size
C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
slow_path = &stub->slow_path();
continuation = &stub->continuation();
guard = &stub->guard();
}
// In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
bs->nmethod_entry_barrier(this, slow_path, continuation, guard);
}
}

void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
bind(stub->slow_path());
movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
1 change: 1 addition & 0 deletions src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@
enum shift_kind kind = Assembler::LSL, unsigned shift = 0);

public:
void entry_barrier();
void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
static int entry_barrier_stub_size();

31 changes: 27 additions & 4 deletions src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -163,6 +163,31 @@ static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
return barrier;
}

// Stores `val` into the guard value of the nmethod entry barrier(s) of `nm`.
//
// The barrier located by native_nmethod_barrier() is always updated. For non-OSR
// nmethods whose method has scalarized arguments, the barriers of the additional
// entry points are updated as well.
static void set_value(nmethod* nm, jint val) {
  NativeNMethodBarrier* primary = native_nmethod_barrier(nm);
  primary->set_value(nm, val);

  if (nm->is_osr_method() || !nm->method()->has_scalarized_args()) {
    // Single entry barrier only, nothing more to patch.
    return;
  }

  // nmethods with scalarized arguments have multiple entry points and each of them
  // has its own nmethod entry barrier.
  assert(nm->verified_entry_point() != nm->verified_inline_entry_point(), "scalarized entry point not found");

  // Which entry point leads into the method body depends on the compiler that
  // produced the nmethod; the other one is the second entry to patch.
  address body_entry;
  address second_entry;
  if (nm->is_compiled_by_c1()) {
    body_entry = nm->verified_inline_entry_point();
    second_entry = nm->verified_entry_point();
  } else {
    body_entry = nm->verified_entry_point();
    second_entry = nm->verified_inline_entry_point();
  }

  // Distance of the primary barrier from the method body entry. The same distance
  // is applied to the other entry points to locate their barriers
  // (NOTE(review): relies on every entry point placing its barrier at this offset - confirm).
  int barrier_offset = reinterpret_cast<address>(primary) - body_entry;

  NativeNMethodBarrier* second = reinterpret_cast<NativeNMethodBarrier*>(second_entry + barrier_offset);
  assert(primary != second, "sanity");
  debug_only(second->verify());
  second->set_value(nm, val);

  address ro_entry = nm->verified_inline_ro_entry_point();
  if (body_entry == ro_entry || second_entry == ro_entry) {
    // The read-only inline entry coincides with an entry point handled above.
    return;
  }

  NativeNMethodBarrier* third = reinterpret_cast<NativeNMethodBarrier*>(ro_entry + barrier_offset);
  assert(primary != third && second != third, "sanity");
  debug_only(third->verify());
  third->set_value(nm, val);
}

void BarrierSetNMethod::disarm(nmethod* nm) {
if (!supports_entry_barrier(nm)) {
return;
@@ -179,8 +204,7 @@ void BarrierSetNMethod::disarm(nmethod* nm) {

// Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
// Symmetric "LDR; DMB ISHLD" is in the nmethod barrier.
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
barrier->set_value(nm, disarmed_value());
set_value(nm, disarmed_value());
}

void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
@@ -199,8 +223,7 @@ void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
bs_asm->increment_patching_epoch();
}

NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
barrier->set_value(nm, arm_value);
set_value(nm, arm_value);
}

bool BarrierSetNMethod::is_armed(nmethod* nm) {
12 changes: 5 additions & 7 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -5951,18 +5951,16 @@ void MacroAssembler::get_thread(Register dst) {
// C2 compiled method's prolog code
// Moved here from aarch64.ad to support Valhalla code below
void MacroAssembler::verified_entry(Compile* C, int sp_inc) {

// n.b. frame size includes space for return pc and rfp
const long framesize = C->output()->frame_size_in_bytes();

// insert a nop at the start of the prolog so we can patch in a
// branch if we need to invalidate the method later
nop();
if (C->max_vector_size() > 0) {
reinitialize_ptrue();
}

int bangsize = C->output()->bang_size_in_bytes();
if (C->output()->need_stack_bang(bangsize))
generate_stack_overflow_check(bangsize);

// n.b. frame size includes space for return pc and rfp
const long framesize = C->output()->frame_size_in_bytes();
build_frame(framesize);

if (C->needs_stack_repair()) {
7 changes: 4 additions & 3 deletions src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -615,6 +615,8 @@ static void gen_c2i_adapter(MacroAssembler *masm,
int& frame_complete,
int& frame_size_in_words,
bool alloc_inline_receiver) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->c2i_entry_barrier(masm);

// Before we get into the guts of the C2I adapter, see if we should be here
// at all. We've come from compiled code and are attempting to jump to the
@@ -1063,9 +1065,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm
c2i_no_clinit_check_entry = __ pc();
}

BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->c2i_entry_barrier(masm);

gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);

address c2i_unverified_inline_entry = c2i_unverified_entry;
@@ -1078,6 +1077,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm
gen_inline_cache_check(masm, inline_entry_skip_fixup);

c2i_inline_entry = __ pc();
// TODO 8294013 Fix this and add tests
c2i_no_clinit_check_entry = __ pc();
gen_c2i_adapter(masm, sig, regs, inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
}

Loading