diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 2b8ed2e85632..bd01095ba9ca 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -1920,7 +1920,24 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 
   if (C->stub_function() == NULL) {
     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->nmethod_entry_barrier(&_masm);
+    if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
+      // Dummy labels for just measuring the code size
+      Label dummy_slow_path;
+      Label dummy_continuation;
+      Label dummy_guard;
+      Label* slow_path = &dummy_slow_path;
+      Label* continuation = &dummy_continuation;
+      Label* guard = &dummy_guard;
+      if (!Compile::current()->output()->in_scratch_emit_size()) {
+        // Use real labels from actual stub when not emitting code for the purpose of measuring its size
+        C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
+        slow_path = &stub->slow_path();
+        continuation = &stub->continuation();
+        guard = &stub->guard();
+      }
+      // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
+      bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
+    }
   }
 
   if (VerifyStackAtCalls) {
diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
index afd5be0db46e..81cf2b8fbd0b 100644
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -298,7 +298,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
 
   // Insert nmethod entry barrier into frame.
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->nmethod_entry_barrier(this);
+  bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
 }
 
 void C1_MacroAssembler::remove_frame(int framesize) {
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 83d9e1f182ce..26b2ab9a5fad 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -28,6 +28,7 @@
 #include "opto/c2_MacroAssembler.hpp"
 #include "opto/intrinsicnode.hpp"
 #include "opto/matcher.hpp"
+#include "opto/output.hpp"
 #include "opto/subnode.hpp"
 #include "runtime/stubRoutines.hpp"
 
@@ -43,6 +44,21 @@ typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
 
+void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
+  bind(stub->slow_path());
+  movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
+  blr(rscratch1);
+  b(stub->continuation());
+
+  bind(stub->guard());
+  relocate(entry_guard_Relocation::spec());
+  emit_int32(0);   // nmethod guard value
+}
+
+int C2_MacroAssembler::entry_barrier_stub_size() {
+  return 4 * 6;
+}
+
 // Search for str1 in str2 and return index or -1
 void C2_MacroAssembler::string_indexof(Register str2, Register str1,
                                        Register cnt2, Register cnt1,
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index cc65f4692eaf..289b9c7322f0 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -28,8 +28,8 @@
 // C2_MacroAssembler contains high-level macros for C2
 
  public:
-  void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
-  static int entry_barrier_stub_size() { return 0; }
+  void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
+  static int entry_barrier_stub_size();
 
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
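Note (illustrative, not part of the patch): the "4 * 6" returned by entry_barrier_stub_size() above can be accounted for as six 32-bit words, assuming movptr() expands to three instructions for a 48-bit address. The constant names below are invented for this sketch only.

constexpr int kBytesPerInsn = 4;   // AArch64 instructions are 4 bytes wide
constexpr int kStubWords    = 3    // movptr(rscratch1, method_entry_barrier())
                            + 1    // blr(rscratch1)
                            + 1    // b(stub->continuation())
                            + 1;   // emit_int32(0) guard word
static_assert(kBytesPerInsn * kStubWords == 24,
              "consistent with entry_barrier_stub_size() returning 4 * 6");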
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
index 28b1dcde30ec..15e655b30441 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp
@@ -246,18 +246,38 @@ void BarrierSetAssembler::clear_patching_epoch() {
   _patching_epoch = 0;
 }
 
-void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
   BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
   if (bs_nm == NULL) {
     return;
   }
 
-  Label skip_barrier, guard;
+  Label local_guard;
+  Label skip_barrier;
+  NMethodPatchingType patching_type = nmethod_patching_type();
 
-  __ ldrw(rscratch1, guard);
+  if (slow_path == NULL) {
+    guard = &local_guard;
+  }
+
+  // If the slow path is out of line in a stub, we flip the condition
+  Assembler::Condition condition = slow_path == NULL ? Assembler::EQ : Assembler::NE;
+  Label& barrier_target = slow_path == NULL ? skip_barrier : *slow_path;
 
-  if (nmethod_code_patching()) {
+  __ ldrw(rscratch1, *guard);
+
+  if (patching_type == NMethodPatchingType::stw_instruction_and_data_patch) {
+    // With STW patching, no data or instructions are updated concurrently,
+    // which means there isn't really any need for any fencing of either
+    // data or instruction modifications happening concurrently. The
+    // instruction patching is handled with isb fences on the way back
+    // from the safepoint to Java. So here we can do a plain conditional
+    // branch with no fencing.
+    Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
+    __ ldrw(rscratch2, thread_disarmed_addr);
+    __ cmp(rscratch1, rscratch2);
+  } else if (patching_type == NMethodPatchingType::conc_instruction_and_data_patch) {
     // If we patch code we need both a code patching and a loadload
     // fence. It's not super cheap, so we use a global epoch mechanism
     // to hide them in a slow path.
@@ -278,24 +298,28 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
     Address thread_disarmed_and_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
     __ ldr(rscratch2, thread_disarmed_and_epoch_addr);
     __ cmp(rscratch1, rscratch2);
-    __ br(Assembler::EQ, skip_barrier);
   } else {
+    assert(patching_type == NMethodPatchingType::conc_data_patch, "must be");
     // Subsequent loads of oops must occur after load of guard value.
     // BarrierSetNMethod::disarm sets guard with release semantics.
     __ membar(__ LoadLoad);
     Address thread_disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()));
     __ ldrw(rscratch2, thread_disarmed_addr);
     __ cmpw(rscratch1, rscratch2);
-    __ br(Assembler::EQ, skip_barrier);
   }
+  __ br(condition, barrier_target);
 
-  __ movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
-  __ blr(rscratch1);
-  __ b(skip_barrier);
+  if (slow_path == NULL) {
+    __ movptr(rscratch1, (uintptr_t) StubRoutines::aarch64::method_entry_barrier());
+    __ blr(rscratch1);
+    __ b(skip_barrier);
 
-  __ bind(guard);
+    __ bind(local_guard);
 
-  __ emit_int32(0);   // nmethod guard value. Skipped over in common case.
+    __ emit_int32(0);   // nmethod guard value. Skipped over in common case.
+  } else {
+    __ bind(*continuation);
+  }
 
   __ bind(skip_barrier);
 }
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
index 5e7a5b97ef91..522cb9b53e13 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp
@@ -31,6 +31,12 @@
 #include "memory/allocation.hpp"
 #include "oops/access.hpp"
 
+enum class NMethodPatchingType {
+  stw_instruction_and_data_patch,
+  conc_instruction_and_data_patch,
+  conc_data_patch
+};
+
 class BarrierSetAssembler: public CHeapObj<mtGC> {
 private:
   void incr_allocated_bytes(MacroAssembler* masm,
@@ -68,9 +74,9 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
   );
   virtual void barrier_stubs_init() {}
 
-  virtual bool nmethod_code_patching() { return true; }
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
 
-  virtual void nmethod_entry_barrier(MacroAssembler* masm);
+  virtual void nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard);
   virtual void c2i_entry_barrier(MacroAssembler* masm);
   static address patching_epoch_addr();
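Note (illustrative, not part of the patch): the new signature is used two ways in this change. A hypothetical caller showing both forms, mirroring the C1/native-wrapper call sites and the C2 prolog in this patch:

static void emit_nmethod_entry_barrier_example(MacroAssembler* masm, C2EntryBarrierStub* stub) {
  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  if (stub == NULL) {
    // Inline form (C1, native wrappers): slow path and guard word are emitted in place.
    bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
  } else {
    // Out-of-line form (C2): only the guard load, compare and branch stay in the prolog;
    // the runtime call and the guard word end up in the entry barrier stub.
    bs->nmethod_entry_barrier(masm, &stub->slow_path(), &stub->continuation(), &stub->guard());
  }
}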
diff --git a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
index fd25dcf3ace8..d1cac3d00c2f 100644
--- a/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shared/barrierSetNMethod_aarch64.cpp
@@ -37,29 +37,62 @@
 #include "utilities/align.hpp"
 #include "utilities/debug.hpp"
 
+static int slow_path_size(nmethod* nm) {
+  // The slow path code is out of line with C2
+  return nm->is_compiled_by_c2() ? 0 : 6;
+}
+
+// This is the offset of the entry barrier from where the frame is completed.
+// If any code changes between the end of the verified entry where the entry
+// barrier resides, and the completion of the frame, then
+// NativeNMethodCmpBarrier::verify() will immediately complain when it does
+// not find the expected native instruction at this offset, which needs updating.
+// Note that this offset is invariant of PreserveFramePointer.
+static int entry_barrier_offset(nmethod* nm) {
+  BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+  switch (bs_asm->nmethod_patching_type()) {
+  case NMethodPatchingType::stw_instruction_and_data_patch:
+    return -4 * (4 + slow_path_size(nm));
+  case NMethodPatchingType::conc_instruction_and_data_patch:
+    return -4 * (10 + slow_path_size(nm));
+  case NMethodPatchingType::conc_data_patch:
+    return -4 * (5 + slow_path_size(nm));
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
 class NativeNMethodBarrier: public NativeInstruction {
   address instruction_address() const { return addr_at(0); }
 
-  int guard_offset() {
-    BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
-    if (bs_asm->nmethod_code_patching()) {
-      return 4 * 15;
-    } else {
-      return 4 * 10;
-    }
+  int local_guard_offset(nmethod* nm) {
+    // It's the last instruction
+    return (-entry_barrier_offset(nm)) - 4;
   }
 
-  int *guard_addr() {
-    return reinterpret_cast<int*>(instruction_address() + guard_offset());
+  int *guard_addr(nmethod* nm) {
+    if (nm->is_compiled_by_c2()) {
+      // With c2 compiled code, the guard is out-of-line in a stub
+      // We find it using the RelocIterator.
+      RelocIterator iter(nm);
+      while (iter.next()) {
+        if (iter.type() == relocInfo::entry_guard_type) {
+          entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
+          return reinterpret_cast<int*>(reloc->addr());
+        }
+      }
+      ShouldNotReachHere();
+    }
+    return reinterpret_cast<int*>(instruction_address() + local_guard_offset(nm));
   }
 
 public:
-  int get_value() {
-    return Atomic::load_acquire(guard_addr());
+  int get_value(nmethod* nm) {
+    return Atomic::load_acquire(guard_addr(nm));
   }
 
-  void set_value(int value) {
-    Atomic::release_store(guard_addr(), value);
+  void set_value(nmethod* nm, int value) {
+    Atomic::release_store(guard_addr(nm), value);
   }
 
   void verify() const;
@@ -120,24 +153,8 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
   new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
 }
 
-// This is the offset of the entry barrier from where the frame is completed.
-// If any code changes between the end of the verified entry where the entry
-// barrier resides, and the completion of the frame, then
-// NativeNMethodCmpBarrier::verify() will immediately complain when it does
-// not find the expected native instruction at this offset, which needs updating.
-// Note that this offset is invariant of PreserveFramePointer.
-
-static int entry_barrier_offset() {
-  BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
-  if (bs_asm->nmethod_code_patching()) {
-    return -4 * 16;
-  } else {
-    return -4 * 11;
-  }
-}
-
 static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
-  address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset();
+  address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
   NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
   debug_only(barrier->verify());
   return barrier;
@@ -160,7 +177,7 @@ void BarrierSetNMethod::disarm(nmethod* nm) {
   // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
   // Symmetric "LDR; DMB ISHLD" is in the nmethod barrier.
   NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
-  barrier->set_value(disarmed_value());
+  barrier->set_value(nm, disarmed_value());
 }
 
 void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
@@ -180,7 +197,7 @@ void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
   }
 
   NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
-  barrier->set_value(arm_value);
+  barrier->set_value(nm, arm_value);
 }
 
 bool BarrierSetNMethod::is_armed(nmethod* nm) {
@@ -189,5 +206,5 @@ bool BarrierSetNMethod::is_armed(nmethod* nm) {
   }
 
   NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
-  return barrier->get_value() != disarmed_value();
+  return barrier->get_value(nm) != disarmed_value();
 }
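Note (hypothetical helper, not part of the patch): a worked re-derivation of entry_barrier_offset() for the default stw_instruction_and_data_patch case. The inline fast path is four instructions (ldrw of the guard, ldrw of the thread's disarmed value, cmp, br); only non-C2 nmethods additionally keep the six-word slow path plus guard word inline.

static int example_stw_entry_barrier_offset(bool compiled_by_c2) {
  const int fast_path_insns = 4;                       // ldrw, ldrw, cmp, br
  const int slow_path_words = compiled_by_c2 ? 0 : 6;  // movptr (3 insns) + blr + b + guard word
  return -4 * (fast_path_insns + slow_path_words);     // -16 bytes for C2, -40 bytes otherwise
}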
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
index de3cdf2b236f..b02601540ca9 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -62,7 +62,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
   void iu_barrier(MacroAssembler* masm, Register dst, Register tmp);
 
-  virtual bool nmethod_code_patching() { return false; }
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
 
 #ifdef COMPILER1
   void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
index 1b9a5cc71c15..cd2c25469488 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp
@@ -76,7 +76,7 @@ class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase {
                                Register tmp,
                                Label& slowpath);
 
-  virtual bool nmethod_code_patching() { return false; }
+  virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
 
 #ifdef COMPILER1
   void generate_c1_load_barrier_test(LIR_Assembler* ce,
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index d694b010ffc4..c6a65f55cedc 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -4475,7 +4475,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
   // ordered with respect to oop accesses.
   // Using immediate literals would necessitate ISBs.
   BarrierSet* bs = BarrierSet::barrier_set();
-  if ((bs->barrier_set_nmethod() != NULL && !bs->barrier_set_assembler()->nmethod_code_patching()) || !immediate) {
+  if ((bs->barrier_set_nmethod() != NULL && bs->barrier_set_assembler()->nmethod_patching_type() == NMethodPatchingType::conc_data_patch) || !immediate) {
     address dummy = address(uintptr_t(pc()) & -wordSize);  // A nearby aligned address
     ldr_constant(dst, Address(dummy, rspec));
   } else
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index 9bef3f559164..254b36f4c1f7 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -1424,7 +1424,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ sub(sp, sp, stack_size - 2*wordSize);
 
   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->nmethod_entry_barrier(masm);
+  bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
 
   // Frame is now completed as far as size and linkage.
   int frame_complete = ((intptr_t)__ pc()) - start;
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 073ca97b344c..26fa9cafbc17 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -5145,7 +5145,7 @@ class StubGenerator: public StubCodeGenerator {
     return entry;
   }
 
-  address generate_method_entry_barrier() {
+  address generate_method_entry_barrier() {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
 
@@ -5155,10 +5155,10 @@
 
     BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
 
-    if (bs_asm->nmethod_code_patching()) {
+    if (bs_asm->nmethod_patching_type() == NMethodPatchingType::conc_instruction_and_data_patch) {
      BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
      // We can get here despite the nmethod being good, if we have not
-      // yet applied our cross modification fence.
+      // yet applied our cross modification fence (or data fence).
       Address thread_epoch_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset()) + 4);
       __ lea(rscratch2, ExternalAddress(bs_asm->patching_epoch_addr()));
       __ ldrw(rscratch2, rscratch2);
diff --git a/src/hotspot/share/code/relocInfo.hpp b/src/hotspot/share/code/relocInfo.hpp
index db5b55db5bcc..33e342a3707c 100644
--- a/src/hotspot/share/code/relocInfo.hpp
+++ b/src/hotspot/share/code/relocInfo.hpp
@@ -269,6 +269,7 @@ class relocInfo {
     runtime_call_w_cp_type  = 14, // Runtime call which may load its target from the constant pool
     data_prefix_tag         = 15, // tag for a prefix (carries data arguments)
     post_call_nop_type      = 16, // A tag for post call nop relocations
+    entry_guard_type        = 17, // A tag for an nmethod entry barrier guard value
     type_mask               = 31  // A mask which selects only the above values
   };
@@ -309,6 +310,7 @@ class relocInfo {
     visitor(section_word) \
     visitor(trampoline_stub) \
     visitor(post_call_nop) \
+    visitor(entry_guard) \
 
 
  public:
@@ -883,6 +885,19 @@ class post_call_nop_Relocation : public Relocation {
   }
 };
 
+class entry_guard_Relocation : public Relocation {
+  friend class RelocIterator;
+
+ public:
+  entry_guard_Relocation() : Relocation(relocInfo::entry_guard_type) { }
+
+  static RelocationHolder spec() {
+    RelocationHolder rh = newHolder();
+    new(rh) entry_guard_Relocation();
+    return rh;
+  }
+};
+
 // A CallRelocation always points at a call instruction.
 // It is PC-relative on most machines.
 class CallRelocation : public Relocation {
diff --git a/src/hotspot/share/opto/output.hpp b/src/hotspot/share/opto/output.hpp
index 7e47fdf01b6b..7393f4facec3 100644
--- a/src/hotspot/share/opto/output.hpp
+++ b/src/hotspot/share/opto/output.hpp
@@ -118,14 +118,18 @@ class C2SafepointPollStubTable {
 class C2EntryBarrierStub: public ResourceObj {
   Label _slow_path;
   Label _continuation;
+  Label _guard; // Used on AArch64
 
 public:
   C2EntryBarrierStub() :
     _slow_path(),
-    _continuation() {}
+    _continuation(),
+    _guard() {}
 
   Label& slow_path() { return _slow_path; }
   Label& continuation() { return _continuation; }
+  Label& guard() { return _guard; }
+
 };
 
 class C2EntryBarrierStubTable {
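Note (assumed shape, not shown in this patch): the C2 output phase is expected to consume these stubs roughly as sketched below; the stub table's storage and member name (_stubs) are assumptions made for this illustration only.

void C2EntryBarrierStubTable::emit(CodeBuffer& cb) {
  C2_MacroAssembler masm(&cb);
  for (int i = 0; i < _stubs.length(); i++) {          // _stubs: hypothetical container of recorded stubs
    C2EntryBarrierStub* stub = _stubs.at(i);
    // On AArch64 this binds slow_path(), calls the method_entry_barrier runtime stub,
    // branches back to continuation(), and emits the relocated guard word exposed by guard().
    masm.emit_entry_barrier_stub(stub);
  }
}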