Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8305959: x86: Improve itable_stub #13460

Closed
wants to merge 8 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -4300,6 +4300,125 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
}
}

// Look up the method for a megamorphic invokeinterface call in a single pass over itable:
// - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICHolder
//   (i.e. resolved_klass must appear among the interface entries of recv_klass's itable)
// - find the itable record of holder_klass, take the offset stored in that record and load the
//   method located at that offset (relative to the receiver klass) plus itable_index
// The target method is determined by <holder_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
//
// Registers / arguments:
//   recv_klass      - in:  receiver klass. Clobbered when temp_reg2 == noreg (it is then reused as
//                          the register that holds the holder's offset).
//   holder_klass    - in:  klass whose itable record supplies the method block offset.
//   resolved_klass  - in:  klass that must be present in the itable, otherwise the lookup fails.
//   method_result   - out: the selected method; used as a scratch register during the scan.
//   scan_temp       - tmp: pointer that walks the itable offset entries.
//   temp_reg2       - tmp: receives the holder's offset; may be noreg (x86_32 caller).
//   receiver        - in:  only read when temp_reg2 == noreg, to reload the receiver klass.
//   itable_index    - index of the method within the holder's method block.
//   L_no_such_interface - failure target.
//     NOTE(review): every branch to this label is a short-form jccb/jmpb, so the caller presumably
//     has to bind it within short-jump range of this code - confirm.
void MacroAssembler::lookup_interface_method_stub(Register recv_klass,
Register holder_klass,
Register resolved_klass,
Register method_result,
Register scan_temp,
Register temp_reg2,
Register receiver,
int itable_index,
Label& L_no_such_interface) {
assert_different_registers(recv_klass, method_result, holder_klass, resolved_klass, scan_temp, temp_reg2, receiver);
// method_result doubles as the scratch register holding the interface klass of the itable
// entry currently being examined; the real result is only written at the very end.
Register temp_itbl_klass = method_result;
Register temp_reg = (temp_reg2 == noreg ? recv_klass : temp_reg2); // reuse recv_klass register on 32-bit x86 impl

// Layout constants. The first itable offset entry is addressed below as
// recv_klass + vtable_base + vtable_length * vte_size, i.e. directly behind the vtable.
int vtable_base = in_bytes(Klass::vtable_start_offset());
int itentry_off = in_bytes(itableMethodEntry::method_offset());
int scan_step = itableOffsetEntry::size() * wordSize;
int vte_size = vtableEntry::size_in_bytes();
int ioffset = in_bytes(itableOffsetEntry::interface_offset());
int ooffset = in_bytes(itableOffsetEntry::offset_offset());
Address::ScaleFactor times_vte_scale = Address::times_ptr;
assert(vte_size == wordSize, "adjust times_vte_scale");

Label L_loop_scan_resolved_entry, L_resolved_found, L_holder_found;

// temp_itbl_klass = recv_klass.itable[0]
// scan_temp = &recv_klass.itable[0] + step
// (scan_temp always points at the interface field of the entry *after* the one whose
// interface klass is currently held in temp_itbl_klass.)
movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));
movptr(temp_itbl_klass, Address(recv_klass, scan_temp, times_vte_scale, vtable_base + ioffset));
lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base + ioffset + scan_step));
// temp_reg == 0 means "holder offset not found yet" (tested at L_resolved_found).
// When temp_reg2 == noreg this zeroes recv_klass itself, so it has to come after the two
// recv_klass-based address computations above.
// NOTE(review): the zero sentinel presumably relies on a valid itable offset never being 0 - confirm.
xorptr(temp_reg, temp_reg);

// Initial checks:
// - if (holder_klass != resolved_klass), go to "scan for resolved"
// - if (itable[0] == 0), no such interface
// - if (itable[0] == holder_klass), shortcut to "holder found"
// The jump into the "scan for resolved" loop enters it in the middle, with itable[0] already
// loaded into temp_itbl_klass and scan_temp already advanced past it.
cmpptr(holder_klass, resolved_klass);
jccb(Assembler::notEqual, L_loop_scan_resolved_entry);
testptr(temp_itbl_klass, temp_itbl_klass);
jccb(Assembler::zero, L_no_such_interface);
cmpptr(holder_klass, temp_itbl_klass);
jccb(Assembler::equal, L_holder_found);

// Loop: Look for holder_klass record in itable
// do {
// tmp = itable[index];
// index += step;
// if (tmp == holder_klass) {
// goto L_holder_found; // Found!
// }
// } while (tmp != 0);
// goto L_no_such_interface // Not found.
// This loop is also entered from L_resolved_found; in that case it simply continues from the
// current scan position, because every earlier entry was already compared against
// holder_klass by the "scan for resolved" loop.
Label L_scan_holder;
bind(L_scan_holder);
movptr(temp_itbl_klass, Address(scan_temp, 0));
addptr(scan_temp, scan_step);
cmpptr(holder_klass, temp_itbl_klass);
jccb(Assembler::equal, L_holder_found);
testptr(temp_itbl_klass, temp_itbl_klass);
jccb(Assembler::notZero, L_scan_holder);

jmpb(L_no_such_interface);

// Loop: Look for resolved_class record in itable
// do {
// tmp = itable[index];
// index += step;
// if (tmp == holder_klass) {
// // Also check if we have met a holder klass: remember the offset field of that entry
// holder_offset = itable[index - step].offset;
// }
// if (tmp == resolved_klass) {
// goto L_resolved_found; // Found!
// }
// } while (tmp != 0);
// goto L_no_such_interface // Not found.
//
Label L_loop_scan_resolved;
bind(L_loop_scan_resolved);
movptr(temp_itbl_klass, Address(scan_temp, 0));
addptr(scan_temp, scan_step);
bind(L_loop_scan_resolved_entry);
cmpptr(holder_klass, temp_itbl_klass);
// scan_temp is one entry ahead: "- scan_step" steps back to the entry just compared and
// "ooffset - ioffset" moves from its interface field to its offset field.
cmovl(Assembler::equal, temp_reg, Address(scan_temp, ooffset - ioffset - scan_step));
cmpptr(resolved_klass, temp_itbl_klass);
jccb(Assembler::equal, L_resolved_found);
testptr(temp_itbl_klass, temp_itbl_klass);
jccb(Assembler::notZero, L_loop_scan_resolved);

jmpb(L_no_such_interface);

Label L_ready;

// See if we already have a holder klass. If not, go and scan for it.
bind(L_resolved_found);
testptr(temp_reg, temp_reg);
jccb(Assembler::zero, L_scan_holder);
jmpb(L_ready);

// The entry that matched holder_klass is the one just before scan_temp: load its offset field.
bind(L_holder_found);
movl(temp_reg, Address(scan_temp, ooffset - ioffset - scan_step));

// Finally, temp_reg contains holder_klass vtable offset
// method_result = *(receiver klass + temp_reg + itable_index * wordSize + itentry_off)
bind(L_ready);
assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
if (temp_reg2 == noreg) { // recv_klass register is clobbered for 32-bit x86 impl
// recv_klass now holds the offset (it is temp_reg), so reload the klass from the receiver
// into scan_temp, which is no longer needed for scanning.
load_klass(scan_temp, receiver, noreg);
movptr(method_result, Address(scan_temp, temp_reg, Address::times_1, itable_index * wordSize + itentry_off));
} else {
movptr(method_result, Address(recv_klass, temp_reg, Address::times_1, itable_index * wordSize + itentry_off));
}
}


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
10 changes: 10 additions & 0 deletions src/hotspot/cpu/x86/macroAssembler_x86.hpp
Original file line number Diff line number Diff line change
@@ -603,6 +603,16 @@ class MacroAssembler: public Assembler {
Label& no_such_interface,
bool return_method = true);

// Single-pass itable lookup for the itable stubs: checks that resolved_klass is present in the
// itable of recv_klass and loads the method selected by <holder_klass, itable_index> into
// method_result. Falls through on success, branches to L_no_such_interface on failure.
// temp_reg2 may be noreg; recv_klass is then clobbered and receiver is used to reload the klass.
void lookup_interface_method_stub(Register recv_klass,
Register holder_klass,
Register resolved_klass,
Register method_result,
Register scan_temp,
Register temp_reg2,
Register receiver,
int itable_index,
Label& L_no_such_interface);

// virtual method calling
void lookup_virtual_method(Register recv_klass,
RegisterOrConstant vtable_index,
43 changes: 19 additions & 24 deletions src/hotspot/cpu/x86/vtableStubs_x86_32.cpp
Original file line number Diff line number Diff line change
@@ -179,14 +179,16 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// rax: CompiledICHolder
// rcx: Receiver

// Most registers are in use; we'll use rax, rbx, rsi, rdi
// Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi
// (If we need to make rsi, rdi callee-save, do a push/pop here.)
const Register recv_klass_reg = rsi;
const Register holder_klass_reg = rax; // declaring interface klass (DECC)
const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
const Register temp_reg = rdi;
const Register resolved_klass_reg = rdi; // resolved interface klass (REFC)
const Register temp_reg = rdx;
const Register method = rbx;
const Register icholder_reg = rax;
const Register receiver = rcx;

const Register icholder_reg = rax;
__ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));

@@ -198,35 +200,26 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
__ load_klass(recv_klass_reg, rcx, noreg);

start_pc = __ pc();
__ push(rdx); // temp_reg

// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
recv_klass_reg, resolved_klass_reg, noreg,
// outputs: scan temp. reg1, scan temp. reg2
recv_klass_reg, temp_reg,
L_no_such_interface,
/*return_method=*/false);

const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();

// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, rcx, noreg); // restore recv_klass_reg
__ lookup_interface_method(// inputs: rec. class, interface, itable index
recv_klass_reg, holder_klass_reg, itable_index,
// outputs: method, scan temp. reg
method, temp_reg,
L_no_such_interface);

__ lookup_interface_method_stub(recv_klass_reg, // input
holder_klass_reg, // input
resolved_klass_reg, // input
method, // output
temp_reg,
noreg,
receiver, // input (x86_32 only: to restore recv_klass value)
itable_index,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;

// We expect we need index_dependent_slop extra bytes. Reason:
// The emitted code in lookup_interface_method changes when itable_index exceeds 31.
// For windows, a narrow estimate was found to be 104. Other OSes not tested.
const ptrdiff_t estimate = 104;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
const ptrdiff_t codesize = lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
@@ -246,6 +239,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
}
#endif // ASSERT

__ pop(rdx);
address ame_addr = __ pc();
__ jmp(Address(method, Method::from_compiled_offset()));

@@ -255,6 +249,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// We force resolving of the call site by jumping to the "handle
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
__ pop(rdx);
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));

masm->flush();
35 changes: 14 additions & 21 deletions src/hotspot/cpu/x86/vtableStubs_x86_64.cpp
Original file line number Diff line number Diff line change
@@ -175,10 +175,12 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them)
const Register recv_klass_reg = r10;
const Register holder_klass_reg = rax; // declaring interface klass (DECC)
const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
const Register resolved_klass_reg = r14; // resolved interface klass (REFC)
const Register temp_reg = r11;
const Register temp_reg2 = r13;
const Register method = rbx;
const Register icholder_reg = rax;

const Register icholder_reg = rax;
__ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));

@@ -192,33 +194,24 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
start_pc = __ pc();

// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
recv_klass_reg, resolved_klass_reg, noreg,
// outputs: scan temp. reg1, scan temp. reg2
recv_klass_reg, temp_reg,
L_no_such_interface,
/*return_method=*/false);

const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();

// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, j_rarg0, temp_reg); // restore recv_klass_reg
__ lookup_interface_method(// inputs: rec. class, interface, itable index
recv_klass_reg, holder_klass_reg, itable_index,
// outputs: method, scan temp. reg
method, temp_reg,
L_no_such_interface);
__ lookup_interface_method_stub(recv_klass_reg, // input
holder_klass_reg, // input
resolved_klass_reg, // input
method, // output
temp_reg,
temp_reg2,
noreg,
itable_index,
L_no_such_interface);

const ptrdiff_t lookupSize = __ pc() - start_pc;

// We expect we need index_dependent_slop extra bytes. Reason:
// The emitted code in lookup_interface_method changes when itable_index exceeds 15.
// For linux, a very narrow estimate would be 112, but Solaris requires some more space (130).
const ptrdiff_t estimate = 136;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
const ptrdiff_t codesize = lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
109 changes: 109 additions & 0 deletions test/micro/org/openjdk/bench/vm/compiler/InterfaceCalls.java
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.CompilerControl;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
@@ -52,6 +53,14 @@ interface SecondInterface {
public int getIntSecond();
}

/** Sub-interface of FirstInterface that supplies a default getIntFirst() returning 44. */
interface FirstInterfaceExt extends FirstInterface {
default int getIntFirst() { return 44; }
}

/**
 * Second-level sub-interface (FirstInterface -> FirstInterfaceExt -> FirstInterfaceExtExt)
 * that overrides the default getIntFirst() to return 45.
 */
interface FirstInterfaceExtExt extends FirstInterfaceExt {
default int getIntFirst() { return 45; }
}

class FirstClass implements FirstInterface, SecondInterface {
public int getIntFirst() {
return 1;
@@ -102,8 +111,80 @@ public int getIntSecond() {
}
}

/** FirstInterface receiver #1 for testIfaceCall; getIntFirst() returns -1. */
class FirstClassDontInline implements FirstInterface {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -1;
}
}

/** FirstInterface receiver #2 for testIfaceCall; getIntFirst() returns -2. */
class SecondClassDontInline implements FirstInterface {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -2;
}
}

/** FirstInterface receiver #3 for testIfaceCall; getIntFirst() returns -3. */
class ThirdClassDontInline implements FirstInterface {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -3;
}
}

/** FirstInterface receiver #4 for testIfaceCall; getIntFirst() returns -4. */
class FourthClassDontInline implements FirstInterface {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -4;
}
}

/** FirstInterface receiver #5 for testIfaceCall; getIntFirst() returns -5. */
class FifthClassDontInline implements FirstInterface {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -5;
}
}

/**
 * FirstInterfaceExtExt receiver #1 for testIfaceExtCall; overrides the interface default,
 * getIntFirst() returns -1.
 */
class FirstClassDontInlineExtExt implements FirstInterfaceExtExt {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -1;
}
}

/**
 * FirstInterfaceExtExt receiver #2 for testIfaceExtCall; overrides the interface default,
 * getIntFirst() returns -2.
 */
class SecondClassDontInlineExtExt implements FirstInterfaceExtExt {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -2;
}
}

/**
 * FirstInterfaceExtExt receiver #3 for testIfaceExtCall; overrides the interface default,
 * getIntFirst() returns -3.
 */
class ThirdClassDontInlineExtExt implements FirstInterfaceExtExt {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -3;
}
}

/**
 * FirstInterfaceExtExt receiver #4 for testIfaceExtCall; overrides the interface default,
 * getIntFirst() returns -4.
 */
class FourthClassDontInlineExtExt implements FirstInterfaceExtExt {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -4;
}
}

/**
 * FirstInterfaceExtExt receiver #5 for testIfaceExtCall; overrides the interface default,
 * getIntFirst() returns -5.
 */
class FifthClassDontInlineExtExt implements FirstInterfaceExtExt {
// DONT_INLINE: the method must stay a real call target so the interface call itself is measured.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -5;
}
}

// Number of receiver objects in each of the arrays below; also the modulus used by the
// benchmarks that cycle through them.
final int asLength = 5;
// Receivers whose getIntFirst() may be inlined.
public FirstInterface[] as = new FirstInterface[asLength];
// Receivers of the *ClassDontInline types (getIntFirst() annotated DONT_INLINE).
public FirstInterface[] noninlined = new FirstInterface[asLength];
// Receivers of the *ClassDontInlineExtExt types, reached through the two-level sub-interface.
public FirstInterfaceExtExt[] noninlinedextext = new FirstInterfaceExtExt[asLength];


@Setup
@@ -113,6 +194,18 @@ public void setupSubclass() {
as[2] = new ThirdClass();
as[3] = new FourthClass();
as[4] = new FifthClass();

noninlined[0] = new FirstClassDontInline();
noninlined[1] = new SecondClassDontInline();
noninlined[2] = new ThirdClassDontInline();
noninlined[3] = new FourthClassDontInline();
noninlined[4] = new FifthClassDontInline();

noninlinedextext[0] = new FirstClassDontInlineExtExt();
noninlinedextext[1] = new SecondClassDontInlineExtExt();
noninlinedextext[2] = new ThirdClassDontInlineExtExt();
noninlinedextext[3] = new FourthClassDontInlineExtExt();
noninlinedextext[4] = new FifthClassDontInlineExtExt();
}

/**
@@ -126,6 +219,22 @@ public int testMonomorphic() {

int l = 0;

/**
 * Tests single base interface method call.
 * Each invocation takes the next receiver from {@code noninlined}, cycling through all
 * {@code asLength} entries, and calls getIntFirst() through FirstInterface.
 * The Blackhole parameter is not used by the body; the result is returned instead.
 */
@Benchmark
public int testIfaceCall(Blackhole bh) {
FirstInterface ai = noninlined[l];
// Advance the shared cursor: increment l, then wrap it to the range [0, asLength).
l = ++ l % asLength;
return ai.getIntFirst();
}

/**
 * Tests extended interface method call.
 * Each invocation takes the next receiver from {@code noninlinedextext}, cycling through all
 * {@code asLength} entries, and calls getIntFirst() through FirstInterfaceExtExt.
 * The Blackhole parameter is not used by the body; the result is returned instead.
 */
@Benchmark
public int testIfaceExtCall(Blackhole bh) {
FirstInterfaceExtExt ai = noninlinedextext[l];
// Advance the shared cursor: increment l, then wrap it to the range [0, asLength).
l = ++ l % asLength;
return ai.getIntFirst();
}

/**
* Interface call address computation within loop but the receiver preexists
* the loop and the ac can be moved outside of the loop