Skip to content

Commit 8460072

Browse files
zifeihan, RealFYang
authored and committed Jan 17, 2025
8342881: RISC-V: secondary_super_cache does not scale well: C1 and interpreter
Reviewed-by: fyang, fjiang
1 parent f5573f5 commit 8460072

File tree

5 files changed

+351
-74
lines changed

5 files changed

+351
-74
lines changed
 

‎src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp

+7-1
Original file line number | Diff line number | Diff line change
@@ -883,7 +883,13 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
883883
__ ld(x10, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // super klass
884884

885885
Label miss;
886-
__ check_klass_subtype_slow_path(x14, x10, x12, x15, nullptr, &miss);
886+
__ check_klass_subtype_slow_path(x14, /*sub_klass*/
887+
x10, /*super_klass*/
888+
x12, /*tmp1_reg*/
889+
x15, /*tmp2_reg*/
890+
nullptr, /*L_success*/
891+
&miss /*L_failure*/);
892+
// Need extras for table lookup: x7, x11, x13
887893

888894
// fallthrough on success:
889895
__ mv(t0, 1);

‎src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

+242-33
Original file line number | Diff line number | Diff line change
@@ -2869,6 +2869,24 @@ void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2
28692869
orr(Rd, tmp1, Rd);
28702870
}
28712871

2872+
// rotate right by the number of bits held in register 'shift'
2873+
void MacroAssembler::ror(Register dst, Register src, Register shift, Register tmp)
2874+
{
2875+
if (UseZbb) {
2876+
rorr(dst, src, shift);
2877+
return;
2878+
}
2879+
2880+
assert_different_registers(dst, tmp);
2881+
assert_different_registers(src, tmp);
2882+
2883+
mv(tmp, 64);
2884+
sub(tmp, tmp, shift);
2885+
sll(tmp, src, tmp);
2886+
srl(dst, src, shift);
2887+
orr(dst, dst, tmp);
2888+
}
2889+
28722890
// rotate right with shift bits
28732891
void MacroAssembler::ror(Register dst, Register src, uint32_t shift, Register tmp)
28742892
{
@@ -4067,12 +4085,10 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
40674085
Label* L_failure,
40684086
Label* L_slow_path,
40694087
Register super_check_offset) {
4070-
assert_different_registers(sub_klass, super_klass, tmp_reg);
4071-
bool must_load_sco = (super_check_offset == noreg);
4088+
assert_different_registers(sub_klass, super_klass, tmp_reg, super_check_offset);
4089+
bool must_load_sco = !super_check_offset->is_valid();
40724090
if (must_load_sco) {
40734091
assert(tmp_reg != noreg, "supply either a temp or a register offset");
4074-
} else {
4075-
assert_different_registers(sub_klass, super_klass, super_check_offset);
40764092
}
40774093

40784094
Label L_fallthrough;
@@ -4108,6 +4124,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
41084124
add(t0, sub_klass, super_check_offset);
41094125
Address super_check_addr(t0);
41104126
ld(t0, super_check_addr); // load displayed supertype
4127+
beq(super_klass, t0, *L_success);
41114128

41124129
// This check has worked decisively for primary supers.
41134130
// Secondary supers are sought in the super_cache ('super_cache_addr').
@@ -4120,7 +4137,6 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
41204137
// So if it was a primary super, we can just fail immediately.
41214138
// Otherwise, it's the slow path for us (no success at this point).
41224139

4123-
beq(super_klass, t0, *L_success);
41244140
mv(t1, sc_offset);
41254141
if (L_failure == &L_fallthrough) {
41264142
beq(super_check_offset, t1, *L_slow_path);
@@ -4149,12 +4165,13 @@ void MacroAssembler::repne_scan(Register addr, Register value, Register count,
41494165
bind(Lexit);
41504166
}
41514167

4152-
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
4153-
Register super_klass,
4154-
Register tmp1_reg,
4155-
Register tmp2_reg,
4156-
Label* L_success,
4157-
Label* L_failure) {
4168+
void MacroAssembler::check_klass_subtype_slow_path_linear(Register sub_klass,
4169+
Register super_klass,
4170+
Register tmp1_reg,
4171+
Register tmp2_reg,
4172+
Label* L_success,
4173+
Label* L_failure,
4174+
bool set_cond_codes) {
41584175
assert_different_registers(sub_klass, super_klass, tmp1_reg);
41594176
if (tmp2_reg != noreg) {
41604177
assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
@@ -4228,7 +4245,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
42284245
bne(t1, t0, *L_failure);
42294246

42304247
// Success. Cache the super we found and proceed in triumph.
4231-
sd(super_klass, super_cache_addr);
4248+
if (UseSecondarySupersCache) {
4249+
sd(super_klass, super_cache_addr);
4250+
}
42324251

42334252
if (L_success != &L_fallthrough) {
42344253
j(*L_success);
@@ -4269,6 +4288,103 @@ void MacroAssembler::population_count(Register dst, Register src,
42694288
}
42704289
}
42714290

4291+
// If Register r is invalid, remove a new register from
4292+
// available_regs, and add new register to regs_to_push.
4293+
Register MacroAssembler::allocate_if_noreg(Register r,
4294+
RegSetIterator<Register> &available_regs,
4295+
RegSet &regs_to_push) {
4296+
if (!r->is_valid()) {
4297+
r = *available_regs++;
4298+
regs_to_push += r;
4299+
}
4300+
return r;
4301+
}
4302+
4303+
// check_klass_subtype_slow_path_table() looks for super_klass in the
4304+
// hash table belonging to sub_klass, branching to L_success or
4305+
// L_failure as appropriate. This is essentially a shim which
4306+
// allocates registers as necessary then calls
4307+
// lookup_secondary_supers_table() to do the work. Any of the tmp
4308+
// regs may be noreg, in which case this logic will choose some
4309+
// registers, then push and pop them from the stack.
4310+
void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass,
4311+
Register super_klass,
4312+
Register tmp1_reg,
4313+
Register tmp2_reg,
4314+
Label* L_success,
4315+
Label* L_failure,
4316+
bool set_cond_codes) {
4317+
RegSet tmps = RegSet::of(tmp1_reg, tmp2_reg);
4318+
4319+
assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg);
4320+
4321+
Label L_fallthrough;
4322+
int label_nulls = 0;
4323+
if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; }
4324+
if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; }
4325+
assert(label_nulls <= 1, "at most one null in the batch");
4326+
4327+
BLOCK_COMMENT("check_klass_subtype_slow_path");
4328+
4329+
RegSet caller_save_regs = RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31);
4330+
RegSetIterator<Register> available_regs = (caller_save_regs - tmps - sub_klass - super_klass).begin();
4331+
4332+
RegSet pushed_regs;
4333+
4334+
tmp1_reg = allocate_if_noreg(tmp1_reg, available_regs, pushed_regs);
4335+
tmp2_reg = allocate_if_noreg(tmp2_reg, available_regs, pushed_regs);
4336+
4337+
Register tmp3_reg = noreg, tmp4_reg = noreg, result_reg = noreg;
4338+
4339+
tmp3_reg = allocate_if_noreg(tmp3_reg, available_regs, pushed_regs);
4340+
tmp4_reg = allocate_if_noreg(tmp4_reg, available_regs, pushed_regs);
4341+
result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs);
4342+
4343+
push_reg(pushed_regs, sp);
4344+
4345+
lookup_secondary_supers_table_var(sub_klass,
4346+
super_klass,
4347+
result_reg,
4348+
tmp1_reg, tmp2_reg, tmp3_reg,
4349+
tmp4_reg, nullptr);
4350+
4351+
// Move the result to t1 as we are about to unspill the tmp registers.
4352+
mv(t1, result_reg);
4353+
4354+
// Unspill the tmp. registers:
4355+
pop_reg(pushed_regs, sp);
4356+
4357+
// NB! Callers may assume that, when set_cond_codes is true, this
4358+
// code sets tmp2_reg to a nonzero value.
4359+
if (set_cond_codes) {
4360+
mv(tmp2_reg, 1);
4361+
}
4362+
4363+
bnez(t1, *L_failure);
4364+
4365+
if (L_success != &L_fallthrough) {
4366+
j(*L_success);
4367+
}
4368+
4369+
bind(L_fallthrough);
4370+
}
4371+
4372+
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
4373+
Register super_klass,
4374+
Register tmp1_reg,
4375+
Register tmp2_reg,
4376+
Label* L_success,
4377+
Label* L_failure,
4378+
bool set_cond_codes) {
4379+
if (UseSecondarySupersTable) {
4380+
check_klass_subtype_slow_path_table
4381+
(sub_klass, super_klass, tmp1_reg, tmp2_reg, L_success, L_failure, set_cond_codes);
4382+
} else {
4383+
check_klass_subtype_slow_path_linear
4384+
(sub_klass, super_klass, tmp1_reg, tmp2_reg, L_success, L_failure, set_cond_codes);
4385+
}
4386+
}
4387+
42724388
// Ensure that the inline code and the stub are using the same registers
42734389
// as we need to call the stub from inline code when there is a collision
42744390
// in the hashed lookup in the secondary supers array.
@@ -4284,17 +4400,16 @@ do {
42844400
(r_bitmap == x16 || r_bitmap == noreg), "registers must match riscv.ad"); \
42854401
} while(0)
42864402

4287-
// Return true: we succeeded in generating this code
4288-
bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
4289-
Register r_super_klass,
4290-
Register result,
4291-
Register tmp1,
4292-
Register tmp2,
4293-
Register tmp3,
4294-
Register tmp4,
4295-
u1 super_klass_slot,
4296-
bool stub_is_near) {
4297-
assert_different_registers(r_sub_klass, r_super_klass, result, tmp1, tmp2, tmp3, tmp4, t0);
4403+
bool MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass,
4404+
Register r_super_klass,
4405+
Register result,
4406+
Register tmp1,
4407+
Register tmp2,
4408+
Register tmp3,
4409+
Register tmp4,
4410+
u1 super_klass_slot,
4411+
bool stub_is_near) {
4412+
assert_different_registers(r_sub_klass, r_super_klass, result, tmp1, tmp2, tmp3, tmp4, t0, t1);
42984413

42994414
Label L_fallthrough;
43004415

@@ -4369,6 +4484,98 @@ bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
43694484
return true;
43704485
}
43714486

4487+
// At runtime, return 0 in result if r_super_klass is a superclass of
4488+
// r_sub_klass, otherwise return nonzero. Use this version of
4489+
// lookup_secondary_supers_table() if you don't know ahead of time
4490+
// which superclass will be searched for. Used by interpreter and
4491+
// runtime stubs. It is larger and has somewhat greater latency than
4492+
// the version above, which takes a constant super_klass_slot.
4493+
void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass,
4494+
Register r_super_klass,
4495+
Register result,
4496+
Register tmp1,
4497+
Register tmp2,
4498+
Register tmp3,
4499+
Register tmp4,
4500+
Label *L_success) {
4501+
assert_different_registers(r_sub_klass, r_super_klass, result, tmp1, tmp2, tmp3, tmp4, t0, t1);
4502+
4503+
Label L_fallthrough;
4504+
4505+
BLOCK_COMMENT("lookup_secondary_supers_table {");
4506+
4507+
const Register
4508+
r_array_index = tmp3,
4509+
r_bitmap = tmp4,
4510+
slot = t1;
4511+
4512+
lbu(slot, Address(r_super_klass, Klass::hash_slot_offset()));
4513+
4514+
// Make sure that result is nonzero if the test below misses.
4515+
mv(result, 1);
4516+
4517+
ld(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset()));
4518+
4519+
// First check the bitmap to see if super_klass might be present. If
4520+
// the bit is zero, we are certain that super_klass is not one of
4521+
// the secondary supers.
4522+
4523+
// This next instruction is equivalent to:
4524+
// mv(tmp_reg, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1));
4525+
// sub(r_array_index, slot, tmp_reg);
4526+
xori(r_array_index, slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1));
4527+
sll(r_array_index, r_bitmap, r_array_index);
4528+
test_bit(t0, r_array_index, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1);
4529+
beqz(t0, L_fallthrough);
4530+
4531+
// Get the first array index that can contain super_klass into r_array_index.
4532+
population_count(r_array_index, r_array_index, tmp1, tmp2);
4533+
4534+
// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
4535+
4536+
const Register
4537+
r_array_base = tmp1,
4538+
r_array_length = tmp2;
4539+
4540+
// The value i in r_array_index is >= 1, so even though r_array_base
4541+
// points to the length, we don't need to adjust it to point to the data.
4542+
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
4543+
assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");
4544+
4545+
// We will consult the secondary-super array.
4546+
ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));
4547+
4548+
shadd(result, r_array_index, r_array_base, result, LogBytesPerWord);
4549+
ld(result, Address(result));
4550+
xorr(result, result, r_super_klass);
4551+
beqz(result, L_success ? *L_success : L_fallthrough); // Found a match
4552+
4553+
// Is there another entry to check? Consult the bitmap.
4554+
ror(r_bitmap, r_bitmap, slot);
4555+
test_bit(t0, r_bitmap, 1);
4556+
beqz(t0, L_fallthrough);
4557+
4558+
// The slot we just inspected is at secondary_supers[r_array_index - 1].
4559+
// The next slot to be inspected, by the logic we're about to call,
4560+
// is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
4561+
// have been checked.
4562+
lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index,
4563+
r_bitmap, result, r_array_length, false /*is_stub*/);
4564+
4565+
BLOCK_COMMENT("} lookup_secondary_supers_table");
4566+
4567+
bind(L_fallthrough);
4568+
4569+
if (VerifySecondarySupers) {
4570+
verify_secondary_supers_table(r_sub_klass, r_super_klass,
4571+
result, tmp1, tmp2, tmp3);
4572+
}
4573+
4574+
if (L_success) {
4575+
beqz(result, *L_success);
4576+
}
4577+
}
4578+
43724579
// Called by code generated by check_klass_subtype_slow_path
43734580
// above. This is called when there is a collision in the hashed
43744581
// lookup in the secondary supers array.
@@ -4377,15 +4584,18 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
43774584
Register r_array_index,
43784585
Register r_bitmap,
43794586
Register result,
4380-
Register tmp1) {
4381-
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, tmp1, result, t0);
4587+
Register tmp,
4588+
bool is_stub) {
4589+
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, tmp, result, t0);
43824590

43834591
const Register
4384-
r_array_length = tmp1,
4592+
r_array_length = tmp,
43854593
r_sub_klass = noreg; // unused
43864594

4387-
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length,
4388-
r_array_index, r_sub_klass, result, r_bitmap);
4595+
if (is_stub) {
4596+
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length,
4597+
r_array_index, r_sub_klass, result, r_bitmap);
4598+
}
43894599

43904600
Label L_matched, L_fallthrough, L_bitmap_full;
43914601

@@ -4412,8 +4622,10 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl
44124622
{ // This is conventional linear probing, but instead of terminating
44134623
// when a null entry is found in the table, we maintain a bitmap
44144624
// in which a 0 indicates missing entries.
4415-
// The check above guarantees there are 0s in the bitmap, so the loop
4416-
// eventually terminates.
4625+
// As long as the bitmap is not completely full,
4626+
// array_length == popcount(bitmap). The array_length check above
4627+
// guarantees there are 0s in the bitmap, so the loop eventually
4628+
// terminates.
44174629
Label L_loop;
44184630
bind(L_loop);
44194631

@@ -4465,9 +4677,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
44654677
r_array_index = noreg, // unused
44664678
r_bitmap = noreg; // unused
44674679

4468-
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS(r_super_klass, r_array_base, r_array_length,
4469-
r_array_index, r_sub_klass, result, r_bitmap);
4470-
44714680
BLOCK_COMMENT("verify_secondary_supers_table {");
44724681

44734682
// We will consult the secondary-super array.

0 commit comments

Comments
 (0)
Please sign in to comment.