Skip to content

Commit 9307211

Browse files
committedMay 11, 2024
Merge
2 parents 75e381d + 32c7681 commit 9307211

File tree

48 files changed

+1031
-245
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1031
-245
lines changed
 

‎src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

+23-19
Original file line numberDiff line numberDiff line change
@@ -5583,12 +5583,22 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
55835583
Label DONE, SAME;
55845584
Register tmp1 = rscratch1;
55855585
Register tmp2 = rscratch2;
5586-
Register cnt2 = tmp2; // cnt2 only used in array length compare
55875586
int elem_per_word = wordSize/elem_size;
55885587
int log_elem_size = exact_log2(elem_size);
5588+
int klass_offset = arrayOopDesc::klass_offset_in_bytes();
55895589
int length_offset = arrayOopDesc::length_offset_in_bytes();
55905590
int base_offset
55915591
= arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
5592+
// When the length offset is not aligned to 8 bytes,
5593+
// then we align it down. This is valid because the new
5594+
// offset will always be the klass which is the same
5595+
// for type arrays.
5596+
int start_offset = align_down(length_offset, BytesPerWord);
5597+
int extra_length = base_offset - start_offset;
5598+
assert(start_offset == length_offset || start_offset == klass_offset,
5599+
"start offset must be 8-byte-aligned or be the klass offset");
5600+
assert(base_offset != start_offset, "must include the length field");
5601+
extra_length = extra_length / elem_size; // We count in elements, not bytes.
55925602
int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);
55935603

55945604
assert(elem_size == 1 || elem_size == 2, "must be char or byte");
@@ -5622,11 +5632,10 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
56225632
// return false;
56235633
bind(A_IS_NOT_NULL);
56245634
ldrw(cnt1, Address(a1, length_offset));
5625-
ldrw(cnt2, Address(a2, length_offset));
5626-
eorw(tmp5, cnt1, cnt2);
5627-
cbnzw(tmp5, DONE);
5628-
lea(a1, Address(a1, base_offset));
5629-
lea(a2, Address(a2, base_offset));
5635+
// Increase loop counter by diff between base- and actual start-offset.
5636+
addw(cnt1, cnt1, extra_length);
5637+
lea(a1, Address(a1, start_offset));
5638+
lea(a2, Address(a2, start_offset));
56305639
// Check for short strings, i.e. smaller than wordSize.
56315640
subs(cnt1, cnt1, elem_per_word);
56325641
br(Assembler::LT, SHORT);
@@ -5689,18 +5698,18 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
56895698
cbz(a1, DONE);
56905699
ldrw(cnt1, Address(a1, length_offset));
56915700
cbz(a2, DONE);
5692-
ldrw(cnt2, Address(a2, length_offset));
5701+
// Increase loop counter by diff between base- and actual start-offset.
5702+
addw(cnt1, cnt1, extra_length);
5703+
56935704
// on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
56945705
// faster to perform another branch before comparing a1 and a2
56955706
cmp(cnt1, (u1)elem_per_word);
56965707
br(LE, SHORT); // short or same
5697-
ldr(tmp3, Address(pre(a1, base_offset)));
5708+
ldr(tmp3, Address(pre(a1, start_offset)));
56985709
subs(zr, cnt1, stubBytesThreshold);
56995710
br(GE, STUB);
5700-
ldr(tmp4, Address(pre(a2, base_offset)));
5711+
ldr(tmp4, Address(pre(a2, start_offset)));
57015712
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5702-
cmp(cnt2, cnt1);
5703-
br(NE, DONE);
57045713

57055714
// Main 16 byte comparison loop with 2 exits
57065715
bind(NEXT_DWORD); {
@@ -5732,9 +5741,7 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
57325741
b(LAST_CHECK);
57335742

57345743
bind(STUB);
5735-
ldr(tmp4, Address(pre(a2, base_offset)));
5736-
cmp(cnt2, cnt1);
5737-
br(NE, DONE);
5744+
ldr(tmp4, Address(pre(a2, start_offset)));
57385745
if (elem_size == 2) { // convert to byte counter
57395746
lsl(cnt1, cnt1, 1);
57405747
}
@@ -5755,12 +5762,9 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
57555762
mov(result, a2);
57565763
b(DONE);
57575764
bind(SHORT);
5758-
cmp(cnt2, cnt1);
5759-
br(NE, DONE);
5760-
cbz(cnt1, SAME);
57615765
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
5762-
ldr(tmp3, Address(a1, base_offset));
5763-
ldr(tmp4, Address(a2, base_offset));
5766+
ldr(tmp3, Address(a1, start_offset));
5767+
ldr(tmp4, Address(a2, start_offset));
57645768
bind(LAST_CHECK);
57655769
eor(tmp4, tmp3, tmp4);
57665770
lslv(tmp5, tmp4, tmp5);

‎src/hotspot/cpu/riscv/assembler_riscv.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -1891,6 +1891,9 @@ enum Nf {
18911891
INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element
18921892
INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every element
18931893

1894+
INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
1895+
INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros
1896+
18941897
#undef INSN
18951898

18961899
#define INSN(NAME, op, funct3, vm, funct6) \

0 commit comments

Comments
 (0)
Please sign in to comment.