Skip to content

Commit

Permalink
8270947: AArch64: C1: use zero_words to initialize all objects
Browse files Browse the repository at this point in the history
Backport-of: 6c68ce2d396c6fe02201daf2bdb8c164de807cc1
  • Loading branch information
GoeLin committed Oct 18, 2022
1 parent dbf694e commit 974ff6b
Show file tree
Hide file tree
Showing 8 changed files with 28,831 additions and 165 deletions.
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/aarch64.ad
Expand Up @@ -15196,12 +15196,12 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_pipe(pipe_class_memory);
%}

instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 temp, Universe dummy, rFlagsReg cr)
%{
predicate((uint64_t)n->in(2)->get_long()
< (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
match(Set dummy (ClearArray cnt base));
effect(USE_KILL base);
effect(TEMP temp, USE_KILL base, KILL cr);

ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
Expand Down
12 changes: 6 additions & 6 deletions src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp
Expand Up @@ -1115,8 +1115,8 @@ void LIRGenerator::do_NewInstance(NewInstance* x) {
CodeEmitInfo* info = state_for(x, x->state());
LIR_Opr reg = result_register_for(x->type());
new_instance(reg, x->klass(), x->is_unresolved(),
FrameMap::r2_oop_opr,
FrameMap::r5_oop_opr,
FrameMap::r10_oop_opr,
FrameMap::r11_oop_opr,
FrameMap::r4_oop_opr,
LIR_OprFact::illegalOpr,
FrameMap::r3_metadata_opr, info);
Expand All @@ -1131,8 +1131,8 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
length.load_item_force(FrameMap::r19_opr);

LIR_Opr reg = result_register_for(x->type());
LIR_Opr tmp1 = FrameMap::r2_oop_opr;
LIR_Opr tmp2 = FrameMap::r4_oop_opr;
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
LIR_Opr tmp4 = reg;
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
Expand Down Expand Up @@ -1160,8 +1160,8 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
CodeEmitInfo* info = state_for(x, x->state());

LIR_Opr reg = result_register_for(x->type());
LIR_Opr tmp1 = FrameMap::r2_oop_opr;
LIR_Opr tmp2 = FrameMap::r4_oop_opr;
LIR_Opr tmp1 = FrameMap::r10_oop_opr;
LIR_Opr tmp2 = FrameMap::r11_oop_opr;
LIR_Opr tmp3 = FrameMap::r5_oop_opr;
LIR_Opr tmp4 = reg;
LIR_Opr klass_reg = FrameMap::r3_metadata_opr;
Expand Down
64 changes: 18 additions & 46 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -203,20 +203,24 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
}

// preserves obj, destroys len_in_bytes
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
//
// Scratch registers: t1 = r10, t2 = r11
//
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2) {
assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
assert(t1 == r10 && t2 == r11, "must be");

Label done;

// len_in_bytes is positive and ptr sized
subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
br(Assembler::EQ, done);

// Preserve obj
if (hdr_size_in_bytes)
add(obj, obj, hdr_size_in_bytes);
zero_memory(obj, len_in_bytes, t1);
if (hdr_size_in_bytes)
sub(obj, obj, hdr_size_in_bytes);
// zero_words() takes ptr in r10 and count in words in r11
mov(rscratch1, len_in_bytes);
lea(t1, Address(obj, hdr_size_in_bytes));
lsr(t2, rscratch1, LogBytesPerWord);
zero_words(t1, t2);

bind(done);
}
Expand All @@ -231,6 +235,7 @@ void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2,
initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
}

// Scratch registers: t1 = r10, t2 = r11
void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
"con_size_in_bytes is not multiple of alignment");
Expand All @@ -241,45 +246,13 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
// clear rest of allocated space
const Register index = t2;
const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
if (var_size_in_bytes != noreg) {
mov(index, var_size_in_bytes);
initialize_body(obj, index, hdr_size_in_bytes, t1);
} else if (con_size_in_bytes <= threshold) {
// use explicit null stores
int i = hdr_size_in_bytes;
if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
str(zr, Address(obj, i));
i += BytesPerWord;
}
for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
stp(zr, zr, Address(obj, i));
initialize_body(obj, index, hdr_size_in_bytes, t1, t2);
} else if (con_size_in_bytes > hdr_size_in_bytes) {
block_comment("zero memory");
// use loop to null out the fields

int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
mov(index, words / 8);

const int unroll = 8; // Number of str(zr) instructions we'll unroll
int remainder = words % unroll;
lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));

Label entry_point, loop;
b(entry_point);

bind(loop);
sub(index, index, 1);
for (int i = -unroll; i < 0; i++) {
if (-i == remainder)
bind(entry_point);
str(zr, Address(rscratch1, i * wordSize));
}
if (remainder == 0)
bind(entry_point);
add(rscratch1, rscratch1, unroll * wordSize);
cbnz(index, loop);

con_size_in_bytes -= hdr_size_in_bytes;
lea(t1, Address(obj, hdr_size_in_bytes));
zero_words(t1, con_size_in_bytes / BytesPerWord);
}
}

Expand Down Expand Up @@ -314,8 +287,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
initialize_header(obj, klass, len, t1, t2);

// clear rest of allocated space
const Register len_zero = len;
initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2);

membar(StoreStore);

Expand Down
6 changes: 3 additions & 3 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -48,7 +48,7 @@ using MacroAssembler::null_check;
);

void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1);
void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2);

void float_cmp(bool is_float, int unordered_result,
FloatRegister f0, FloatRegister f1,
Expand Down
16 changes: 8 additions & 8 deletions src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -656,9 +656,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
Label slow_path;
Register obj_size = r2;
Register t1 = r19;
Register t2 = r4;
Register obj_size = r19;
Register t1 = r10;
Register t2 = r11;
assert_different_registers(klass, obj, obj_size, t1, t2);

__ stp(r19, zr, Address(__ pre(sp, -2 * wordSize)));
Expand Down Expand Up @@ -769,9 +769,9 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// allocations.
// Otherwise, just go to the slow path.
if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
Register arr_size = r4;
Register t1 = r2;
Register t2 = r5;
Register arr_size = r5;
Register t1 = r10;
Register t2 = r11;
Label slow_path;
assert_different_registers(length, klass, obj, arr_size, t1, t2);

Expand Down Expand Up @@ -801,7 +801,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ andr(t1, t1, Klass::_lh_header_size_mask);
__ sub(arr_size, arr_size, t1); // body length
__ add(t1, t1, obj); // body start
__ initialize_body(t1, arr_size, 0, t2);
__ initialize_body(t1, arr_size, 0, t1, t2);
__ membar(Assembler::StoreStore);
__ verify_oop(obj);

Expand Down

1 comment on commit 974ff6b

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.