Skip to content

Commit 2f10a31

Browse files
galderz authored and rwestrel committed May 15, 2024
8302850: Implement C1 clone intrinsic that reuses arraycopy code for primitive arrays
Reviewed-by: dlong, roland
1 parent c642f44 commit 2f10a31

16 files changed: +288 -34 lines changed
 

‎src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp

+5-2
Original file line number | Diff line number | Diff line change
@@ -1212,7 +1212,8 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
12121212
arrayOopDesc::base_offset_in_bytes(op->type()),
12131213
array_element_size(op->type()),
12141214
op->klass()->as_register(),
1215-
*op->stub()->entry());
1215+
*op->stub()->entry(),
1216+
op->zero_array());
12161217
}
12171218
__ bind(*op->stub()->continuation());
12181219
}
@@ -2504,7 +2505,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
25042505
__ call_VM_leaf(entry, 3);
25052506
}
25062507

2507-
__ bind(*stub->continuation());
2508+
if (stub != nullptr) {
2509+
__ bind(*stub->continuation());
2510+
}
25082511
}
25092512

25102513

‎src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp

+18-3
Original file line number | Diff line number | Diff line change
@@ -878,7 +878,13 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
878878
assert(x->number_of_arguments() == 5, "wrong type");
879879

880880
// Make all state_for calls early since they can emit code
881-
CodeEmitInfo* info = state_for(x, x->state());
881+
CodeEmitInfo* info = nullptr;
882+
if (x->state_before() != nullptr && x->state_before()->force_reexecute()) {
883+
info = state_for(x, x->state_before());
884+
info->set_force_reexecute();
885+
} else {
886+
info = state_for(x, x->state());
887+
}
882888

883889
LIRItem src(x->argument_at(0), this);
884890
LIRItem src_pos(x->argument_at(1), this);
@@ -911,6 +917,9 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
911917
int flags;
912918
ciArrayKlass* expected_type;
913919
arraycopy_helper(x, &flags, &expected_type);
920+
if (x->check_flag(Instruction::OmitChecksFlag)) {
921+
flags = 0;
922+
}
914923

915924
__ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint
916925
}
@@ -1132,7 +1141,13 @@ void LIRGenerator::do_NewInstance(NewInstance* x) {
11321141
}
11331142

11341143
void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
1135-
CodeEmitInfo* info = state_for(x, x->state());
1144+
CodeEmitInfo* info = nullptr;
1145+
if (x->state_before() != nullptr && x->state_before()->force_reexecute()) {
1146+
info = state_for(x, x->state_before());
1147+
info->set_force_reexecute();
1148+
} else {
1149+
info = state_for(x, x->state());
1150+
}
11361151

11371152
LIRItem length(x->length(), this);
11381153
length.load_item_force(FrameMap::r19_opr);
@@ -1149,7 +1164,7 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
11491164
__ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
11501165

11511166
CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
1152-
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
1167+
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path, x->zero_array());
11531168

11541169
LIR_Opr result = rlock_result(x);
11551170
__ move(reg, result);

‎src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp

+4-2
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
272272

273273
verify_oop(obj);
274274
}
275-
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case) {
275+
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case, bool zero_array) {
276276
assert_different_registers(obj, len, t1, t2, klass);
277277

278278
// determine alignment mask
@@ -297,7 +297,9 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
297297
// following the length field in initialize_header().
298298
int base_offset = align_up(base_offset_in_bytes, BytesPerWord);
299299
// clear rest of allocated space
300-
initialize_body(obj, arr_size, base_offset, t1, t2);
300+
if (zero_array) {
301+
initialize_body(obj, arr_size, base_offset, t1, t2);
302+
}
301303
if (Compilation::current()->bailed_out()) {
302304
return;
303305
}

‎src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,8 @@ using MacroAssembler::null_check;
100100
// base_offset_in_bytes: offset of first array element, in bytes
101101
// f : element scale factor
102102
// slow_case : exit to slow case implementation if fast allocation fails
103-
void allocate_array(Register obj, Register len, Register t, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case);
103+
// zero_array : zero the allocated array or not
104+
void allocate_array(Register obj, Register len, Register t, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case, bool zero_array);
104105

105106
int rsp_offset() const { return _rsp_offset; }
106107
void set_rsp_offset(int n) { _rsp_offset = n; }

‎src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp

+5-2
Original file line number | Diff line number | Diff line change
@@ -1621,7 +1621,8 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
16211621
arrayOopDesc::base_offset_in_bytes(op->type()),
16221622
array_element_size(op->type()),
16231623
op->klass()->as_register(),
1624-
*op->stub()->entry());
1624+
*op->stub()->entry(),
1625+
op->zero_array());
16251626
}
16261627
__ bind(*op->stub()->continuation());
16271628
}
@@ -3453,7 +3454,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
34533454
address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
34543455
__ call_VM_leaf(entry, 0);
34553456

3456-
__ bind(*stub->continuation());
3457+
if (stub != nullptr) {
3458+
__ bind(*stub->continuation());
3459+
}
34573460
}
34583461

34593462
void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {

‎src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp

+27-7
Original file line number | Diff line number | Diff line change
@@ -1004,7 +1004,13 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
10041004
assert(x->number_of_arguments() == 5, "wrong type");
10051005

10061006
// Make all state_for calls early since they can emit code
1007-
CodeEmitInfo* info = state_for(x, x->state());
1007+
CodeEmitInfo* info = nullptr;
1008+
if (x->state_before() != nullptr && x->state_before()->force_reexecute()) {
1009+
info = state_for(x, x->state_before());
1010+
info->set_force_reexecute();
1011+
} else {
1012+
info = state_for(x, x->state());
1013+
}
10081014

10091015
LIRItem src(x->argument_at(0), this);
10101016
LIRItem src_pos(x->argument_at(1), this);
@@ -1016,13 +1022,25 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
10161022
// LinearScan will fail allocation (because arraycopy always needs a
10171023
// call)
10181024

1025+
int flags;
1026+
ciArrayKlass* expected_type;
1027+
arraycopy_helper(x, &flags, &expected_type);
1028+
if (x->check_flag(Instruction::OmitChecksFlag)) {
1029+
flags = 0;
1030+
}
1031+
10191032
#ifndef _LP64
10201033
src.load_item_force (FrameMap::rcx_oop_opr);
10211034
src_pos.load_item_force (FrameMap::rdx_opr);
10221035
dst.load_item_force (FrameMap::rax_oop_opr);
10231036
dst_pos.load_item_force (FrameMap::rbx_opr);
10241037
length.load_item_force (FrameMap::rdi_opr);
10251038
LIR_Opr tmp = (FrameMap::rsi_opr);
1039+
1040+
if (expected_type != nullptr && flags == 0) {
1041+
FrameMap* f = Compilation::current()->frame_map();
1042+
f->update_reserved_argument_area_size(3 * BytesPerWord);
1043+
}
10261044
#else
10271045

10281046
// The java calling convention will give us enough registers
@@ -1044,10 +1062,6 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
10441062

10451063
set_no_result(x);
10461064

1047-
int flags;
1048-
ciArrayKlass* expected_type;
1049-
arraycopy_helper(x, &flags, &expected_type);
1050-
10511065
__ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint
10521066
}
10531067

@@ -1310,7 +1324,13 @@ void LIRGenerator::do_NewInstance(NewInstance* x) {
13101324

13111325

13121326
void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
1313-
CodeEmitInfo* info = state_for(x, x->state());
1327+
CodeEmitInfo* info = nullptr;
1328+
if (x->state_before() != nullptr && x->state_before()->force_reexecute()) {
1329+
info = state_for(x, x->state_before());
1330+
info->set_force_reexecute();
1331+
} else {
1332+
info = state_for(x, x->state());
1333+
}
13141334

13151335
LIRItem length(x->length(), this);
13161336
length.load_item_force(FrameMap::rbx_opr);
@@ -1327,7 +1347,7 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
13271347
__ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
13281348

13291349
CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
1330-
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
1350+
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path, x->zero_array());
13311351

13321352
LIR_Opr result = rlock_result(x);
13331353
__ move(reg, result);

‎src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp

+8-6
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
278278
verify_oop(obj);
279279
}
280280

281-
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, Address::ScaleFactor f, Register klass, Label& slow_case) {
281+
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, Address::ScaleFactor f, Register klass, Label& slow_case, bool zero_array) {
282282
assert(obj == rax, "obj must be in rax, for cmpxchg");
283283
assert_different_registers(obj, len, t1, t2, klass);
284284

@@ -300,11 +300,13 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,
300300
initialize_header(obj, klass, len, t1, t2);
301301

302302
// clear rest of allocated space
303-
const Register len_zero = len;
304-
// Align-up to word boundary, because we clear the 4 bytes potentially
305-
// following the length field in initialize_header().
306-
int base_offset = align_up(base_offset_in_bytes, BytesPerWord);
307-
initialize_body(obj, arr_size, base_offset, len_zero);
303+
if (zero_array) {
304+
const Register len_zero = len;
305+
// Align-up to word boundary, because we clear the 4 bytes potentially
306+
// following the length field in initialize_header().
307+
int base_offset = align_up(base_offset_in_bytes, BytesPerWord);
308+
initialize_body(obj, arr_size, base_offset, len_zero);
309+
}
308310

309311
if (CURRENT_ENV->dtrace_alloc_probes()) {
310312
assert(obj == rax, "must be");

‎src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,8 @@
8989
// base_offset_in_bytes: offset of the first array element, in bytes
9090
// f : element scale factor
9191
// slow_case : exit to slow case implementation if fast allocation fails
92-
void allocate_array(Register obj, Register len, Register t, Register t2, int base_offset_in_bytes, Address::ScaleFactor f, Register klass, Label& slow_case);
92+
// zero_array : zero the allocated array or not
93+
void allocate_array(Register obj, Register len, Register t, Register t2, int base_offset_in_bytes, Address::ScaleFactor f, Register klass, Label& slow_case, bool zero_array);
9394

9495
int rsp_offset() const { return _rsp_offset; }
9596
void set_rsp_offset(int n) { _rsp_offset = n; }

‎src/hotspot/share/c1/c1_Compiler.cpp

+3
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,9 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
235235
case vmIntrinsics::_counterTime:
236236
#endif
237237
case vmIntrinsics::_getObjectSize:
238+
#if defined(X86) || defined(AARCH64)
239+
case vmIntrinsics::_clone:
240+
#endif
238241
break;
239242
case vmIntrinsics::_blackhole:
240243
break;

‎src/hotspot/share/c1/c1_GraphBuilder.cpp

+50-3
Original file line number | Diff line number | Diff line change
@@ -2026,8 +2026,11 @@ void GraphBuilder::invoke(Bytecodes::Code code) {
20262026
int index = state()->stack_size() - (target->arg_size_no_receiver() + 1);
20272027
receiver = state()->stack_at(index);
20282028
ciType* type = receiver->exact_type();
2029-
if (type != nullptr && type->is_loaded() &&
2030-
type->is_instance_klass() && !type->as_instance_klass()->is_interface()) {
2029+
if (type != nullptr && type->is_loaded()) {
2030+
assert(!type->is_instance_klass() || !type->as_instance_klass()->is_interface(), "Must not be an interface");
2031+
// Detects non-interface instances, primitive arrays, and some object arrays.
2032+
// Array receivers can only call Object methods, so we should be able to allow
2033+
// all object arrays here too, even those with unloaded types.
20312034
receiver_klass = (ciInstanceKlass*) type;
20322035
type_is_exact = true;
20332036
}
@@ -2243,7 +2246,7 @@ void GraphBuilder::new_instance(int klass_index) {
22432246

22442247
void GraphBuilder::new_type_array() {
22452248
ValueStack* state_before = copy_state_exhandling();
2246-
apush(append_split(new NewTypeArray(ipop(), (BasicType)stream()->get_index(), state_before)));
2249+
apush(append_split(new NewTypeArray(ipop(), (BasicType)stream()->get_index(), state_before, true)));
22472250
}
22482251

22492252

@@ -3650,9 +3653,13 @@ void GraphBuilder::build_graph_for_intrinsic(ciMethod* callee, bool ignore_retur
36503653
case vmIntrinsics::_getAndSetReference : append_unsafe_get_and_set(callee, false); return;
36513654
case vmIntrinsics::_getCharStringU : append_char_access(callee, false); return;
36523655
case vmIntrinsics::_putCharStringU : append_char_access(callee, true); return;
3656+
case vmIntrinsics::_clone : append_alloc_array_copy(callee); return;
36533657
default:
36543658
break;
36553659
}
3660+
if (_inline_bailout_msg != nullptr) {
3661+
return;
3662+
}
36563663

36573664
// create intrinsic node
36583665
const bool has_receiver = !callee->is_static();
@@ -3714,6 +3721,9 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee, bool ignore_return) {
37143721
}
37153722
}
37163723
build_graph_for_intrinsic(callee, ignore_return);
3724+
if (_inline_bailout_msg != nullptr) {
3725+
return false;
3726+
}
37173727
return true;
37183728
}
37193729

@@ -4427,6 +4437,43 @@ void GraphBuilder::append_char_access(ciMethod* callee, bool is_store) {
44274437
}
44284438
}
44294439

4440+
void GraphBuilder::append_alloc_array_copy(ciMethod* callee) {
4441+
const int args_base = state()->stack_size() - callee->arg_size();
4442+
ciType* receiver_type = state()->stack_at(args_base)->exact_type();
4443+
if (receiver_type == nullptr) {
4444+
inline_bailout("must have a receiver");
4445+
return;
4446+
}
4447+
if (!receiver_type->is_type_array_klass()) {
4448+
inline_bailout("clone array not primitive");
4449+
return;
4450+
}
4451+
4452+
ValueStack* state_before = copy_state_before();
4453+
state_before->set_force_reexecute();
4454+
Value src = apop();
4455+
BasicType basic_type = src->exact_type()->as_array_klass()->element_type()->basic_type();
4456+
Value length = append(new ArrayLength(src, state_before));
4457+
Value new_array = append_split(new NewTypeArray(length, basic_type, state_before, false));
4458+
4459+
ValueType* result_type = as_ValueType(callee->return_type());
4460+
vmIntrinsics::ID id = vmIntrinsics::_arraycopy;
4461+
Values* args = new Values(5);
4462+
args->push(src);
4463+
args->push(append(new Constant(new IntConstant(0))));
4464+
args->push(new_array);
4465+
args->push(append(new Constant(new IntConstant(0))));
4466+
args->push(length);
4467+
const bool has_receiver = true;
4468+
Intrinsic* array_copy = new Intrinsic(result_type, id,
4469+
args, has_receiver, state_before,
4470+
vmIntrinsics::preserves_state(id),
4471+
vmIntrinsics::can_trap(id));
4472+
array_copy->set_flag(Instruction::OmitChecksFlag, true);
4473+
append_split(array_copy);
4474+
apush(new_array);
4475+
}
4476+
44304477
void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool success) {
44314478
CompileLog* log = compilation()->log();
44324479
if (log != nullptr) {

‎src/hotspot/share/c1/c1_GraphBuilder.hpp

+1
Original file line number | Diff line number | Diff line change
@@ -379,6 +379,7 @@ class GraphBuilder {
379379
void append_unsafe_CAS(ciMethod* callee);
380380
void append_unsafe_get_and_set(ciMethod* callee, bool is_add);
381381
void append_char_access(ciMethod* callee, bool is_store);
382+
void append_alloc_array_copy(ciMethod* callee);
382383

383384
void print_inlining(ciMethod* callee, const char* msg, bool success = true);
384385

‎src/hotspot/share/c1/c1_Instruction.hpp

+5-1
Original file line number | Diff line number | Diff line change
@@ -364,6 +364,7 @@ class Instruction: public CompilationResourceObj {
364364
InWorkListFlag,
365365
DeoptimizeOnException,
366366
KillsMemoryFlag,
367+
OmitChecksFlag,
367368
InstructionLastFlag
368369
};
369370

@@ -1327,16 +1328,19 @@ BASE(NewArray, StateSplit)
13271328
LEAF(NewTypeArray, NewArray)
13281329
private:
13291330
BasicType _elt_type;
1331+
bool _zero_array;
13301332

13311333
public:
13321334
// creation
1333-
NewTypeArray(Value length, BasicType elt_type, ValueStack* state_before)
1335+
NewTypeArray(Value length, BasicType elt_type, ValueStack* state_before, bool zero_array)
13341336
: NewArray(length, state_before)
13351337
, _elt_type(elt_type)
1338+
, _zero_array(zero_array)
13361339
{}
13371340

13381341
// accessors
13391342
BasicType elt_type() const { return _elt_type; }
1343+
bool zero_array() const { return _zero_array; }
13401344
ciType* exact_type() const;
13411345
};
13421346

0 commit comments

Comments
 (0)
Please sign in to comment.