Skip to content

Commit 1b6281d

Browse files
author
Hamlin Li
committed Feb 21, 2025
8321003: RISC-V: C2 MulReductionVI
8321004: RISC-V: C2 MulReductionVL
Reviewed-by: fyang, rehn
1 parent c73fead commit 1b6281d

File tree

6 files changed

+124
-1
lines changed

6 files changed

+124
-1
lines changed
 

‎src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -2954,6 +2954,45 @@ void C2_MacroAssembler::reduce_integral_v(Register dst, Register src1,
29542954
vmv_x_s(dst, tmp);
29552955
}
29562956

2957+
// Emits code that multiplies the lanes of vector src2 together and then multiplies
// that lane product by the scalar src1, leaving the result in dst.
//
// The lane product is built by repeated halving: each step slides the upper half of
// the working vector down onto a scratch register, shrinks the active vector length
// with vsetvli_helper, and multiplies the two halves lane-wise. The loop stops once
// vector_length has been reduced to 1, and the remaining lane is moved to dst.
//
//   dst           - scalar register receiving the final product
//   src1          - scalar operand multiplied into the reduced lane value
//   src2          - vector whose lanes are reduced
//   vtmp1, vtmp2  - vector registers overwritten as scratch
//   bt            - element type; asserted to be T_BYTE, T_SHORT, T_INT or T_LONG
//   vector_length - lane count; halved by integer division at every step
//                   NOTE(review): presumably always a power of two - confirm against callers
//   vm            - when not Assembler::unmasked, src2 is first merged with a vector
//                   of ones under v0 (see the comment in the masked branch)
void C2_MacroAssembler::reduce_mul_integral_v(Register dst, Register src1, VectorRegister src2,
2958+
VectorRegister vtmp1, VectorRegister vtmp2,
2959+
BasicType bt, uint vector_length, VectorMask vm) {
2960+
assert(bt == T_BYTE || bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported element type");
2961+
// Configure the vector unit for the full input length before the first slide.
vsetvli_helper(bt, vector_length);
2962+
2963+
// From here on vector_length holds the size of the half currently being combined.
// NOTE(review): the value is passed to vslidedown_vi as an immediate below; confirm
// that vector_length / 2 fits that instruction's encodable range for the largest
// vector size this code can be reached with.
vector_length /= 2;
2964+
if (vm != Assembler::unmasked) {
2965+
// This behaviour is consistent with spec requirements of vector API, for `reduceLanes`:
2966+
// If no elements are selected, an operation-specific identity value is returned.
2967+
// If the operation is MUL, then the identity value is one.
2968+
vmv_v_i(vtmp1, 1);
2969+
vmerge_vvm(vtmp2, vtmp1, src2); // vm == v0
2970+
// The slide is issued while the full length is still active; only afterwards is
// the active length narrowed to the half size for the multiply.
vslidedown_vi(vtmp1, vtmp2, vector_length);
2971+
2972+
vsetvli_helper(bt, vector_length);
2973+
vmul_vv(vtmp1, vtmp1, vtmp2);
2974+
} else {
2975+
// Unmasked: same first halving step, reading src2 directly instead of the merged copy.
vslidedown_vi(vtmp1, src2, vector_length);
2976+
2977+
vsetvli_helper(bt, vector_length);
2978+
vmul_vv(vtmp1, vtmp1, src2);
2979+
}
2980+
2981+
// Remaining halving steps: vtmp1 carries the partial products, vtmp2 receives the
// slid-down upper half on each iteration.
while (vector_length > 1) {
2982+
vector_length /= 2;
2983+
vslidedown_vi(vtmp2, vtmp1, vector_length);
2984+
vsetvli_helper(bt, vector_length);
2985+
vmul_vv(vtmp1, vtmp1, vtmp2);
2986+
}
2987+
2988+
// Move the reduced lane to the scalar register, then fold in the scalar input.
vmv_x_s(dst, vtmp1);
2989+
// T_INT uses mulw; every other accepted element type uses mul.
if (bt == T_INT) {
2990+
mulw(dst, dst, src1);
2991+
} else {
2992+
mul(dst, dst, src1);
2993+
}
2994+
}
2995+
29572996
// Set vl and vtype for full and partial vector operations.
29582997
// (vma = mu, vta = tu, vill = false)
29592998
void C2_MacroAssembler::vsetvli_helper(BasicType bt, uint vector_length, LMUL vlmul, Register tmp) {

‎src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,10 @@
239239
int opc, BasicType bt, uint vector_length,
240240
VectorMask vm = Assembler::unmasked);
241241

242+
// Multiply-reduction over the lanes of vector src2, combined with the scalar src1;
// the result is written to dst. vtmp1 and vtmp2 are vector scratch registers, bt is
// the element type and vector_length the number of lanes. vm defaults to
// Assembler::unmasked.
void reduce_mul_integral_v(Register dst, Register src1, VectorRegister src2,
243+
VectorRegister vtmp1, VectorRegister vtmp2, BasicType bt,
244+
uint vector_length, VectorMask vm = Assembler::unmasked);
245+
242246
void vsetvli_helper(BasicType bt, uint vector_length, LMUL vlmul = Assembler::m1, Register tmp = t0);
243247

244248
void compare_integral_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, int cond,

‎src/hotspot/cpu/riscv/riscv_v.ad

+68
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
33
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
44
// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
5+
// Copyright (c) 2023, 2025, Rivos Inc. All rights reserved.
56
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
67
//
78
// This code is free software; you can redistribute it and/or modify it
@@ -99,6 +100,12 @@ source %{
99100
return false;
100101
}
101102
break;
103+
case Op_MulReductionVI:
104+
case Op_MulReductionVL:
105+
// When vlen < 4, our log2(vlen) implementation does not help to gain performance improvement.
106+
if (vlen < 4) {
107+
return false;
108+
}
102109
default:
103110
break;
104111
}
@@ -2427,6 +2434,67 @@ instruct vreduce_minD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, v
24272434
ins_pipe(pipe_slow);
24282435
%}
24292436

2437+
2438+
// ------------------------------ Vector reduction mul -------------------------
2439+
2440+
// Unmasked integer multiply reduction: matches MulReductionVI of the scalar isrc and
// the vector vsrc and writes the result to dst. tmp1 and tmp2 are vector temporaries
// (TEMP); dst is declared TEMP_DEF. The encoding delegates to reduce_mul_integral_v,
// taking the element type and lane count of vsrc from Matcher and leaving the mask
// argument at its default.
instruct reduce_mulI(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
2441+
vReg tmp1, vReg tmp2) %{
2442+
match(Set dst (MulReductionVI isrc vsrc));
2443+
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
2444+
format %{ "reduce_mulI $dst, $isrc, $vsrc\t" %}
2445+
2446+
ins_encode %{
2447+
__ reduce_mul_integral_v($dst$$Register, $isrc$$Register, as_VectorRegister($vsrc$$reg),
2448+
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
2449+
Matcher::vector_element_basic_type(this, $vsrc), Matcher::vector_length(this, $vsrc));
2450+
%}
2451+
ins_pipe(pipe_slow);
2452+
%}
2453+
2454+
// Masked integer multiply reduction: matches MulReductionVI whose inputs are
// (Binary isrc vsrc) and the mask operand v0 (register class vRegMask_V0). tmp1 and
// tmp2 are vector temporaries (TEMP); dst is declared TEMP_DEF. The encoding calls
// reduce_mul_integral_v with Assembler::v0_t as the mask argument.
instruct reduce_mulI_masked(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
2455+
vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
2456+
match(Set dst (MulReductionVI (Binary isrc vsrc) v0));
2457+
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
2458+
format %{ "reduce_mulI_masked $dst, $isrc, $vsrc, $v0\t" %}
2459+
2460+
ins_encode %{
2461+
__ reduce_mul_integral_v($dst$$Register, $isrc$$Register, as_VectorRegister($vsrc$$reg),
2462+
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
2463+
Matcher::vector_element_basic_type(this, $vsrc), Matcher::vector_length(this, $vsrc),
2464+
Assembler::v0_t);
2465+
%}
2466+
ins_pipe(pipe_slow);
2467+
%}
2468+
2469+
// Unmasked long multiply reduction: matches MulReductionVL of the scalar isrc and the
// vector vsrc and writes the result to dst. tmp1 and tmp2 are vector temporaries
// (TEMP); dst is declared TEMP_DEF. The encoding delegates to reduce_mul_integral_v,
// taking the element type and lane count of vsrc from Matcher and leaving the mask
// argument at its default.
instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc,
2470+
vReg tmp1, vReg tmp2) %{
2471+
match(Set dst (MulReductionVL isrc vsrc));
2472+
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
2473+
format %{ "reduce_mulL $dst, $isrc, $vsrc\t" %}
2474+
2475+
ins_encode %{
2476+
__ reduce_mul_integral_v($dst$$Register, $isrc$$Register, as_VectorRegister($vsrc$$reg),
2477+
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
2478+
Matcher::vector_element_basic_type(this, $vsrc), Matcher::vector_length(this, $vsrc));
2479+
%}
2480+
ins_pipe(pipe_slow);
2481+
%}
2482+
2483+
// Masked long multiply reduction: matches MulReductionVL whose inputs are
// (Binary isrc vsrc) and the mask operand v0 (register class vRegMask_V0). tmp1 and
// tmp2 are vector temporaries (TEMP); dst is declared TEMP_DEF. The encoding calls
// reduce_mul_integral_v with Assembler::v0_t as the mask argument.
instruct reduce_mulL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc,
2484+
vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
2485+
match(Set dst (MulReductionVL (Binary isrc vsrc) v0));
2486+
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
2487+
format %{ "reduce_mulL_masked $dst, $isrc, $vsrc, $v0\t" %}
2488+
2489+
ins_encode %{
2490+
__ reduce_mul_integral_v($dst$$Register, $isrc$$Register, as_VectorRegister($vsrc$$reg),
2491+
as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
2492+
Matcher::vector_element_basic_type(this, $vsrc), Matcher::vector_length(this, $vsrc),
2493+
Assembler::v0_t);
2494+
%}
2495+
ins_pipe(pipe_slow);
2496+
%}
2497+
24302498
// vector replicate
24312499

24322500
instruct replicate(vReg dst, iRegIorL2I src) %{

‎test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -85,6 +85,10 @@ public static void prodReductionInit(int[] a, int[] b) {
8585
@IR(applyIfCPUFeature = {"sse4.1", "true"},
8686
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
8787
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
88+
@IR(applyIfPlatform = {"riscv64", "true"},
89+
applyIfCPUFeature = {"rvv", "true"},
90+
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
91+
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
8892
public static int prodReductionImplement(int[] a, int[] b, int total) {
8993
for (int i = 0; i < a.length; i++) {
9094
total *= a[i] + b[i];

‎test/hotspot/jtreg/compiler/loopopts/superword/RedTest_int.java

+4
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ public static int xorReductionImplement(
219219
@IR(applyIfCPUFeature = {"sse4.1", "true"},
220220
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
221221
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
222+
@IR(applyIfPlatform = {"riscv64", "true"},
223+
applyIfCPUFeature = {"rvv", "true"},
224+
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
225+
counts = {IRNode.MUL_REDUCTION_VI, ">= 1", IRNode.MUL_REDUCTION_VI, "<= 2"}) // one for main-loop, one for vector-post-loop
222226
public static int mulReductionImplement(
223227
int[] a,
224228
int[] b,

‎test/hotspot/jtreg/compiler/loopopts/superword/RedTest_long.java

+4
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,10 @@ public static long xorReductionImplement(
226226
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
227227
applyIfPlatform = {"64-bit", "true"},
228228
counts = {IRNode.MUL_REDUCTION_VL, ">= 1", IRNode.MUL_REDUCTION_VL, "<= 2"}) // one for main-loop, one for vector-post-loop
229+
@IR(applyIfPlatform = {"riscv64", "true"},
230+
applyIfCPUFeature = {"rvv", "true"},
231+
applyIfAnd = {"SuperWordReductions", "true", "LoopMaxUnroll", ">= 8"},
232+
counts = {IRNode.MUL_REDUCTION_VL, ">= 1", IRNode.MUL_REDUCTION_VL, "<= 2"}) // one for main-loop, one for vector-post-loop
229233
public static long mulReductionImplement(
230234
long[] a,
231235
long[] b,

0 commit comments

Comments
 (0)
Please sign in to comment.