Skip to content

Commit de1c12e

Browse files
Bhavana Kilambi, Xiaohong Gong, Jatin Bhateja
committed Mar 27, 2023
8301012: [vectorapi]: Intrinsify CompressBitsV/ExpandBitsV and add the AArch64 SVE backend implementation
Co-authored-by: Xiaohong Gong <xgong@openjdk.org> Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org> Reviewed-by: ngasson, eliu, thartmann
1 parent 10fa7d1 commit de1c12e

File tree

9 files changed

+257
-5
lines changed

9 files changed

+257
-5
lines changed
 

‎src/hotspot/cpu/aarch64/aarch64_vector.ad

+36
Original file line number | Diff line number | Diff line change
@@ -220,6 +220,12 @@ source %{
220220
return false;
221221
}
222222
break;
223+
case Op_CompressBitsV:
224+
case Op_ExpandBitsV:
225+
if (UseSVE < 2 || !VM_Version::supports_svebitperm()) {
226+
return false;
227+
}
228+
break;
223229
default:
224230
break;
225231
}
@@ -240,6 +246,8 @@ source %{
240246
case Op_MulReductionVF:
241247
case Op_MulReductionVI:
242248
case Op_MulReductionVL:
249+
case Op_CompressBitsV:
250+
case Op_ExpandBitsV:
243251
return false;
244252
// We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
245253
// Hence we turn to check whether Op_LoadVectorMasked is supported. The
@@ -6619,3 +6627,31 @@ instruct vsignum_gt128b(vReg dst, vReg src, vReg zero, vReg one, vReg tmp, pRegG
66196627
%}
66206628
ins_pipe(pipe_slow);
66216629
%}
6630+
6631+
// ---------------------------------- CompressBitsV --------------------------------
6632+
6633+
// Vector bit-compress rule: matches (CompressBitsV src1 src2) and emits a
// single sve_bext on the same two source registers. The match-rule check in
// this file rejects CompressBitsV unless UseSVE >= 2 and
// VM_Version::supports_svebitperm() holds.
instruct vcompressBits(vReg dst, vReg src1, vReg src2) %{
6634+
match(Set dst (CompressBitsV src1 src2));
6635+
format %{ "vcompressBits $dst, $src1, $src2\t# vector (sve)" %}
6636+
ins_encode %{
6637+
// Pick the SVE element-size variant from this node's vector element type.
BasicType bt = Matcher::vector_element_basic_type(this);
6638+
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
6639+
__ sve_bext($dst$$FloatRegister, size,
6640+
$src1$$FloatRegister, $src2$$FloatRegister);
6641+
%}
6642+
ins_pipe(pipe_slow);
6643+
%}
6644+
6645+
// ----------------------------------- ExpandBitsV ---------------------------------
6646+
6647+
// Vector bit-expand rule: matches (ExpandBitsV src1 src2) and emits a single
// sve_bdep on the same two source registers. The match-rule check in this
// file rejects ExpandBitsV unless UseSVE >= 2 and
// VM_Version::supports_svebitperm() holds.
instruct vexpandBits(vReg dst, vReg src1, vReg src2) %{
6648+
match(Set dst (ExpandBitsV src1 src2));
6649+
format %{ "vexpandBits $dst, $src1, $src2\t# vector (sve)" %}
6650+
ins_encode %{
6651+
// Pick the SVE element-size variant from this node's vector element type.
BasicType bt = Matcher::vector_element_basic_type(this);
6652+
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
6653+
__ sve_bdep($dst$$FloatRegister, size,
6654+
$src1$$FloatRegister, $src2$$FloatRegister);
6655+
%}
6656+
ins_pipe(pipe_slow);
6657+
%}

‎src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

+30
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,12 @@ source %{
210210
return false;
211211
}
212212
break;
213+
case Op_CompressBitsV:
214+
case Op_ExpandBitsV:
215+
if (UseSVE < 2 || !VM_Version::supports_svebitperm()) {
216+
return false;
217+
}
218+
break;
213219
default:
214220
break;
215221
}
@@ -230,6 +236,8 @@ source %{
230236
case Op_MulReductionVF:
231237
case Op_MulReductionVI:
232238
case Op_MulReductionVL:
239+
case Op_CompressBitsV:
240+
case Op_ExpandBitsV:
233241
return false;
234242
// We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
235243
// Hence we turn to check whether Op_LoadVectorMasked is supported. The
@@ -4950,3 +4958,25 @@ instruct vsignum_gt128b(vReg dst, vReg src, vReg zero, vReg one, vReg tmp, pRegG
49504958
%}
49514959
ins_pipe(pipe_slow);
49524960
%}
4961+
4962+
dnl
4963+
dnl BITPERM($1, $2, $3 )
4964+
dnl BITPERM(insn_name, op_name, insn)
dnl Generates one two-source vector instruct rule:
dnl   insn_name - name of the generated instruct (also used in its format string)
dnl   op_name   - ideal node matched as (op_name src1 src2)
dnl   insn      - assembler routine called with dst, element size, src1, src2
dnl The element size is derived from the matched node's vector element type.
4965+
define(`BITPERM', `
4966+
instruct $1(vReg dst, vReg src1, vReg src2) %{
4967+
match(Set dst ($2 src1 src2));
4968+
format %{ "$1 $dst, $src1, $src2\t# vector (sve)" %}
4969+
ins_encode %{
4970+
BasicType bt = Matcher::vector_element_basic_type(this);
4971+
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
4972+
__ $3($dst$$FloatRegister, size,
4973+
$src1$$FloatRegister, $src2$$FloatRegister);
4974+
%}
4975+
ins_pipe(pipe_slow);
4976+
%}')dnl
4977+
dnl
4978+
// ---------------------------------- CompressBitsV --------------------------------
4979+
BITPERM(vcompressBits, CompressBitsV, sve_bext)
4980+
4981+
// ----------------------------------- ExpandBitsV ---------------------------------
4982+
BITPERM(vexpandBits, ExpandBitsV, sve_bdep)

‎src/hotspot/share/adlc/formssel.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -4210,7 +4210,7 @@ bool MatchRule::is_vector() const {
42104210
"SqrtVD","SqrtVF",
42114211
"AndV" ,"XorV" ,"OrV",
42124212
"MaxV", "MinV",
4213-
"CompressV", "ExpandV", "CompressM",
4213+
"CompressV", "ExpandV", "CompressM", "CompressBitsV", "ExpandBitsV",
42144214
"AddReductionVI", "AddReductionVL",
42154215
"AddReductionVF", "AddReductionVD",
42164216
"MulReductionVI", "MulReductionVL",

‎src/hotspot/share/opto/classes.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ macro(CheckCastPP)
7777
macro(ClearArray)
7878
macro(CompressBits)
7979
macro(ExpandBits)
80+
macro(CompressBitsV)
81+
macro(ExpandBitsV)
8082
macro(ConstraintCast)
8183
macro(CMoveD)
8284
macro(CMoveVD)

‎src/hotspot/share/opto/vectornode.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,9 @@ int VectorNode::opcode(int sopc, BasicType bt) {
182182
case Op_ReverseBytesL:
183183
return (bt == T_LONG ? Op_ReverseBytesV : 0);
184184
case Op_CompressBits:
185-
// Not implemented. Returning 0 temporarily
186-
return 0;
185+
return (bt == T_INT || bt == T_LONG ? Op_CompressBitsV : 0);
187186
case Op_ExpandBits:
188-
// Not implemented. Returning 0 temporarily
189-
return 0;
187+
return (bt == T_INT || bt == T_LONG ? Op_ExpandBitsV : 0);
190188
case Op_LShiftI:
191189
switch (bt) {
192190
case T_BOOLEAN:
@@ -703,6 +701,8 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
703701
case Op_ExpandV: return new ExpandVNode(n1, n2, vt);
704702
case Op_CompressV: return new CompressVNode(n1, n2, vt);
705703
case Op_CompressM: assert(n1 == nullptr, ""); return new CompressMNode(n2, vt);
704+
case Op_CompressBitsV: return new CompressBitsVNode(n1, n2, vt);
705+
case Op_ExpandBitsV: return new ExpandBitsVNode(n1, n2, vt);
706706
case Op_CountLeadingZerosV: return new CountLeadingZerosVNode(n1, vt);
707707
case Op_CountTrailingZerosV: return new CountTrailingZerosVNode(n1, vt);
708708
default:

‎src/hotspot/share/opto/vectornode.hpp

+14
Original file line numberDiff line numberDiff line change
@@ -1804,4 +1804,18 @@ class SignumVDNode : public VectorNode {
18041804
virtual int Opcode() const;
18051805
};
18061806

1807+
// Vector counterpart of the scalar CompressBits node (VectorNode::opcode maps
// Op_CompressBits to Op_CompressBitsV for T_INT and T_LONG elements).
// Inputs: `in` is the source vector and `mask` the per-lane bit mask; both are
// forwarded unchanged to VectorNode together with the vector type `vt`.
class CompressBitsVNode : public VectorNode {
1808+
public:
1809+
CompressBitsVNode(Node* in, Node* mask, const TypeVect* vt)
1810+
: VectorNode(in, mask, vt) {}
1811+
virtual int Opcode() const;
1812+
};
1813+
1814+
// Vector counterpart of the scalar ExpandBits node (VectorNode::opcode maps
// Op_ExpandBits to Op_ExpandBitsV for T_INT and T_LONG elements).
// Inputs: `in` is the source vector and `mask` the per-lane bit mask; both are
// forwarded unchanged to VectorNode together with the vector type `vt`.
class ExpandBitsVNode : public VectorNode {
1815+
public:
1816+
ExpandBitsVNode(Node* in, Node* mask, const TypeVect* vt)
1817+
: VectorNode(in, mask, vt) {}
1818+
virtual int Opcode() const;
1819+
};
1820+
18071821
#endif // SHARE_OPTO_VECTORNODE_HPP

‎src/hotspot/share/runtime/vmStructs.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -1764,6 +1764,8 @@
17641764
declare_c2_type(CompressVNode, VectorNode) \
17651765
declare_c2_type(CompressMNode, VectorNode) \
17661766
declare_c2_type(ExpandVNode, VectorNode) \
1767+
declare_c2_type(CompressBitsVNode, VectorNode) \
1768+
declare_c2_type(ExpandBitsVNode, VectorNode) \
17671769
declare_c2_type(MulReductionVDNode, ReductionNode) \
17681770
declare_c2_type(DivVFNode, VectorNode) \
17691771
declare_c2_type(DivVDNode, VectorNode) \

‎test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java

+10
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,16 @@ public class IRNode {
14081408
machOnlyNameRegex(XOR3_SVE, "veor3_sve");
14091409
}
14101410

1411+
// IR-node placeholder for the C2 CompressBitsV node, for use in @IR rules.
// The static block registers its matching regex from the node name
// "CompressBitsV" via beforeMatchingNameRegex.
public static final String COMPRESS_BITSV = PREFIX + "COMPRESS_BITSV" + POSTFIX;
1412+
static {
1413+
beforeMatchingNameRegex(COMPRESS_BITSV, "CompressBitsV");
1414+
}
1415+
1416+
// IR-node placeholder for the C2 ExpandBitsV node, for use in @IR rules.
// The static block registers its matching regex from the node name
// "ExpandBitsV" via beforeMatchingNameRegex.
public static final String EXPAND_BITSV = PREFIX + "EXPAND_BITSV" + POSTFIX;
1417+
static {
1418+
beforeMatchingNameRegex(EXPAND_BITSV, "ExpandBitsV");
1419+
}
1420+
14111421
/*
14121422
* Utility methods to set up IR_NODE_MAPPINGS.
14131423
*/
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,158 @@
1+
/*
2+
* Copyright (c) 2023, Arm Limited. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package compiler.vectorapi;
25+
26+
import compiler.lib.ir_framework.*;
27+
28+
import java.util.Random;
29+
30+
import jdk.incubator.vector.IntVector;
31+
import jdk.incubator.vector.LongVector;
32+
import jdk.incubator.vector.VectorOperators;
33+
import jdk.incubator.vector.VectorSpecies;
34+
35+
import jdk.test.lib.Asserts;
36+
import jdk.test.lib.Utils;
37+
38+
/**
39+
* @test
40+
* @bug 8301012
41+
* @library /test/lib /
42+
* @requires os.arch == "aarch64" & vm.cpu.features ~= ".*sve2.*" & vm.cpu.features ~= ".*svebitperm.*"
43+
* @summary [vectorapi]: Intrinsify CompressBitsV/ExpandBitsV and add the AArch64 SVE backend implementation
44+
* @modules jdk.incubator.vector
45+
* @run driver compiler.vectorapi.TestVectorCompressExpandBits
46+
*/
47+
48+
// IR-framework test for the Vector API COMPRESS_BITS / EXPAND_BITS lanewise
// operations on int and long vectors. Each @Test kernel must compile to at
// least one CompressBitsV / ExpandBitsV node (checked by @IR), and each @Run
// method validates the vector results element by element against the scalar
// Integer/Long compress/expand methods.
public class TestVectorCompressExpandBits {
49+
// Preferred-shape species used by the int and long kernels respectively.
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
50+
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
51+
52+
// Number of elements in every input and output array.
private static int LENGTH = 1024;
53+
// Random source obtained from jdk.test.lib.Utils.
private static final Random RD = Utils.getRandomInstance();
54+
55+
// ia/ib (la/lb) are the value and mask operands; ir (lr) receives the results.
private static int[] ia;
56+
private static int[] ib;
57+
private static int[] ir;
58+
private static long[] la;
59+
private static long[] lb;
60+
private static long[] lr;
61+
62+
// Allocate all arrays and fill the four input arrays with random values
// drawn with an exclusive upper bound of 25.
static {
63+
ia = new int[LENGTH];
64+
ib = new int[LENGTH];
65+
ir = new int[LENGTH];
66+
la = new long[LENGTH];
67+
lb = new long[LENGTH];
68+
lr = new long[LENGTH];
69+
70+
for (int i = 0; i < LENGTH; i++) {
71+
ia[i] = RD.nextInt(25);
72+
ib[i] = RD.nextInt(25);
73+
la[i] = RD.nextLong(25);
74+
lb[i] = RD.nextLong(25);
75+
}
76+
}
77+
78+
// Test for vectorized Integer.compress operation in SVE2
79+
@Test
80+
@IR(counts = {IRNode.COMPRESS_BITSV, "> 0"})
81+
public static void testIntCompress() {
82+
// One full vector per iteration; there is no tail loop, so this relies on
// LENGTH being a multiple of I_SPECIES.length().
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
83+
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
84+
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
85+
av.lanewise(VectorOperators.COMPRESS_BITS, bv).intoArray(ir, i);
86+
}
87+
}
88+
89+
// Runs the int compress kernel, then compares every lane with Integer.compress.
@Run(test = "testIntCompress")
90+
public static void testIntCompress_runner() {
91+
testIntCompress();
92+
for (int i = 0; i < LENGTH; i++) {
93+
Asserts.assertEquals(Integer.compress(ia[i], ib[i]), ir[i]);
94+
}
95+
}
96+
97+
// Test for vectorized Integer.expand operation in SVE2
98+
@Test
99+
@IR(counts = {IRNode.EXPAND_BITSV, "> 0"})
100+
public static void testIntExpand() {
101+
// One full vector per iteration; there is no tail loop, so this relies on
// LENGTH being a multiple of I_SPECIES.length().
for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
102+
IntVector av = IntVector.fromArray(I_SPECIES, ia, i);
103+
IntVector bv = IntVector.fromArray(I_SPECIES, ib, i);
104+
av.lanewise(VectorOperators.EXPAND_BITS, bv).intoArray(ir, i);
105+
}
106+
}
107+
108+
// Runs the int expand kernel, then compares every lane with Integer.expand.
@Run(test = "testIntExpand")
109+
public static void testIntExpand_runner() {
110+
testIntExpand();
111+
for (int i = 0; i < LENGTH; i++) {
112+
Asserts.assertEquals(Integer.expand(ia[i], ib[i]), ir[i]);
113+
}
114+
}
115+
116+
// Test for vectorized Long.compress operation in SVE2
117+
@Test
118+
@IR(counts = {IRNode.COMPRESS_BITSV, "> 0"})
119+
public static void testLongCompress() {
120+
// One full vector per iteration; there is no tail loop, so this relies on
// LENGTH being a multiple of L_SPECIES.length().
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
121+
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
122+
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
123+
av.lanewise(VectorOperators.COMPRESS_BITS, bv).intoArray(lr, i);
124+
}
125+
}
126+
127+
// Runs the long compress kernel, then compares every lane with Long.compress.
@Run(test = "testLongCompress")
128+
public static void testLongCompress_runner() {
129+
testLongCompress();
130+
for (int i = 0; i < LENGTH; i++) {
131+
Asserts.assertEquals(Long.compress(la[i], lb[i]), lr[i]);
132+
}
133+
}
134+
135+
// Test for vectorized Long.expand operation in SVE2
136+
@Test
137+
@IR(counts = {IRNode.EXPAND_BITSV, "> 0"})
138+
public static void testLongExpand() {
139+
// One full vector per iteration; there is no tail loop, so this relies on
// LENGTH being a multiple of L_SPECIES.length().
for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
140+
LongVector av = LongVector.fromArray(L_SPECIES, la, i);
141+
LongVector bv = LongVector.fromArray(L_SPECIES, lb, i);
142+
av.lanewise(VectorOperators.EXPAND_BITS, bv).intoArray(lr, i);
143+
}
144+
}
145+
146+
// Runs the long expand kernel, then compares every lane with Long.expand.
@Run(test = "testLongExpand")
147+
public static void testLongExpand_runner() {
148+
testLongExpand();
149+
for (int i = 0; i < LENGTH; i++) {
150+
Asserts.assertEquals(Long.expand(la[i], lb[i]), lr[i]);
151+
}
152+
}
153+
154+
// Entry point: launches the IR framework with the incubator vector module
// added and the -XX:UseSVE=2 flag set.
public static void main(String[] args) {
155+
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
156+
"-XX:UseSVE=2");
157+
}
158+
}

0 commit comments

Comments
 (0)
Please sign in to comment.