Skip to content

Commit 4dfed9b

Browse files
author
duke
committedAug 1, 2024
Automatic merge of jdk:master into master
2 parents 06ff41d + dc35f3e commit 4dfed9b

17 files changed

+416
-127
lines changed
 

‎src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

+119
Original file line numberDiff line numberDiff line change
@@ -1786,6 +1786,16 @@ void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegis
17861786
}
17871787
}
17881788

1789+
// Lane-wise combine of two 128-bit vectors for a non-strictly-ordered FP
// reduction (Vector API semantics): packed add/mul folds lanes pairwise
// rather than in strict left-to-right order.
// 'typ' is not consulted (the opcode implies the element type); kept for
// signature parity with reduce_operation_128.
void C2_MacroAssembler::unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) {
  switch (opcode) {
    case Op_AddReductionVF: addps(dst, src); break;
    case Op_AddReductionVD: addpd(dst, src); break;
    case Op_MulReductionVF: mulps(dst, src); break;
    case Op_MulReductionVD: mulpd(dst, src); break;
    default:                assert(false, "%s", NodeClassNames[opcode]);
  }
}
1798+
17891799
void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
17901800
int vector_len = Assembler::AVX_256bit;
17911801

@@ -1834,6 +1844,18 @@ void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegis
18341844
}
18351845
}
18361846

1847+
// 256-bit counterpart of unordered_reduce_operation_128: lane-wise packed
// add/mul of src1 and src2 into dst (AVX three-operand forms).
// 'typ' is not consulted; the opcode implies the element type.
void C2_MacroAssembler::unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  int vector_len = Assembler::AVX_256bit;

  switch (opcode) {
    case Op_AddReductionVF: vaddps(dst, src1, src2, vector_len); break;
    case Op_AddReductionVD: vaddpd(dst, src1, src2, vector_len); break;
    case Op_MulReductionVF: vmulps(dst, src1, src2, vector_len); break;
    case Op_MulReductionVD: vmulpd(dst, src1, src2, vector_len); break;
    default:                assert(false, "%s", NodeClassNames[opcode]);
  }
}
1858+
18371859
void C2_MacroAssembler::reduce_fp(int opcode, int vlen,
18381860
XMMRegister dst, XMMRegister src,
18391861
XMMRegister vtmp1, XMMRegister vtmp2) {
@@ -1852,6 +1874,24 @@ void C2_MacroAssembler::reduce_fp(int opcode, int vlen,
18521874
}
18531875
}
18541876

1877+
// Entry point for non-strictly-ordered FP reductions: dispatches on the
// reduction opcode to the float or double helper. vtmp1/vtmp2 requirements
// depend on vlen and are asserted in unorderedReduceF/unorderedReduceD.
void C2_MacroAssembler::unordered_reduce_fp(int opcode, int vlen,
                                            XMMRegister dst, XMMRegister src,
                                            XMMRegister vtmp1, XMMRegister vtmp2) {
  switch (opcode) {
    case Op_AddReductionVF:
    case Op_MulReductionVF:
      unorderedReduceF(opcode, vlen, dst, src, vtmp1, vtmp2);
      break;

    case Op_AddReductionVD:
    case Op_MulReductionVD:
      unorderedReduceD(opcode, vlen, dst, src, vtmp1, vtmp2);
      break;

    default: assert(false, "%s", NodeClassNames[opcode]);
  }
}
1894+
18551895
void C2_MacroAssembler::reduceB(int opcode, int vlen,
18561896
Register dst, Register src1, XMMRegister src2,
18571897
XMMRegister vtmp1, XMMRegister vtmp2) {
@@ -1954,6 +1994,45 @@ void C2_MacroAssembler::reduceD(int opcode, int vlen, XMMRegister dst, XMMRegist
19541994
}
19551995
}
19561996

1997+
// Float reduction dispatcher by vector length (2/4/8/16 lanes).
// The asserts document the temp-register contract: 2 lanes need no temps,
// 4 lanes need vtmp1 only, 8 and 16 lanes need both.
void C2_MacroAssembler::unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
  switch (vlen) {
    case 2:
      assert(vtmp1 == xnoreg, "");
      assert(vtmp2 == xnoreg, "");
      unorderedReduce2F(opcode, dst, src);
      break;
    case 4:
      assert(vtmp2 == xnoreg, "");
      unorderedReduce4F(opcode, dst, src, vtmp1);
      break;
    case 8:
      unorderedReduce8F(opcode, dst, src, vtmp1, vtmp2);
      break;
    case 16:
      unorderedReduce16F(opcode, dst, src, vtmp1, vtmp2);
      break;
    default: assert(false, "wrong vector length");
  }
}
2017+
2018+
// Double reduction dispatcher by vector length (2/4/8 lanes).
// Temp-register contract mirrors unorderedReduceF: 2 lanes need no temps,
// 4 lanes need vtmp1 only, 8 lanes need both.
void C2_MacroAssembler::unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
  switch (vlen) {
    case 2:
      assert(vtmp1 == xnoreg, "");
      assert(vtmp2 == xnoreg, "");
      unorderedReduce2D(opcode, dst, src);
      break;
    case 4:
      assert(vtmp2 == xnoreg, "");
      unorderedReduce4D(opcode, dst, src, vtmp1);
      break;
    case 8:
      unorderedReduce8D(opcode, dst, src, vtmp1, vtmp2);
      break;
    default: assert(false, "wrong vector length");
  }
}
2035+
19572036
void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
19582037
if (opcode == Op_AddReductionVI) {
19592038
if (vtmp1 != src2) {
@@ -2181,6 +2260,29 @@ void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src,
21812260
reduce8F(opcode, dst, vtmp1, vtmp1, vtmp2);
21822261
}
21832262

2263+
// Base case, 2 floats: shuffle lane 1 down into dst lane 0, then combine
// with src. Uses the ordered reduce_operation_128 for the final step —
// for a single pair, ordered and unordered combining are the same.
// Caller consumes only the low lane of dst.
void C2_MacroAssembler::unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src) {
  pshufd(dst, src, 0x1); // dst[0] = src[1]
  reduce_operation_128(T_FLOAT, opcode, dst, src);
}
2267+
2268+
// 4 floats: fold the upper pair onto the lower pair, then finish with the
// 2-lane base case.
void C2_MacroAssembler::unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
  pshufd(vtmp, src, 0xE); // vtmp[0..1] = src[2..3]
  unordered_reduce_operation_128(T_FLOAT, opcode, vtmp, src);
  unorderedReduce2F(opcode, dst, vtmp);
}
2273+
2274+
// 8 floats: fold the high 128-bit half onto the low half, then finish with
// the 4-lane step (vtmp2 serves as that step's temp).
void C2_MacroAssembler::unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
  vextractf128_high(vtmp1, src);
  unordered_reduce_operation_128(T_FLOAT, opcode, vtmp1, src);
  unorderedReduce4F(opcode, dst, vtmp1, vtmp2);
}
2279+
2280+
// 16 floats: fold the high 256-bit half onto the low half, then finish with
// the 8-lane step. vtmp2 doubles as the 8-lane step's source and its second
// temp; this is safe because unorderedReduce8F reads its src before its
// recursion overwrites vtmp2 (see unorderedReduce8F/unorderedReduce4F).
void C2_MacroAssembler::unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
  vextractf64x4_high(vtmp2, src);
  unordered_reduce_operation_256(T_FLOAT, opcode, vtmp2, vtmp2, src);
  unorderedReduce8F(opcode, dst, vtmp2, vtmp1, vtmp2);
}
2285+
21842286
void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
21852287
reduce_operation_128(T_DOUBLE, opcode, dst, src);
21862288
pshufd(vtmp, src, 0xE);
@@ -2199,6 +2301,23 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X
21992301
reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
22002302
}
22012303

2304+
// Base case, 2 doubles: shuffle the high 64-bit lane down into dst lane 0,
// then combine with src. Uses the ordered reduce_operation_128 for the
// final step — for a single pair, ordered and unordered combining are the
// same. Caller consumes only the low lane of dst.
void C2_MacroAssembler::unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src) {
  pshufd(dst, src, 0xE); // dst[0] = src[1] (64-bit lanes)
  reduce_operation_128(T_DOUBLE, opcode, dst, src);
}
2308+
2309+
// 4 doubles: fold the high 128-bit half onto the low half, then finish with
// the 2-lane base case (which needs no temp).
void C2_MacroAssembler::unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
  vextractf128_high(vtmp, src);
  unordered_reduce_operation_128(T_DOUBLE, opcode, vtmp, src);
  unorderedReduce2D(opcode, dst, vtmp);
}
2314+
2315+
// 8 doubles: fold the high 256-bit half onto the low half, then finish with
// the 4-lane step. vtmp2 becomes that step's source; vtmp1 is its temp.
void C2_MacroAssembler::unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
  vextractf64x4_high(vtmp2, src);
  unordered_reduce_operation_256(T_DOUBLE, opcode, vtmp2, vtmp2, src);
  unorderedReduce4D(opcode, dst, vtmp2, vtmp1);
}
2320+
22022321
void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len) {
22032322
MacroAssembler::evmovdqu(type, kmask, dst, src, merge, vector_len);
22042323
}

‎src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

+18
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@
149149
void reduce_fp(int opcode, int vlen,
150150
XMMRegister dst, XMMRegister src,
151151
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
152+
void unordered_reduce_fp(int opcode, int vlen,
153+
XMMRegister dst, XMMRegister src,
154+
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
152155
void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
153156
void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
154157
void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
@@ -161,6 +164,8 @@
161164
private:
162165
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
163166
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
167+
void unorderedReduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
168+
void unorderedReduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
164169

165170
// Int Reduction
166171
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
@@ -197,14 +202,27 @@
197202
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
198203
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
199204

205+
// Unordered Float Reduction
206+
void unorderedReduce2F(int opcode, XMMRegister dst, XMMRegister src);
207+
void unorderedReduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
208+
void unorderedReduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
209+
void unorderedReduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
210+
200211
// Double Reduction
201212
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
202213
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
203214
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
204215

216+
// Unordered Double Reduction
217+
void unorderedReduce2D(int opcode, XMMRegister dst, XMMRegister src);
218+
void unorderedReduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
219+
void unorderedReduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
220+
205221
// Base reduction instruction
206222
void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
207223
void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
224+
void unordered_reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
225+
void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
208226

209227
public:
210228
#ifdef _LP64

‎src/hotspot/cpu/x86/x86.ad

+126-6
Original file line numberDiff line numberDiff line change
@@ -5109,7 +5109,7 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm
51095109
// =======================Float Reduction==========================================
51105110

51115111
instruct reductionF128(regF dst, vec src, vec vtmp) %{
5112-
predicate(Matcher::vector_length(n->in(2)) <= 4); // src
5112+
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
51135113
match(Set dst (AddReductionVF dst src));
51145114
match(Set dst (MulReductionVF dst src));
51155115
effect(TEMP dst, TEMP vtmp);
@@ -5123,7 +5123,7 @@ instruct reductionF128(regF dst, vec src, vec vtmp) %{
51235123
%}
51245124

51255125
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
5126-
predicate(Matcher::vector_length(n->in(2)) == 8); // src
5126+
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
51275127
match(Set dst (AddReductionVF dst src));
51285128
match(Set dst (MulReductionVF dst src));
51295129
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5137,7 +5137,7 @@ instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
51375137
%}
51385138

51395139
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5140-
predicate(Matcher::vector_length(n->in(2)) == 16); // src
5140+
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
51415141
match(Set dst (AddReductionVF dst src));
51425142
match(Set dst (MulReductionVF dst src));
51435143
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5150,10 +5150,79 @@ instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
51505150
ins_pipe( pipe_slow );
51515151
%}
51525152

5153+
5154+
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // No TEMP vectors needed: the 2-lane case reduces with a single shuffle+op
  // (see unorderedReduce2F, which asserts its temps are xnoreg).
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5170+
5171+
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // One TEMP vector: the 4-lane helper needs a single scratch register.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5187+
5188+
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Two TEMP vectors: the 8-lane (256-bit) helper needs two scratch registers.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5204+
5205+
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // legVec operands: the 16-lane (512-bit) case restricts register selection.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5221+
51535222
// =======================Double Reduction==========================================
51545223

51555224
instruct reduction2D(regD dst, vec src, vec vtmp) %{
5156-
predicate(Matcher::vector_length(n->in(2)) == 2); // src
5225+
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
51575226
match(Set dst (AddReductionVD dst src));
51585227
match(Set dst (MulReductionVD dst src));
51595228
effect(TEMP dst, TEMP vtmp);
@@ -5167,7 +5236,7 @@ instruct reduction2D(regD dst, vec src, vec vtmp) %{
51675236
%}
51685237

51695238
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
5170-
predicate(Matcher::vector_length(n->in(2)) == 4); // src
5239+
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
51715240
match(Set dst (AddReductionVD dst src));
51725241
match(Set dst (MulReductionVD dst src));
51735242
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5181,7 +5250,7 @@ instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
51815250
%}
51825251

51835252
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5184-
predicate(Matcher::vector_length(n->in(2)) == 8); // src
5253+
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
51855254
match(Set dst (AddReductionVD dst src));
51865255
match(Set dst (MulReductionVD dst src));
51875256
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5194,6 +5263,57 @@ instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
51945263
ins_pipe( pipe_slow );
51955264
%}
51965265

5266+
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // No TEMP vectors needed: the 2-lane case reduces with a single shuffle+op
  // (see unorderedReduce2D, which asserts its temps are xnoreg).
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5282+
5283+
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // One TEMP vector: the 4-lane helper needs a single scratch register.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5299+
5300+
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // legVec operands: the 8-lane (512-bit) case restricts register selection.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
5316+
51975317
// =======================Byte Reduction==========================================
51985318

51995319
#ifdef _LP64

0 commit comments

Comments
 (0)
Please sign in to comment.