@@ -5109,7 +5109,7 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm
5109
5109
// =======================Float Reduction==========================================
5110
5110
5111
5111
instruct reductionF128(regF dst, vec src, vec vtmp) %{
5112
- predicate(Matcher::vector_length(n->in(2)) <= 4); // src
5112
+ predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
5113
5113
match(Set dst (AddReductionVF dst src));
5114
5114
match(Set dst (MulReductionVF dst src));
5115
5115
effect(TEMP dst, TEMP vtmp);
@@ -5123,7 +5123,7 @@ instruct reductionF128(regF dst, vec src, vec vtmp) %{
5123
5123
%}
5124
5124
5125
5125
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
5126
- predicate(Matcher::vector_length(n->in(2)) == 8); // src
5126
+ predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
5127
5127
match(Set dst (AddReductionVF dst src));
5128
5128
match(Set dst (MulReductionVF dst src));
5129
5129
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5137,7 +5137,7 @@ instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
5137
5137
%}
5138
5138
5139
5139
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5140
- predicate(Matcher::vector_length(n->in(2)) == 16); // src
5140
+ predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
5141
5141
match(Set dst (AddReductionVF dst src));
5142
5142
match(Set dst (MulReductionVF dst src));
5143
5143
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5150,10 +5150,79 @@ instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5150
5150
ins_pipe( pipe_slow );
5151
5151
%}
5152
5152
5153
+
5154
+ instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
5155
+ // Add/mul reduction of a 2-element float vector for nodes that do not require strict
5156
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5157
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5158
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // n->in(2) is src2
5159
+ match(Set dst (AddReductionVF src1 src2));
5160
+ match(Set dst (MulReductionVF src1 src2));
5161
+ effect(TEMP dst); // only dst is declared TEMP; this rule has no scratch vector operand
5162
+ format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
5163
+ ins_encode %{
5164
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVF or MulReductionVF)
5165
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (2 for this rule, per the predicate)
5166
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); // src1 is intentionally not an argument
5167
+ %}
5168
+ ins_pipe( pipe_slow );
5169
+ %}
5170
+
5171
+ instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
5172
+ // Add/mul reduction of a 4-element float vector for nodes that do not require strict
5173
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5174
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5175
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // n->in(2) is src2
5176
+ match(Set dst (AddReductionVF src1 src2));
5177
+ match(Set dst (MulReductionVF src1 src2));
5178
+ effect(TEMP dst, TEMP vtmp); // dst and the single scratch vector vtmp are declared TEMP
5179
+ format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
5180
+ ins_encode %{
5181
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVF or MulReductionVF)
5182
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (4 for this rule, per the predicate)
5183
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); // src1 is intentionally not an argument
5184
+ %}
5185
+ ins_pipe( pipe_slow );
5186
+ %}
5187
+
5188
+ instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
5189
+ // Add/mul reduction of an 8-element float vector for nodes that do not require strict
5190
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5191
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5192
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // n->in(2) is src2
5193
+ match(Set dst (AddReductionVF src1 src2));
5194
+ match(Set dst (MulReductionVF src1 src2));
5195
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); // dst and both scratch vectors are declared TEMP
5196
+ format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5197
+ ins_encode %{
5198
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVF or MulReductionVF)
5199
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (8 for this rule, per the predicate)
5200
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); // src1 is intentionally not an argument
5201
+ %}
5202
+ ins_pipe( pipe_slow );
5203
+ %}
5204
+
5205
+ instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5206
+ // Add/mul reduction of a 16-element float vector for nodes that do not require strict
5207
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5208
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5209
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // n->in(2) is src2
5210
+ match(Set dst (AddReductionVF src1 src2));
5211
+ match(Set dst (MulReductionVF src1 src2));
5212
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); // dst and both scratch vectors are declared TEMP
5213
+ format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5214
+ ins_encode %{
5215
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVF or MulReductionVF)
5216
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (16 for this rule, per the predicate)
5217
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); // src1 is intentionally not an argument
5218
+ %}
5219
+ ins_pipe( pipe_slow );
5220
+ %}
5221
+
5153
5222
// =======================Double Reduction==========================================
5154
5223
5155
5224
instruct reduction2D(regD dst, vec src, vec vtmp) %{
5156
- predicate(Matcher::vector_length(n->in(2)) == 2); // src
5225
+ predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
5157
5226
match(Set dst (AddReductionVD dst src));
5158
5227
match(Set dst (MulReductionVD dst src));
5159
5228
effect(TEMP dst, TEMP vtmp);
@@ -5167,7 +5236,7 @@ instruct reduction2D(regD dst, vec src, vec vtmp) %{
5167
5236
%}
5168
5237
5169
5238
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
5170
- predicate(Matcher::vector_length(n->in(2)) == 4); // src
5239
+ predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
5171
5240
match(Set dst (AddReductionVD dst src));
5172
5241
match(Set dst (MulReductionVD dst src));
5173
5242
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5181,7 +5250,7 @@ instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
5181
5250
%}
5182
5251
5183
5252
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5184
- predicate(Matcher::vector_length(n->in(2)) == 8); // src
5253
+ predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
5185
5254
match(Set dst (AddReductionVD dst src));
5186
5255
match(Set dst (MulReductionVD dst src));
5187
5256
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -5194,6 +5263,57 @@ instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
5194
5263
ins_pipe( pipe_slow );
5195
5264
%}
5196
5265
5266
+ instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
5267
+ // Add/mul reduction of a 2-element double vector for nodes that do not require strict
5268
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5269
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5270
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // n->in(2) is src2
5271
+ match(Set dst (AddReductionVD src1 src2));
5272
+ match(Set dst (MulReductionVD src1 src2));
5273
+ effect(TEMP dst); // only dst is declared TEMP; this rule has no scratch vector operand
5274
+ format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
5275
+ ins_encode %{
5276
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVD or MulReductionVD)
5277
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (2 for this rule, per the predicate)
5278
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); // src1 is intentionally not an argument
5279
+ %}
5280
+ ins_pipe( pipe_slow );
5281
+ %}
5282
+
5283
+ instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
5284
+ // Add/mul reduction of a 4-element double vector for nodes that do not require strict
5285
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5286
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5287
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // n->in(2) is src2
5288
+ match(Set dst (AddReductionVD src1 src2));
5289
+ match(Set dst (MulReductionVD src1 src2));
5290
+ effect(TEMP dst, TEMP vtmp); // dst and the single scratch vector vtmp are declared TEMP
5291
+ format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
5292
+ ins_encode %{
5293
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVD or MulReductionVD)
5294
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (4 for this rule, per the predicate)
5295
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); // src1 is intentionally not an argument
5296
+ %}
5297
+ ins_pipe( pipe_slow );
5298
+ %}
5299
+
5300
+ instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
5301
+ // Add/mul reduction of an 8-element double vector for nodes that do not require strict
5302
+ // ordering. Intended for the VectorAPI, which allows non-strictly ordered add/mul reduction.
5303
+ // src1 contains the reduction identity; it is matched below but never passed to the encoder.
5304
+ predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // n->in(2) is src2
5305
+ match(Set dst (AddReductionVD src1 src2));
5306
+ match(Set dst (MulReductionVD src1 src2));
5307
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); // dst and both scratch vectors are declared TEMP
5308
+ format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
5309
+ ins_encode %{
5310
+ int opcode = this->ideal_Opcode(); // ideal opcode of the matched node (AddReductionVD or MulReductionVD)
5311
+ int vlen = Matcher::vector_length(this, $src2); // vector length of src2 (8 for this rule, per the predicate)
5312
+ __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); // src1 is intentionally not an argument
5313
+ %}
5314
+ ins_pipe( pipe_slow );
5315
+ %}
5316
+
5197
5317
// =======================Byte Reduction==========================================
5198
5318
5199
5319
#ifdef _LP64
0 commit comments