@@ -5534,39 +5534,10 @@ instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg
5534
5534
5535
5535
// first true
5536
5536
5537
- instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{ // VectorMaskFirstTrue for masks with fewer than 8 lanes, non-SVE path
5538
- predicate(UseSVE == 0 &&
5539
- Matcher::vector_length(n->in(1)) < 8); // only selected when the mask input has fewer than 8 lanes
5540
- match(Set dst (VectorMaskFirstTrue src));
5541
- effect(KILL cr); // the cmpw in the encoding below overwrites the condition flags
5542
- format %{ "vmask_firsttrue_lt8e $dst, $src\t# vector < 8 elements (neon). KILL cr" %}
5543
- ins_encode %{
5544
- // Returns the index of the first active lane of the
5545
- // vector mask, or VLENGTH if no lane is active.
5546
- //
5547
- // Input "src" is a vector of boolean represented as
5548
- // bytes with 0x00/0x01 as element values.
5549
- //
5550
- // Computed by reversing the bits and counting the leading
5551
- // zero bytes; the count is then clamped to VLENGTH.
5552
- BasicType bt = Matcher::vector_element_basic_type(this, $src);
5553
- assert(bt == T_BOOLEAN, "unsupported type");
5554
- __ fmovd($dst$$Register, $src$$FloatRegister); // copy the mask bytes into the general register
5555
- __ rbit($dst$$Register, $dst$$Register); // reverse bits so the lowest lane ends up at the top
5556
- __ clz($dst$$Register, $dst$$Register); // leading zero bits = bit offset of the first set bit
5557
- __ lsrw($dst$$Register, $dst$$Register, 3); // bits -> bytes (divide by 8) gives the lane index
5558
- __ movw(rscratch1, Matcher::vector_length(this, $src)); // rscratch1 = VLENGTH (lane count)
5559
- __ cmpw($dst$$Register, rscratch1); // sets flags, hence KILL cr
5560
- __ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE); // dst = (dst >= VLENGTH) ? VLENGTH : dst
5561
- %}
5562
- ins_pipe(pipe_slow);
5563
- %}
5564
-
5565
- instruct vmask_firsttrue_8or16e(iRegINoSp dst, vReg src) %{
5566
- predicate(UseSVE == 0 &&
5567
- (Matcher::vector_length(n->in(1)) == 8 || Matcher::vector_length(n->in(1)) == 16));
5537
+ instruct vmask_firsttrue_neon(iRegINoSp dst, vReg src) %{
5538
+ predicate(UseSVE == 0);
5568
5539
match(Set dst (VectorMaskFirstTrue src));
5569
- format %{ "vmask_firsttrue_8or16e $dst, $src\t# vector 8B/16B (neon) " %}
5540
+ format %{ "vmask_firsttrue_neon $dst, $src" %}
5570
5541
ins_encode %{
5571
5542
// Returns the index of the first active lane of the
5572
5543
// vector mask, or VLENGTH if no lane is active.
@@ -5579,14 +5550,21 @@ instruct vmask_firsttrue_8or16e(iRegINoSp dst, vReg src) %{
5579
5550
5580
5551
BasicType bt = Matcher::vector_element_basic_type(this, $src);
5581
5552
assert(bt == T_BOOLEAN, "unsupported type");
5582
- uint length_in_bytes = Matcher::vector_length_in_bytes (this, $src);
5583
- if (length_in_bytes == 8) {
5553
+ uint vlength = Matcher::vector_length (this, $src);
5554
+ if (vlength <= 8) {
5584
5555
__ fmovd($dst$$Register, $src$$FloatRegister);
5556
+ if (vlength == 2 || vlength == 4) {
5557
+ // Special handling for 2B or 4B cases:
5558
+ // Vector mask is moved to a 64-bit general register, but only the low 16/32 bits are
5559
+ // significant for 2B/4B cases. We initialize the 16th/32nd bit as bit 1, so as to generate
5560
+ // the expected result (i.e. VLENGTH) for the case that all lanes are zero.
5561
+ __ orr($dst$$Register, $dst$$Register, vlength == 2 ? 0x10000 : 0x100000000);
5562
+ }
5585
5563
__ rbit($dst$$Register, $dst$$Register);
5586
5564
__ clz($dst$$Register, $dst$$Register);
5587
5565
__ lsrw($dst$$Register, $dst$$Register, 3);
5588
5566
} else {
5589
- assert(length_in_bytes == 16, "must be");
5567
+ assert(vlength == 16, "must be");
5590
5568
Label FIRST_TRUE_INDEX;
5591
5569
5592
5570
// Try to compute the result from lower 64 bits.
0 commit comments