@@ -1637,12 +1637,12 @@ void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, Re
// Clobber ktmp and rscratch.
// Used by aesctr_encrypt.
void StubGenerator::ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
-                             int vector_len, KRegister ktmp, Register rscratch) {
+                             int vector_len, KRegister ktmp, XMMRegister ones) {
  __ vpaddq(xmmdst, xmmsrc1, xmmsrc2, vector_len);
-  __ evpcmpuq(ktmp, xmmdst, xmmsrc2, __ lt, vector_len);
-  __ kshiftlbl(ktmp, ktmp, 1);
-  __ evpaddq(xmmdst, ktmp, xmmdst, ExternalAddress(counter_mask_ones_addr()), /*merge*/ true,
-             vector_len, rscratch);
+  __ evpcmpuq(ktmp, xmmdst, xmmsrc2, __ lt, vector_len); // set mask[0/1] bit if addq to dst[0/1] wraps
+  __ kshiftlbl(ktmp, ktmp, 1);                           // mask[1] <- mask[0], mask[0] <- 0, etc
+
+  __ evpaddq(xmmdst, ktmp, xmmdst, ones, /*merge*/ true, vector_len); // dst[1]++ if mask[1] set
}
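To make the carry handling above easier to follow, here is a minimal scalar sketch of what ev_add128 computes for each 128-bit counter lane: add the low qwords, detect the wrap with an unsigned compare, and feed a single carry into the high qword. This is my own illustration, not JDK code, and the U128/add128 names are invented; the vpaddq/evpcmpuq/kshiftlbl/masked-evpaddq sequence above does the same thing for every lane of the vector at once.

#include <cstdint>
#include <cstdio>

// Scalar model of one 128-bit lane (illustrative only, not the stub's actual code path).
struct U128 { uint64_t lo, hi; };            // lo/hi mirror the two 64-bit qwords of a lane

static U128 add128(U128 a, U128 b) {
  U128 r;
  r.lo = a.lo + b.lo;                        // vpaddq on each qword
  bool carry = r.lo < b.lo;                  // evpcmpuq ... __ lt: sum wrapped below the addend
  r.hi = a.hi + b.hi + (carry ? 1 : 0);      // kshiftlbl + masked evpaddq with the 'ones' vector
  return r;
}

int main() {
  U128 ctr = { 0xFFFFFFFFFFFFFFFFULL, 0x1ULL };   // low qword about to wrap
  U128 r = add128(ctr, U128{ 4, 0 });             // increment by 4, as a linc4-style step would
  printf("lo=%016llx hi=%016llx\n", (unsigned long long)r.lo, (unsigned long long)r.hi);
  // prints lo=0000000000000003 hi=0000000000000002 -- the carry reached the high qword
  return 0;
}

The previous rscratch-based version computed the same result; the functional change here is only where the carry constant (counter_mask_ones) comes from: a preloaded register instead of a memory operand that needed rscratch.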
// AES-ECB Encrypt Operation
@@ -2125,16 +2125,20 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
  // each zmm register has 4 counter values as its MSB
  // the counters are incremented in parallel

+  const XMMRegister ones = xmm17;
+  // Vector value to propagate carries
+  __ evmovdquq(ones, ExternalAddress(counter_mask_ones_addr()), Assembler::AVX_512bit, r15);
+
  __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
-  ev_add128(xmm9, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm10, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm11, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm12, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm13, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm14, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm15, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit);
+  ev_add128(xmm9, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm10, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm11, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm12, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm13, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm14, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm15, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);

  // load linc32 mask in zmm register. linc32 increments counter by 32
  __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc32_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
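For context on the setup above: the new code loads the carry-propagation constant (counter_mask_ones) once into xmm17 and hands that register to every ev_add128 call, rather than re-materializing the ExternalAddress with rscratch inside each call. The counter schedule it feeds is unchanged: as I read it, xmm8 holds four consecutive counters (via linc0), xmm9..xmm15 follow in steps of four (linc4), and the main loop then advances everything by 32 (linc32). A small stand-alone model of that schedule, purely illustrative, ignoring the byte-order shuffle the real code applies and assuming the GCC/Clang unsigned __int128 extension:

#include <cstdint>
#include <cstdio>

int main() {
  unsigned __int128 n = 0;       // initial 128-bit counter value (placeholder)
  unsigned __int128 regs[8][4];  // regs[i][j] models counter j inside register xmm(8+i)
  for (int i = 0; i < 8; ++i)
    for (int j = 0; j < 4; ++j)
      regs[i][j] = n + 4 * i + j;              // linc0 within xmm8, then +4 per following register
  printf("last scheduled counter (low 64 bits) = %llu\n",
         (unsigned long long)(uint64_t)regs[7][3]);   // 31: blocks n..n+31 are covered
  return 0;
}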
@@ -2182,21 +2186,21 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
  // This is followed by incrementing counter values in zmm8-zmm15.
  // Since we will be processing 32 blocks at a time, the counter is incremented by 32.
  roundEnc(xmm21, 7);
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm22, 7);
-  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm23, 7);
-  ev_add128(xmm10, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm10, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm24, 7);
-  ev_add128(xmm11, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm11, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm25, 7);
-  ev_add128(xmm12, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm12, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm26, 7);
-  ev_add128(xmm13, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm13, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm27, 7);
-  ev_add128(xmm14, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm14, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm28, 7);
-  ev_add128(xmm15, xmm15, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm15, xmm15, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  roundEnc(xmm29, 7);

  __ cmpl(rounds, 52);
@@ -2274,8 +2278,8 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
  __ vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit);
  __ evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit);
  // Increment counter values by 16
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
-  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
+  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  // AES encode rounds
  roundEnc(xmm21, 3);
  roundEnc(xmm22, 3);
@@ -2342,7 +2346,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
  __ vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit);
  __ evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit);
  // increment counter by 8
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  // AES encode
  roundEnc(xmm21, 1);
  roundEnc(xmm22, 1);
@@ -2401,7 +2405,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
  __ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);

  // Increment counter
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/ k1, ones);
  __ vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_512bit);
  __ vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_512bit);
  __ vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_512bit);
@@ -2451,7 +2455,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
  __ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_128bit);
  __ vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_128bit);
  // Increment counter by 1
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_128bit, /*ktmp*/ k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_128bit, /*ktmp*/ k1, ones);
  __ vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_128bit);
  __ vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_128bit);
  __ vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_128bit);