@@ -121,6 +121,16 @@ static address counter_mask_linc32_addr() {
return (address)COUNTER_MASK_LINC32;
}
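+ // Per 128-bit lane: low qword = 0, high qword = 1. ev_add128 adds this table under a
+ // mask to carry a low-qword overflow into the high qword of each counter.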
+ ATTRIBUTE_ALIGNED(64) static const uint64_t COUNTER_MASK_ONES[] = {
+ 0x0000000000000000UL, 0x0000000000000001UL,
+ 0x0000000000000000UL, 0x0000000000000001UL,
+ 0x0000000000000000UL, 0x0000000000000001UL,
+ 0x0000000000000000UL, 0x0000000000000001UL,
+ };
+ static address counter_mask_ones_addr() {
+ return (address)COUNTER_MASK_ONES;
+ }
+
ATTRIBUTE_ALIGNED(64) static const uint64_t GHASH_POLYNOMIAL_REDUCTION[] = {
0x00000001C2000000UL, 0xC200000000000000UL,
0x00000001C2000000UL, 0xC200000000000000UL,
@@ -1623,6 +1633,17 @@ void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, Re
__ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
}
+ // Add 128-bit integers in xmmsrc1 to xmmsrc2, then place the result in xmmdst.
+ // Clobbers ktmp and rscratch.
+ // Used by aesctr_encrypt.
+ void StubGenerator::ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
+ int vector_len, KRegister ktmp, Register rscratch) {
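+ // Lane-wise 64-bit add; a carry out of any low qword is lost here and restored below.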
+ __ vpaddq(xmmdst, xmmsrc1, xmmsrc2, vector_len);
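+ // Unsigned compare: a sum qword smaller than its addend means the 64-bit add wrapped.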
+ __ evpcmpuq(ktmp, xmmdst, xmmsrc2, __ lt, vector_len);
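+ // Shift each carry bit up one lane so it selects the high qword of its 128-bit pair.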
+ __ kshiftlbl(ktmp, ktmp, 1);
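+ // Merge-masked add of COUNTER_MASK_ONES: +1 to the selected high qwords; bits shifted
+ // into low-qword lanes add the table's 0 entries, so they have no effect.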
+ __ evpaddq(xmmdst, ktmp, xmmdst, ExternalAddress(counter_mask_ones_addr()), /*merge*/true,
+ vector_len, rscratch);
+ }
// AES-ECB Encrypt Operation
void StubGenerator::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) {
@@ -2046,7 +2067,6 @@ void StubGenerator::aesecb_decrypt(Register src_addr, Register dest_addr, Regist
}
-
// AES Counter Mode using VAES instructions
void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start) {
@@ -2104,14 +2124,17 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
// The counter is incremented after each block i.e. 16 bytes is processed;
// each zmm register has 4 counter values as its MSB
// the counters are incremented in parallel
- __ vpaddd(xmm8, xmm8, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm9, xmm8, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm10, xmm9, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm11, xmm10, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm12, xmm11, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm13, xmm12, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm14, xmm13, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
- __ vpaddd(xmm15, xmm14, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
+
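+ // Use ev_add128 so a carry out of the low 64 bits of any counter propagates into its
+ // high 64 bits instead of being dropped by a plain 32/64-bit lane add.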
+ __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
+ ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
+ ev_add128(xmm9, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ ev_add128(xmm10, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ ev_add128(xmm11, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ ev_add128(xmm12, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ ev_add128(xmm13, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ ev_add128(xmm14, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+ ev_add128(xmm15, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
// load linc32 mask in zmm register.linc32 increments counter by 32
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc32_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
@@ -2159,21 +2182,21 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
// This is followed by incrementing counter values in zmm8-zmm15.
// Since we will be processing 32 blocks at a time, the counter is incremented by 32.
roundEnc(xmm21, 7);
- __ vpaddq (xmm8, xmm8, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm22, 7);
- __ vpaddq (xmm9, xmm9, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm23, 7);
- __ vpaddq (xmm10, xmm10, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm10, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm24, 7);
- __ vpaddq (xmm11, xmm11, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm11, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm25, 7);
- __ vpaddq (xmm12, xmm12, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm12, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm26, 7);
- __ vpaddq (xmm13, xmm13, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm13, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm27, 7);
- __ vpaddq (xmm14, xmm14, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm14, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm28, 7);
- __ vpaddq (xmm15, xmm15, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm15, xmm15, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
roundEnc(xmm29, 7);
__ cmpl(rounds, 52);
@@ -2251,8 +2274,8 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
__ vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit);
__ evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit);
// Increment counter values by 16
- __ vpaddq (xmm8, xmm8, xmm19, Assembler::AVX_512bit);
- __ vpaddq (xmm9, xmm9, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
+ ev_add128 (xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
// AES encode rounds
roundEnc(xmm21, 3);
roundEnc(xmm22, 3);
@@ -2319,7 +2342,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
__ vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit);
__ evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit);
// increment counter by 8
- __ vpaddq (xmm8, xmm8, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
// AES encode
roundEnc(xmm21, 1);
roundEnc(xmm22, 1);
@@ -2376,8 +2399,9 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
// XOR counter with first roundkey
__ vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
__ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
+
// Increment counter
- __ vpaddq (xmm8, xmm8, xmm19, Assembler::AVX_512bit);
+ ev_add128 (xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/ );
__ vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_512bit);
__ vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_512bit);
__ vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_512bit);
@@ -2427,7 +2451,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
__ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_128bit);
__ vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_128bit);
// Increment counter by 1
- __ vpaddq (xmm8, xmm8, xmm19, Assembler::AVX_128bit);
+ ev_add128 (xmm8, xmm8, xmm19, Assembler::AVX_128bit, /*ktmp*/k1, r15 /*rscratch*/ );
__ vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_128bit);
__ vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_128bit);
__ vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_128bit);