Commit b594f01

8314748: 1-10% regressions on Crypto micros

Author: Andrew Haley
Committed: Aug 31, 2023
Reviewed-by: chagedorn, adinn, kvn, sviswanathan
Parent: 39f1813

2 files changed: +32 -28 lines

src/hotspot/cpu/x86/stubGenerator_x86_64.hpp (+1 -1)

@@ -365,7 +365,7 @@ class StubGenerator: public StubCodeGenerator {
   // Utility routine for increase 128bit counter (iv in CTR mode)
   void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
   void ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
-                int vector_len, KRegister ktmp, Register rscratch = noreg);
+                int vector_len, KRegister ktmp, XMMRegister ones);
   void generate_aes_stubs();

src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp (+31 -27)

@@ -1637,12 +1637,12 @@ void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, Re
 // Clobber ktmp and rscratch.
 // Used by aesctr_encrypt.
 void StubGenerator::ev_add128(XMMRegister xmmdst, XMMRegister xmmsrc1, XMMRegister xmmsrc2,
-                              int vector_len, KRegister ktmp, Register rscratch) {
+                              int vector_len, KRegister ktmp, XMMRegister ones) {
   __ vpaddq(xmmdst, xmmsrc1, xmmsrc2, vector_len);
-  __ evpcmpuq(ktmp, xmmdst, xmmsrc2, __ lt, vector_len);
-  __ kshiftlbl(ktmp, ktmp, 1);
-  __ evpaddq(xmmdst, ktmp, xmmdst, ExternalAddress(counter_mask_ones_addr()), /*merge*/true,
-             vector_len, rscratch);
+  __ evpcmpuq(ktmp, xmmdst, xmmsrc2, __ lt, vector_len); // set mask[0/1] bit if addq to dst[0/1] wraps
+  __ kshiftlbl(ktmp, ktmp, 1); // mask[1] <- mask[0], mask[0] <- 0, etc
+
+  __ evpaddq(xmmdst, ktmp, xmmdst, ones, /*merge*/true, vector_len); // dst[1]++ if mask[1] set
 }

 // AES-ECB Encrypt Operation
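
The masked-add idiom in ev_add128 is compact but not self-evident. Here is a minimal scalar sketch of the same computation in plain C++, one 128-bit counter at a time; Counter128 and add128 are illustrative names, not from the JDK sources:

#include <cstdint>

// Scalar model of the vector sequence above: a 128-bit add performed as
// two 64-bit lane adds, with the carry out of the low lane propagated
// explicitly into the high lane.
struct Counter128 {
  uint64_t lo;   // lane 0 (dst[0])
  uint64_t hi;   // lane 1 (dst[1])
};

static Counter128 add128(Counter128 a, Counter128 b) {
  Counter128 r;
  r.lo = a.lo + b.lo;        // vpaddq, lane 0
  r.hi = a.hi + b.hi;        // vpaddq, lane 1
  bool carry = r.lo < b.lo;  // evpcmpuq ... lt: the add wrapped iff sum < addend
  if (carry) {               // kshiftlbl aligns each lane-0 mask bit with lane 1
    r.hi += 1;               // evpaddq with 'ones', merge-masked by ktmp
  }
  return r;
}

The vector version does this for several counters at once: evpcmpuq builds a per-lane wrap mask, kshiftlbl moves each low-lane bit over its high lane, and the merge-masked evpaddq adds 1 only where a carry occurred.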
@@ -2125,16 +2125,20 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
   // each zmm register has 4 counter values as its MSB
   // the counters are incremented in parallel

+  const XMMRegister ones = xmm17;
+  // Vector value to propagate carries
+  __ evmovdquq(ones, ExternalAddress(counter_mask_ones_addr()), Assembler::AVX_512bit, r15);
+
   __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
-  ev_add128(xmm9, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm10, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm11, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm12, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm13, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm14, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm15, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit);
+  ev_add128(xmm9, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm10, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm11, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm12, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm13, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm14, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm15, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);

   // load linc32 mask in zmm register.linc32 increments counter by 32
   __ evmovdquq(xmm19, ExternalAddress(counter_mask_linc32_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
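
This hunk is the heart of the fix: previously ev_add128 took the carry constant as an ExternalAddress operand, so every counter increment re-materialized counter_mask_ones from memory (with r15 as scratch); now the constant is loaded once into xmm17 and passed as a register, and the remaining hunks just swap r15 /*rscratch*/ for ones at each call site. A scalar analogue of this loop-invariant hoist, with illustrative names (kOnes and bump_high_lanes are not from the JDK sources):

#include <cstddef>
#include <cstdint>

static const uint64_t kOnes = 1;  // stands in for the counter_mask_ones table

void bump_high_lanes(uint64_t* hi, const bool* carry, size_t n) {
  const uint64_t ones = kOnes;    // hoisted load, cf. evmovdquq into xmm17
  for (size_t i = 0; i < n; ++i) {
    if (carry[i]) {
      hi[i] += ones;              // cf. the merge-masked evpaddq in ev_add128
    }
  }
}

Since the same value is reused by every ev_add128 call in aesctr_encrypt, the memory access now happens once per stub invocation rather than once per counter increment.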
@@ -2182,21 +2186,21 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
   // This is followed by incrementing counter values in zmm8-zmm15.
   // Since we will be processing 32 blocks at a time, the counter is incremented by 32.
   roundEnc(xmm21, 7);
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm22, 7);
-  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm23, 7);
-  ev_add128(xmm10, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm10, xmm10, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm24, 7);
-  ev_add128(xmm11, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm11, xmm11, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm25, 7);
-  ev_add128(xmm12, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm12, xmm12, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm26, 7);
-  ev_add128(xmm13, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm13, xmm13, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm27, 7);
-  ev_add128(xmm14, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm14, xmm14, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm28, 7);
-  ev_add128(xmm15, xmm15, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm15, xmm15, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   roundEnc(xmm29, 7);

   __ cmpl(rounds, 52);
@@ -2274,8 +2278,8 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
   __ vpshufb(xmm3, xmm11, xmm16, Assembler::AVX_512bit);
   __ evpxorq(xmm3, xmm3, xmm20, Assembler::AVX_512bit);
   // Increment counter values by 16
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
-  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
+  ev_add128(xmm9, xmm9, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   // AES encode rounds
   roundEnc(xmm21, 3);
   roundEnc(xmm22, 3);
@@ -2342,7 +2346,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
   __ vpshufb(xmm1, xmm9, xmm16, Assembler::AVX_512bit);
   __ evpxorq(xmm1, xmm1, xmm20, Assembler::AVX_512bit);
   // increment counter by 8
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   // AES encode
   roundEnc(xmm21, 1);
   roundEnc(xmm22, 1);
@@ -2401,7 +2405,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
   __ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);

   // Increment counter
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_512bit, /*ktmp*/k1, ones);
   __ vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_512bit);
   __ vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_512bit);
   __ vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_512bit);
@@ -2451,7 +2455,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
   __ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_128bit);
   __ vaesenc(xmm0, xmm0, xmm21, Assembler::AVX_128bit);
   // Increment counter by 1
-  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_128bit, /*ktmp*/k1, r15 /*rscratch*/);
+  ev_add128(xmm8, xmm8, xmm19, Assembler::AVX_128bit, /*ktmp*/k1, ones);
   __ vaesenc(xmm0, xmm0, xmm22, Assembler::AVX_128bit);
   __ vaesenc(xmm0, xmm0, xmm23, Assembler::AVX_128bit);
   __ vaesenc(xmm0, xmm0, xmm24, Assembler::AVX_128bit);
