Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8293329: x86: Improve handling of constants in AES/GHASH stubs #10152

Closed
wants to merge 14 commits into from
98 changes: 40 additions & 58 deletions src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
Original file line number Diff line number Diff line change
@@ -65,6 +65,22 @@
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
const int FPU_CNTRL_WRD_MASK = 0xFFFF;

ATTRIBUTE_ALIGNED(16) uint32_t KEY_SHUFFLE_MASK[] = {
0x00010203UL, 0x04050607UL, 0x08090A0BUL, 0x0C0D0E0FUL,
};

ATTRIBUTE_ALIGNED(16) uint32_t COUNTER_SHUFFLE_MASK[] = {
0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
};

ATTRIBUTE_ALIGNED(16) uint32_t GHASH_BYTE_SWAP_MASK[] = {
0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
};

ATTRIBUTE_ALIGNED(16) uint32_t GHASH_LONG_SWAP_MASK[] = {
0x0B0A0908UL, 0x0F0E0D0CUL, 0x03020100UL, 0x07060504UL,
};

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

@@ -2180,26 +2196,12 @@ class StubGenerator: public StubCodeGenerator {
// AES intrinsic stubs
enum {AESBlockSize = 16};

address generate_key_shuffle_mask() {
__ align(16);
StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
address start = __ pc();
__ emit_data(0x00010203, relocInfo::none, 0 );
__ emit_data(0x04050607, relocInfo::none, 0 );
__ emit_data(0x08090a0b, relocInfo::none, 0 );
__ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
return start;
address key_shuffle_mask_addr() {
return (address)KEY_SHUFFLE_MASK;
}

address generate_counter_shuffle_mask() {
__ align(16);
StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
address start = __ pc();
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
__ emit_data(0x08090a0b, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
return start;
address counter_shuffle_mask_addr() {
return (address)COUNTER_SHUFFLE_MASK;
}

// Utility routine for loading a 128-bit key word in little endian format
@@ -2209,7 +2211,7 @@ class StubGenerator: public StubCodeGenerator {
if (xmm_shuf_mask != xnoreg) {
__ pshufb(xmmdst, xmm_shuf_mask);
} else {
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()));
}
}

@@ -2290,7 +2292,7 @@ class StubGenerator: public StubCodeGenerator {
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
__ movptr(to, to_param);

@@ -2389,7 +2391,7 @@ class StubGenerator: public StubCodeGenerator {
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
__ movdqu(xmm_result, Address(from, 0));
__ movptr(to, to_param);

@@ -2522,7 +2524,7 @@ class StubGenerator: public StubCodeGenerator {
__ movptr(len_reg , len_param);

const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
// load up xmm regs 2 thru 7 with keys 0-5
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
@@ -2690,7 +2692,7 @@ class StubGenerator: public StubCodeGenerator {
__ movptr(rvec , rvec_param);
__ movptr(len_reg , len_param);

__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
__ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec

__ xorptr(pos, pos);
@@ -2909,11 +2911,11 @@ class StubGenerator: public StubCodeGenerator {

// initialize counter with initial counter
__ movdqu(xmm_curr_counter, Address(counter, 0x00));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase

// key length could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
__ cmpl(rax, 52);
__ jcc(Assembler::equal, L_key192_top);
@@ -2939,8 +2941,8 @@ class StubGenerator: public StubCodeGenerator {
__ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
__ jcc(Assembler::less, L_singleBlockLoopTop[k]);

__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));

//load, then increase counters
CTR_DoFour(movdqa, xmm_curr_counter);
@@ -2992,8 +2994,8 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_singleBlockLoopTop[k]);
__ cmpptr(len_reg, 0);
__ jcc(Assembler::equal, L_exit);
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
__ movdqa(xmm_result0, xmm_curr_counter);
load_key(xmm_key, key, 0x00, xmm_key_shuf_mask);
__ push(rbx);//rbx is used for increasing counter
@@ -3078,7 +3080,7 @@ class StubGenerator: public StubCodeGenerator {
}

__ BIND(L_exit);
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
__ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
handleSOERegisters(false /*restoring*/);
@@ -3266,28 +3268,13 @@ class StubGenerator: public StubCodeGenerator {
}

// byte swap x86 long
address generate_ghash_long_swap_mask() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
address start = __ pc();
__ emit_data(0x0b0a0908, relocInfo::none, 0);
__ emit_data(0x0f0e0d0c, relocInfo::none, 0);
__ emit_data(0x03020100, relocInfo::none, 0);
__ emit_data(0x07060504, relocInfo::none, 0);

return start;
address ghash_long_swap_mask_addr() {
return (address)GHASH_LONG_SWAP_MASK;
}

// byte swap x86 byte array
address generate_ghash_byte_swap_mask() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
address start = __ pc();
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
__ emit_data(0x08090a0b, relocInfo::none, 0);
__ emit_data(0x04050607, relocInfo::none, 0);
__ emit_data(0x00010203, relocInfo::none, 0);
return start;
address ghash_byte_swap_mask_addr() {
return (address)GHASH_BYTE_SWAP_MASK;
}

/* Single and multi-block ghash operations */
@@ -3326,14 +3313,14 @@ class StubGenerator: public StubCodeGenerator {
__ movptr(blocks, blocks_param);

__ movdqu(xmm_temp0, Address(state, 0));
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ pshufb(xmm_temp0, ExternalAddress(ghash_long_swap_mask_addr()));

__ movdqu(xmm_temp1, Address(subkeyH, 0));
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ pshufb(xmm_temp1, ExternalAddress(ghash_long_swap_mask_addr()));

__ BIND(L_ghash_loop);
__ movdqu(xmm_temp2, Address(data, 0));
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
__ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr()));

__ pxor(xmm_temp0, xmm_temp2);

@@ -3419,7 +3406,7 @@ class StubGenerator: public StubCodeGenerator {

__ BIND(L_exit);
// Byte swap 16-byte result
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
__ pshufb(xmm_temp6, ExternalAddress(ghash_long_swap_mask_addr()));
__ movdqu(Address(state, 0), xmm_temp6); // store the result

handleSOERegisters(false); // restore registers
@@ -4126,16 +4113,13 @@ class StubGenerator: public StubCodeGenerator {

// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others

StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}

if (UseAESCTRIntrinsics) {
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
}

@@ -4158,8 +4142,6 @@ class StubGenerator: public StubCodeGenerator {

// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
}

31 changes: 13 additions & 18 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
Original file line number Diff line number Diff line change
@@ -337,31 +337,26 @@ class StubGenerator: public StubCodeGenerator {

address generate_key_shuffle_mask();

address generate_counter_shuffle_mask();

// This mask is used for incrementing counter value(linc0, linc4, etc.)
address generate_counter_mask_addr();

address generate_ghash_polynomial512_addr();

void roundDec(XMMRegister xmm_reg);
void roundDeclast(XMMRegister xmm_reg);
void roundEnc(XMMRegister key, int rnum);
void lastroundEnc(XMMRegister key, int rnum);
void roundDec(XMMRegister key, int rnum);
void lastroundDec(XMMRegister key, int rnum);
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
bool final_reduction, int index, XMMRegister counter_inc_mask);
// Load key and shuffle operation
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg);
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

// Utility routine for loading a 128-bit key word in little endian format
// can optionally specify that the shuffle mask is already in an xmmregister
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg);
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);

// Utility routine for increase 128bit counter (iv in CTR mode)
void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
@@ -376,17 +371,15 @@ class StubGenerator: public StubCodeGenerator {
void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
void gfmul(XMMRegister tmp0, XMMRegister t);
void generateHtbl_one_block(Register htbl);
void generateHtbl_one_block(Register htbl, Register rscratch);
void generateHtbl_eight_blocks(Register htbl);
void avx_ghash(Register state, Register htbl, Register data, Register blocks);

address generate_ghash_polynomial_addr();

address generate_ghash_shufflemask_addr();

address generate_ghash_long_swap_mask(); // byte swap x86 long

address generate_ghash_byte_swap_mask(); // byte swap x86 byte array
// Used by GHASH and AES stubs.
address ghash_polynomial_addr();
address ghash_shufflemask_addr();
address ghash_long_swap_mask_addr(); // byte swap x86 long
address ghash_byte_swap_mask_addr(); // byte swap x86 byte array

// Single and multi-block ghash operations
address generate_ghash_processBlocks();
@@ -395,6 +388,8 @@ class StubGenerator: public StubCodeGenerator {
address generate_avx_ghash_processBlocks();


// BASE64 stubs

address base64_shuffle_addr();
address base64_avx2_shuffle_addr();
address base64_avx2_input_mask_addr();
Loading