8269404: Base64 Encoding optimization enhancements for x86 using AVX-512 #861

Closed · wants to merge 3 commits
42 changes: 42 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.cpp
@@ -3758,6 +3758,15 @@ void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
emit_int16((unsigned char)0x8D, (0xC0 | encode));
}

void Assembler::vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512_vbmi(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0x8D);
emit_operand(dst, src);
}
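
For context: VPERMB (AVX512_VBMI) selects each destination byte from the second source, here a memory operand, using the byte indices held in the first source; in a Base64 encoder this can serve as a full 64-entry table lookup in a single instruction. A minimal scalar sketch of the architectural semantics of the 512-bit form (illustration only, not part of this patch; names are hypothetical):

// Reference model for vpermb zmm_dst, zmm_idx, m512: the low 6 bits of
// each index byte pick one of the 64 source bytes.
static void vpermb_ref(uint8_t dst[64], const uint8_t idx[64], const uint8_t src[64]) {
  for (int i = 0; i < 64; i++) {
    dst[i] = src[idx[i] & 63];
  }
}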

void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() :
vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() :
@@ -3838,6 +3847,14 @@ void Assembler::evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
emit_int16(0x7D, (0xC0 | encode));
}

void Assembler::evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512_vbmi(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), ctl->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16((unsigned char)0x83, (unsigned char)(0xC0 | encode));
}
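
For context: VPMULTISHIFTQB (AVX512_VBMI) extracts, for each destination byte, an unaligned 8-bit field from the corresponding source qword, starting at the bit offset named by the matching control byte (taken mod 64, with wraparound). This is what lets an encoder pull 6-bit groups out of packed input without a cascade of shifts and masks. A scalar model of one 64-bit lane (illustration only, not patch code):

// Reference model for one qword lane of vpmultishiftqb: each control
// byte gives a bit offset into the source qword; the destination byte
// takes the 8 bits starting there, wrapping around bit 63.
static void vpmultishiftqb_ref(uint8_t dst[8], const uint8_t ctl[8], uint64_t src) {
  for (int i = 0; i < 8; i++) {
    int off = ctl[i] & 63;
    uint64_t rot = off ? ((src >> off) | (src << (64 - off))) : src;
    dst[i] = (uint8_t)rot;
  }
}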

void Assembler::pause() {
emit_int16((unsigned char)0xF3, (unsigned char)0x90);
}
@@ -4136,6 +4153,15 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src, int vec_enc) {
emit_int16((unsigned char)0xD7, (0xC0 | encode));
}

void Assembler::vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert((VM_Version::supports_avx2() && vector_len == AVX_256bit), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8((unsigned char)0x8C);
emit_operand(dst, src);
}
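
For context: VPMASKMOVD (AVX2) is a conditional dword load: a lane is read only when the sign bit of its mask dword is set, masked-off lanes are zeroed, and no fault is raised for them. That fault suppression is the point, since it allows safe partial loads at buffer tails without reading past the end. A scalar sketch of the value semantics (the no-fault behavior itself cannot be modeled in plain C++; names are illustrative):

// Reference model for vpmaskmovd ymm_dst, ymm_mask, m256: a lane is
// loaded only if the sign bit of the corresponding mask dword is set.
static void vpmaskmovd_ref(uint32_t dst[8], const uint32_t mask[8], const uint32_t* src) {
  for (int i = 0; i < 8; i++) {
    dst[i] = (mask[i] & 0x80000000u) ? src[i] : 0;
  }
}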

void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -6565,6 +6591,13 @@ void Assembler::psubq(XMMRegister dst, XMMRegister src) {
emit_int8((0xC0 | encode));
}

void Assembler::vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16((unsigned char)0xD8, (0xC0 | encode));
}
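
For context: VPSUBUSB subtracts packed unsigned bytes with saturation, clamping at zero instead of wrapping, which makes it a common building block for branch-free byte range classification. Scalar semantics of one lane (illustration only):

// Reference model for one byte lane of vpsubusb: unsigned subtract,
// saturating at 0.
static inline uint8_t vpsubusb_ref(uint8_t a, uint8_t b) {
  return (a > b) ? (uint8_t)(a - b) : 0;
}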

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6656,6 +6689,15 @@ void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
emit_int16((unsigned char)0xF4, (0xC0 | encode));
}

void Assembler::vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert((vector_len == AVX_128bit && VM_Version::supports_avx()) ||
(vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
(vector_len == AVX_512bit && VM_Version::supports_avx512bw()), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16((unsigned char)0xE4, (0xC0 | encode));
}
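
For context: VPMULHUW multiplies packed unsigned 16-bit lanes and keeps the high 16 bits of each 32-bit product. Multiplying a lane by (1 << (16 - n)), for 1 <= n <= 16, therefore yields that lane shifted right by n, a standard trick for per-lane logical right shifts on vector widths that lack a variable word shift. Scalar semantics of one lane (illustration only):

// Reference model for one word lane of vpmulhuw: high half of the
// unsigned 16x16 -> 32-bit product.
static inline uint16_t vpmulhuw_ref(uint16_t a, uint16_t b) {
  return (uint16_t)(((uint32_t)a * (uint32_t)b) >> 16);
}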

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5 changes: 5 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1690,6 +1690,7 @@ class Assembler : public AbstractAssembler {
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1700,6 +1701,7 @@
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);

void pause();

@@ -1748,6 +1750,7 @@

void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
@@ -2250,6 +2253,7 @@
void psubw(XMMRegister dst, XMMRegister src);
void psubd(XMMRegister dst, XMMRegister src);
void psubq(XMMRegister dst, XMMRegister src);
void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -2270,6 +2274,7 @@
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Minimum of packed integers
void pminsb(XMMRegister dst, XMMRegister src);