Skip to content

Commit a941397

Browse files
author
Jatin Bhateja
committed Jun 9, 2024
8329031: CPUID feature detection for Advanced Performance Extensions (Intel® APX)
Reviewed-by: sviswanathan, kvn
1 parent 8d2f9e5 commit a941397

File tree

8 files changed

+185
-26
lines changed

8 files changed

+185
-26
lines changed
 

‎src/hotspot/cpu/x86/globals_x86.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
115115
"Highest supported AVX instructions set on x86/x64") \
116116
range(0, 3) \
117117
\
118+
\
119+
product(bool, UseAPX, false, EXPERIMENTAL, \
120+
"Use Intel Advanced Performance Extensions") \
121+
\
118122
product(bool, UseKNLSetting, false, DIAGNOSTIC, \
119123
"Control whether Knights platform setting should be used") \
120124
\
@@ -234,8 +238,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
234238
"Turn off JVM mitigations related to Intel micro code " \
235239
"mitigations for the Intel JCC erratum") \
236240
\
237-
product(bool, UseAPX, false, EXPERIMENTAL, \
238-
"Use Advanced Performance Extensions on x86") \
239241
// end of ARCH_FLAGS
240242

241243
#endif // CPU_X86_GLOBALS_X86_HPP

‎src/hotspot/cpu/x86/vm_version_x86.cpp

+110-13
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,22 @@ const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEAT
5656
address VM_Version::_cpuinfo_segv_addr = 0;
5757
// Address of instruction after the one which causes SEGV
5858
address VM_Version::_cpuinfo_cont_addr = 0;
59+
// Address of instruction which causes APX specific SEGV
60+
address VM_Version::_cpuinfo_segv_addr_apx = 0;
61+
// Address of instruction after the one which causes APX specific SEGV
62+
address VM_Version::_cpuinfo_cont_addr_apx = 0;
5963

6064
static BufferBlob* stub_blob;
6165
static const int stub_size = 2000;
6266

6367
extern "C" {
6468
typedef void (*get_cpu_info_stub_t)(void*);
6569
typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
70+
typedef void (*clear_apx_test_state_t)(void);
6671
}
6772
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
6873
static detect_virt_stub_t detect_virt_stub = nullptr;
74+
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
6975

7076
#ifdef _LP64
7177

@@ -102,6 +108,27 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
102108

103109
VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
104110

111+
address clear_apx_test_state() {
112+
# define __ _masm->
113+
address start = __ pc();
114+
// EGPRs are call-clobbered registers. Explicitly clearing r16 and r31 during signal
115+
// handling guarantees that register values observed after signal handling were
116+
// restored by the operating system, and not merely left unmodified.
117+
118+
/* FIXME: Uncomment the following code after OS enablement of APX
119+
bool save_apx = UseAPX;
120+
VM_Version::set_apx_cpuFeatures();
121+
UseAPX = true;
122+
// EGPR state save/restoration.
123+
__ mov64(r16, 0L);
124+
__ mov64(r31, 0L);
125+
UseAPX = save_apx;
126+
VM_Version::clean_cpuFeatures();
127+
*/
128+
__ ret(0);
129+
return start;
130+
}
131+
105132
address generate_get_cpu_info() {
106133
// Flags to test CPU type.
107134
const uint32_t HS_EFL_AC = 0x40000;
@@ -113,7 +140,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
113140
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
114141

115142
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
116-
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
143+
Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
144+
Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
117145
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
118146

119147
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -288,7 +316,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
288316
__ movl(Address(rsi, 4), rdx);
289317

290318
//
291-
// cpuid(0x7) Structured Extended Features
319+
// cpuid(0x7) Structured Extended Features Enumeration Leaf.
292320
//
293321
__ bind(sef_cpuid);
294322
__ movl(rax, 7);
@@ -303,12 +331,16 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
303331
__ movl(Address(rsi, 8), rcx);
304332
__ movl(Address(rsi, 12), rdx);
305333

306-
// ECX = 1
334+
//
335+
// cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
336+
//
337+
__ bind(sefsl1_cpuid);
307338
__ movl(rax, 7);
308339
__ movl(rcx, 1);
309340
__ cpuid();
310-
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
341+
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
311342
__ movl(Address(rsi, 0), rax);
343+
__ movl(Address(rsi, 4), rdx);
312344

313345
//
314346
// Extended cpuid(0x80000000)
@@ -387,6 +419,46 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
387419
__ movl(Address(rsi, 8), rcx);
388420
__ movl(Address(rsi,12), rdx);
389421

422+
#ifndef PRODUCT
423+
//
424+
// Check if OS has enabled XGETBV instruction to access XCR0
425+
// (OSXSAVE feature flag) and CPU supports APX
426+
//
427+
// To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
428+
// and XCR0[19] bit for OS support to save/restore extended GPR state.
429+
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
430+
__ movl(rax, 0x200000);
431+
__ andl(rax, Address(rsi, 4));
432+
__ cmpl(rax, 0x200000);
433+
__ jcc(Assembler::notEqual, vector_save_restore);
434+
// check _cpuid_info.xem_xcr0_eax.bits.apx_f
435+
__ movl(rax, 0x80000);
436+
__ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
437+
__ cmpl(rax, 0x80000);
438+
__ jcc(Assembler::notEqual, vector_save_restore);
439+
440+
/* FIXME: Uncomment while integrating JDK-8329032
441+
bool save_apx = UseAPX;
442+
VM_Version::set_apx_cpuFeatures();
443+
UseAPX = true;
444+
__ mov64(r16, VM_Version::egpr_test_value());
445+
__ mov64(r31, VM_Version::egpr_test_value());
446+
*/
447+
__ xorl(rsi, rsi);
448+
VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
449+
// Generate SEGV
450+
__ movl(rax, Address(rsi, 0));
451+
452+
VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
453+
/* FIXME: Uncomment after integration of JDK-8329032
454+
__ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
455+
__ movq(Address(rsi, 0), r16);
456+
__ movq(Address(rsi, 8), r31);
457+
458+
UseAPX = save_apx;
459+
*/
460+
#endif
461+
__ bind(vector_save_restore);
390462
//
391463
// Check if OS has enabled XGETBV instruction to access XCR0
392464
// (OSXSAVE feature flag) and CPU supports AVX
@@ -580,6 +652,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
580652
__ vmovdqu(xmm7, Address(rsp, 0));
581653
__ addptr(rsp, 32);
582654
#endif // _WINDOWS
655+
583656
generate_vzeroupper(wrapup);
584657
VM_Version::clean_cpuFeatures();
585658
UseAVX = saved_useavx;
@@ -940,6 +1013,7 @@ void VM_Version::get_processor_features() {
9401013
FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
9411014
}
9421015
}
1016+
9431017
if (UseAVX > use_avx_limit) {
9441018
if (UseSSE < 4) {
9451019
warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
@@ -963,6 +1037,16 @@ void VM_Version::get_processor_features() {
9631037
_features &= ~CPU_AVX512_VBMI2;
9641038
_features &= ~CPU_AVX512_BITALG;
9651039
_features &= ~CPU_AVX512_IFMA;
1040+
_features &= ~CPU_APX_F;
1041+
}
1042+
1043+
// Currently APX support is only enabled for targets supporting AVX512VL feature.
1044+
bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1045+
if (UseAPX && !apx_supported) {
1046+
warning("UseAPX is not supported on this CPU, setting it to false");
1047+
FLAG_SET_DEFAULT(UseAPX, false);
1048+
} else if (FLAG_IS_DEFAULT(UseAPX)) {
1049+
FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
9661050
}
9671051

9681052
if (UseAVX < 2) {
@@ -1002,14 +1086,6 @@ void VM_Version::get_processor_features() {
10021086
}
10031087
}
10041088

1005-
// APX support not enabled yet
1006-
if (UseAPX) {
1007-
if (!FLAG_IS_DEFAULT(UseAPX)) {
1008-
warning("APX is not supported on this CPU.");
1009-
}
1010-
FLAG_SET_DEFAULT(UseAPX, false);
1011-
}
1012-
10131089
if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
10141090
_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
10151091
} else {
@@ -2143,6 +2219,10 @@ int VM_Version::avx3_threshold() {
21432219
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
21442220
}
21452221

2222+
void VM_Version::clear_apx_test_state() {
2223+
clear_apx_test_state_stub();
2224+
}
2225+
21462226
static bool _vm_version_initialized = false;
21472227

21482228
void VM_Version::initialize() {
@@ -2160,6 +2240,8 @@ void VM_Version::initialize() {
21602240
detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
21612241
g.generate_detect_virt());
21622242

2243+
clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2244+
g.clear_apx_test_state());
21632245
get_processor_features();
21642246

21652247
LP64_ONLY(Assembler::precompute_instructions();)
@@ -2958,6 +3040,10 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
29583040
result |= CPU_SSE4_2;
29593041
if (std_cpuid1_ecx.bits.popcnt != 0)
29603042
result |= CPU_POPCNT;
3043+
if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
3044+
xem_xcr0_eax.bits.apx_f != 0) {
3045+
result |= CPU_APX_F;
3046+
}
29613047
if (std_cpuid1_ecx.bits.avx != 0 &&
29623048
std_cpuid1_ecx.bits.osxsave != 0 &&
29633049
xem_xcr0_eax.bits.sse != 0 &&
@@ -2968,7 +3054,7 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
29683054
result |= CPU_F16C;
29693055
if (sef_cpuid7_ebx.bits.avx2 != 0) {
29703056
result |= CPU_AVX2;
2971-
if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0)
3057+
if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
29723058
result |= CPU_AVX_IFMA;
29733059
}
29743060
if (sef_cpuid7_ecx.bits.gfni != 0)
@@ -3142,6 +3228,17 @@ bool VM_Version::os_supports_avx_vectors() {
31423228
return retVal;
31433229
}
31443230

3231+
bool VM_Version::os_supports_apx_egprs() {
3232+
if (!supports_apx_f()) {
3233+
return false;
3234+
}
3235+
if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3236+
_cpuid_info.apx_save[1] != egpr_test_value()) {
3237+
return false;
3238+
}
3239+
return true;
3240+
}
3241+
31453242
uint VM_Version::cores_per_cpu() {
31463243
uint result = 1;
31473244
if (is_intel()) {

0 commit comments

Comments
 (0)
Failed to load comments.