openjdk · theRealAph · Oct 11, 2022 · Oct 11, 2022 · Oct 11, 2022 · Oct 12, 2022
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -5167,8 +5167,8 @@ void MacroAssembler::restore_cpu_control_state_after_jni(Register rscratch) {
       // Perform a little arithmetic to make sure that denormal
       // numbers are handled correctly, i.e. that the "Denormals Are
       // Zeros" flag has not been set.
-      movsd(xmm9, ExternalAddress(StubRoutines::x86::addr_unity()), rsi);
-      movsd(xmm8, ExternalAddress(StubRoutines::x86::addr_thresh()), rsi);
+      movsd(xmm9, ExternalAddress(StubRoutines::large_denormal_addr()), rsi);
+      movsd(xmm8, ExternalAddress(StubRoutines::small_denormal_addr()), rsi);
       addsd(xmm8, xmm9);
       ucomisd(xmm8, xmm9);
       jcc(Assembler::equal, FAIL);

diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -3899,10 +3899,6 @@ void StubGenerator::create_control_words() {
   StubRoutines::x86::_mxcsr_std = 0x1F80;
   // Round to zero, 64-bit mode, exceptions masked
   StubRoutines::x86::_mxcsr_rz = 0x7F80;
-  StubRoutines::x86::_unity
-    = jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
-  StubRoutines::x86::_thresh
-    = jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
 }
 
 // Initialization

diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
@@ -128,8 +128,6 @@ class x86 {
   static jint    _mxcsr_std;
 #ifdef _LP64
   static jint    _mxcsr_rz;
-  static double  _unity;
-  static double  _thresh;
 #endif // _LP64
 
   static address _verify_mxcsr_entry;
@@ -214,8 +212,6 @@ class x86 {
   static address addr_mxcsr_std()        { return (address)&_mxcsr_std; }
 #ifdef _LP64
   static address addr_mxcsr_rz()        { return (address)&_mxcsr_rz; }
-  static address addr_unity()            { return (address)&_unity; }
-  static address addr_thresh()           { return (address)&_thresh; }
 #endif // _LP64
   static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
   static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }

diff --git a/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp b/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp
@@ -33,8 +33,6 @@
 
 jint    StubRoutines::x86::_mxcsr_std = 0;
 jint    StubRoutines::x86::_mxcsr_rz = 0;
-double  StubRoutines::x86::_unity;
-double  StubRoutines::x86::_thresh;
 
 address StubRoutines::x86::_get_previous_sp_entry = nullptr;
 

diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
@@ -974,7 +974,7 @@ bool os::dll_address_to_library_name(address addr, char* buf,
 // same architecture as Hotspot is running on
 
 void *os::Bsd::dlopen_helper(const char *filename, int mode) {
-#if defined(__GNUC__)
+#ifndef IA32
   // Save and restore the floating-point environment around dlopen().
   // There are known cases where global library initialization sets
   // FPU flags that affect computation accuracy, for example, enabling
@@ -984,30 +984,26 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode) {
   // numerical "accuracy", but we need to protect Java semantics first
   // and foremost. See JDK-8295159.
 
+  // This workaround is ineffective on IA32 systems because the MXCSR
+  // register (which controls flush-to-zero mode) is not stored in the
+  // legacy fenv.
+
   fenv_t default_fenv;
   int rtn = fegetenv(&default_fenv);
   assert(rtn == 0, "fegetnv must succeed");
-#endif // defined(__GNUC__)
+#endif // IA32
 
   void * result= ::dlopen(filename, RTLD_LAZY);
 
-#if defined(__GNUC__)
-  if (result  != nullptr) {
-    // Quickly test to make sure denormals are correctly handled.
-    static const double unity
-      = jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
-    static const volatile double thresh
-      = jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
-    if (unity + thresh == unity || -unity - thresh == -unity) {
-      // We just dlopen()ed a library that mangled the floating-point
-      // flags. Silently fix things now.
-      int rtn = fesetenv(&default_fenv);
-      assert(rtn == 0, "fesetenv must succeed");
-      assert(unity + thresh != unity && -unity - thresh != -unity,
-             "fsetenv didn't work");
-    }
+#ifndef IA32
+  if (result != nullptr && StubRoutines::FTZ_mode_enabled()) {
+    // The library we just dlopen()ed mangled the floating-point flags.
+    // Silently restore the environment that was saved before the call.
+    int rtn = fesetenv(&default_fenv);
+    assert(rtn == 0, "fesetenv must succeed");
+    assert(! StubRoutines::FTZ_mode_enabled(), "fsetenv didn't work");
+  }
-#endif // defined(__GNUC__)
+#endif // IA32
 
   return result;
 }

diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
@@ -1798,6 +1798,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
 
 void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
                                 int ebuflen) {
+#ifndef IA32
   // Save and restore the floating-point environment around dlopen().
   // There are known cases where global library initialization sets
   // FPU flags that affect computation accuracy, for example, enabling
@@ -1806,9 +1807,15 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
   // that might depend on these FPU features for performance and/or
   // numerical "accuracy", but we need to protect Java semantics first
   // and foremost. See JDK-8295159.
+
+  // This workaround is ineffective on IA32 systems because the MXCSR
+  // register (which controls flush-to-zero mode) is not stored in the
+  // legacy fenv.
+
   fenv_t default_fenv;
   int rtn = fegetenv(&default_fenv);
   assert(rtn == 0, "fegetnv must succeed");
+#endif // IA32
 
   void * result = ::dlopen(filename, RTLD_LAZY);
 
@@ -1841,19 +1848,18 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
     event.set_errorMessage(nullptr);
     event.commit();
 #endif
+
+#ifndef IA32
     // Quickly test to make sure denormals are correctly handled.
-    static const double unity
-      = jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
-    static const volatile double thresh
-      = jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
-    if (unity + thresh == unity || -unity - thresh == -unity) {
+    if (StubRoutines::FTZ_mode_enabled()) {
       // We just dlopen()ed a library that mangled the floating-point
       // flags. Silently fix things now.
       int rtn = fesetenv(&default_fenv);
       assert(rtn == 0, "fesetenv must succeed");
-      assert(unity + thresh != unity && -unity - thresh != -unity,
-             "fsetenv didn't work");
+      assert(! StubRoutines::FTZ_mode_enabled(), "fsetenv didn't work");
     }
+#endif // IA32
+
   }
   return result;
 }

diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp
@@ -180,6 +180,11 @@ address StubRoutines::_cont_thaw          = nullptr;
 address StubRoutines::_cont_returnBarrier = nullptr;
 address StubRoutines::_cont_returnBarrierExc = nullptr;
 
+const double StubRoutines::_large_denormal
+    = jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
+const volatile double StubRoutines::_small_denormal
+    = jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
+
 JFR_ONLY(RuntimeStub* StubRoutines::_jfr_write_checkpoint_stub = nullptr;)
 JFR_ONLY(address StubRoutines::_jfr_write_checkpoint = nullptr;)
 JFR_ONLY(RuntimeStub* StubRoutines::_jfr_return_lease_stub = nullptr;)
@@ -307,6 +312,28 @@ void compiler_stubs_init(bool in_compiler_thread) {
   }
 }
 
+// Check for Flush-To-Zero mode; returns true if it appears to be on.
+
+// On some processors faster execution can be achieved by returning
+// zero for extremely small results, rather than an IEEE-754 denormal
+// number. This mode is not compatible with the Java Language
+// Specification.
+bool StubRoutines::FTZ_mode_enabled() {
+  // Quickly test to make sure denormals are correctly handled.
+
+  // We need the addition of _large_denormal and _small_denormal to be
+  // performed at runtime. Making _small_denormal volatile ensures
+  // that the expression returned below isn't evaluated at compile time.
+
+  // _small_denormal is the smallest denormal number that has two bits
+  // set. _large_denormal is a number such that, when _small_denormal
+  // is added to it, the sum must be rounded according to the mode. These
+  // two tests detect the rounding mode in use. If denormals are turned
+  // off (i.e. denormals-are-zero) FTZ mode is in use.
+  return (_large_denormal + _small_denormal == _large_denormal
+      || -_large_denormal - _small_denormal == -_large_denormal);
+}
+
 //
 // Default versions of arraycopy functions
 //

diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp
@@ -258,6 +258,9 @@ class StubRoutines: AllStatic {
   static address _cont_returnBarrier;
   static address _cont_returnBarrierExc;
 
+  static const double _large_denormal;
+  static const volatile double _small_denormal;
+
   JFR_ONLY(static RuntimeStub* _jfr_write_checkpoint_stub;)
   JFR_ONLY(static address _jfr_write_checkpoint;)
   JFR_ONLY(static RuntimeStub* _jfr_return_lease_stub;)
@@ -480,6 +483,14 @@ class StubRoutines: AllStatic {
   static void arrayof_jlong_copy     (HeapWord* src, HeapWord* dest, size_t count);
   static void arrayof_oop_copy       (HeapWord* src, HeapWord* dest, size_t count);
   static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
+
+  static address small_denormal_addr() {  // address of _small_denormal
+    return (address)&_small_denormal;     // (operand loaded by the x86 macro assembler)
+  }
+  static address large_denormal_addr() {  // address of _large_denormal
+    return (address)&_large_denormal;     // (operand loaded by the x86 macro assembler)
+  }
+  static bool FTZ_mode_enabled();         // runtime probe; used by the dlopen helpers
 };
 
 #endif // SHARE_RUNTIME_STUBROUTINES_HPP
diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestDenormalDouble.java b/test/hotspot/jtreg/compiler/floatingpoint/TestDenormalDouble.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it

diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestDenormalFloat.java b/test/hotspot/jtreg/compiler/floatingpoint/TestDenormalFloat.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it

diff --git a/test/hotspot/jtreg/compiler/floatingpoint/libfast-math.c b/test/hotspot/jtreg/compiler/floatingpoint/libfast-math.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -21,8 +21,6 @@
  * questions.
  */
 
-#include <assert.h>
-#include <fenv.h>
 #include "jni.h"
 
 // See GCC bug 55522:
@@ -34,19 +32,25 @@
 // This breaks Java's floating point arithmetic.
 
 #if defined(__GNUC__)
+
+// On systems on which GCC bug 55522 has been fixed, this constructor
+// serves to reproduce that bug for the purposes of testing HotSpot.
 static void __attribute__((constructor)) set_flush_to_zero(void) {
 
-#if defined(__x86_64__) && defined(SSE)
+#if defined(__x86_64__)
+
 #define MXCSR_DAZ (1 << 6)      /* Enable denormals are zero mode */
 #define MXCSR_FTZ (1 << 15)     /* Enable flush to zero mode */
   unsigned int mxcsr = __builtin_ia32_stmxcsr ();
   mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
   __builtin_ia32_ldmxcsr (mxcsr);
 
 #elif defined(__aarch64__)
+
 #define _FPU_FPCR_FZ (unsigned long)0x1000000
 #define _FPU_SETCW(fpcr) \
-  {  __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr)); }
+  __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr));
+
   /* Flush to zero, round to nearest, IEEE exceptions disabled.  */
   _FPU_SETCW (_FPU_FPCR_FZ);