Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8295159: DSO created with -ffast-math breaks Java floating-point arithmetic #10661

Closed
wants to merge 43 commits into from
Closed
Changes from 2 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
462ce61
8295159: test cases
theRealAph Oct 11, 2022
2512e8f
8295159: DSO created with -ffast-math breaks Java floating-point arit…
theRealAph Oct 11, 2022
af243d4
Whitespace
theRealAph Oct 11, 2022
8d83473
8295159: DSO created with -ffast-math breaks Java floating-point arit…
theRealAph Oct 12, 2022
d5655d4
8295159: DSO created with -ffast-math breaks Java floating-point arit…
theRealAph Oct 12, 2022
fe388b8
8295159: DSO created with -ffast-math breaks Java floating-point arit…
theRealAph Oct 12, 2022
bcff459
8295159: DSO created with -ffast-math breaks Java floating-point arit…
theRealAph Oct 12, 2022
2d7c957
Update src/hotspot/os/linux/os_linux.cpp
theRealAph Oct 12, 2022
3f442be
Update src/hotspot/os/linux/os_linux.cpp
theRealAph Oct 12, 2022
64ef36f
8295159: DSO created with -ffast-math breaks Java floating-point arit…
theRealAph Oct 12, 2022
1d511ec
Temp
theRealAph Oct 25, 2022
25c5118
More.
theRealAph Oct 26, 2022
4b60f28
Merge branch 'clean' into JDK-8295159
Sep 25, 2023
e5e4b6a
Safepoint experiment
Sep 26, 2023
19781ce
Cleanup & simplify
Sep 26, 2023
e6c37eb
Fix for x86_32
Sep 27, 2023
a24209b
Cleanup
Sep 27, 2023
d1fc29e
Cleanup
Sep 28, 2023
d6ccd97
x86-32 changes
Sep 28, 2023
a4971ab
AArch64
Sep 28, 2023
50591de
MacOS
Sep 28, 2023
de779b4
Stash x86-32 changes
Oct 2, 2023
9164f8b
Remove x32 handling
Oct 2, 2023
b8a605b
Fix conditional compilation
Oct 3, 2023
c0cd014
cleanup
Oct 3, 2023
a1fb1de
Fix LLVM
Oct 3, 2023
d38b2ae
Give x32 bug its own ID.
Oct 10, 2023
c56adbd
Merge branch 'JDK-8295159' of https://github.com/theRealAph/jdk into …
Oct 10, 2023
01f6e22
Add TestDenormalDouble.java
Oct 11, 2023
d9c50a2
Review feedback
Oct 16, 2023
a3305e5
Comments only.
Oct 16, 2023
b817d47
Remove change to RestoreMXCSROnJNICalls
Oct 17, 2023
95e861d
Merge branch 'JDK-8295159' of https://github.com/theRealAph/jdk into …
Oct 17, 2023
7cba08d
Review feedback
Oct 17, 2023
91eb9bb
s/Denormal/Subnormal/g
Oct 26, 2023
393a438
Merge from head
Oct 26, 2023
c554746
Remove dead code
Oct 26, 2023
2ca6f8c
Duh
Oct 26, 2023
bd51efd
Remove accidental include
Oct 26, 2023
30a1381
Move IEE subnormal check to globalDefinitions
Oct 27, 2023
53388f9
Fix header
Oct 27, 2023
80ce877
Merge
Nov 3, 2023
b141262
Delete src/hotspot/os/linux/.#os_linux.cpp
theRealAph Nov 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
@@ -5167,8 +5167,8 @@ void MacroAssembler::restore_cpu_control_state_after_jni(Register rscratch) {
// Perform a little arithmetic to make sure that denormal
// numbers are handled correctly, i.e. that the "Denormals Are
// Zeros" flag has not been set.
movsd(xmm9, ExternalAddress(StubRoutines::x86::addr_unity()), rsi);
movsd(xmm8, ExternalAddress(StubRoutines::x86::addr_thresh()), rsi);
movsd(xmm9, ExternalAddress(StubRoutines::large_denormal_addr()), rsi);
movsd(xmm8, ExternalAddress(StubRoutines::small_denormal_addr()), rsi);
addsd(xmm8, xmm9);
ucomisd(xmm8, xmm9);
jcc(Assembler::equal, FAIL);
4 changes: 0 additions & 4 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
@@ -3899,10 +3899,6 @@ void StubGenerator::create_control_words() {
StubRoutines::x86::_mxcsr_std = 0x1F80;
// Round to zero, 64-bit mode, exceptions masked
StubRoutines::x86::_mxcsr_rz = 0x7F80;
StubRoutines::x86::_unity
= jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
StubRoutines::x86::_thresh
= jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
}

// Initialization
4 changes: 0 additions & 4 deletions src/hotspot/cpu/x86/stubRoutines_x86.hpp
Original file line number Diff line number Diff line change
@@ -128,8 +128,6 @@ class x86 {
static jint _mxcsr_std;
#ifdef _LP64
static jint _mxcsr_rz;
static double _unity;
static double _thresh;
#endif // _LP64

static address _verify_mxcsr_entry;
@@ -214,8 +212,6 @@ class x86 {
static address addr_mxcsr_std() { return (address)&_mxcsr_std; }
#ifdef _LP64
static address addr_mxcsr_rz() { return (address)&_mxcsr_rz; }
static address addr_unity() { return (address)&_unity; }
static address addr_thresh() { return (address)&_thresh; }
#endif // _LP64
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
2 changes: 0 additions & 2 deletions src/hotspot/cpu/x86/stubRoutines_x86_64.cpp
Original file line number Diff line number Diff line change
@@ -33,8 +33,6 @@

jint StubRoutines::x86::_mxcsr_std = 0;
jint StubRoutines::x86::_mxcsr_rz = 0;
double StubRoutines::x86::_unity;
double StubRoutines::x86::_thresh;

address StubRoutines::x86::_get_previous_sp_entry = nullptr;

32 changes: 14 additions & 18 deletions src/hotspot/os/bsd/os_bsd.cpp
Original file line number Diff line number Diff line change
@@ -974,7 +974,7 @@ bool os::dll_address_to_library_name(address addr, char* buf,
// same architecture as Hotspot is running on

void *os::Bsd::dlopen_helper(const char *filename, int mode) {
#if defined(__GNUC__)
#ifndef IA32
// Save and restore the floating-point environment around dlopen().
// There are known cases where global library initialization sets
// FPU flags that affect computation accuracy, for example, enabling
@@ -984,30 +984,26 @@ void *os::Bsd::dlopen_helper(const char *filename, int mode) {
// numerical "accuracy", but we need to protect Java semantics first
// and foremost. See JDK-8295159.

// This workaround is ineffective on IA32 systems because the MXCSR
// register (which controls flush-to-zero mode) is not stored in the
// legacy fenv.

fenv_t default_fenv;
int rtn = fegetenv(&default_fenv);
assert(rtn == 0, "fegetnv must succeed");
#endif // defined(__GNUC__)
#endif // IA32

void * result= ::dlopen(filename, RTLD_LAZY);

#if defined(__GNUC__)
if (result != nullptr) {
// Quickly test to make sure denormals are correctly handled.
static const double unity
= jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
static const volatile double thresh
= jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
if (unity + thresh == unity || -unity - thresh == -unity) {
// We just dlopen()ed a library that mangled the floating-point
// flags. Silently fix things now.
int rtn = fesetenv(&default_fenv);
assert(rtn == 0, "fesetenv must succeed");
assert(unity + thresh != unity && -unity - thresh != -unity,
"fsetenv didn't work");
}
#ifndef IA32
if (result != nullptr && StubRoutines::FTZ_mode_enabled()) {
// We just dlopen()ed a library that mangled the floating-point
// flags. Silently fix things now.
int rtn = fesetenv(&default_fenv);
assert(rtn == 0, "fesetenv must succeed");
assert(! StubRoutines::FTZ_mode_enabled, "fsetenv didn't work");
}
#endif // defined(__GNUC__)
#endif // IA32

return result;
}
20 changes: 13 additions & 7 deletions src/hotspot/os/linux/os_linux.cpp
Original file line number Diff line number Diff line change
@@ -1798,6 +1798,7 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {

void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
int ebuflen) {
#ifndef IA32
// Save and restore the floating-point environment around dlopen().
// There are known cases where global library initialization sets
// FPU flags that affect computation accuracy, for example, enabling
@@ -1806,9 +1807,15 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
// that might depend on these FPU features for performance and/or
// numerical "accuracy", but we need to protect Java semantics first
// and foremost. See JDK-8295159.

// This workaround is ineffective on IA32 systems because the MXCSR
// register (which controls flush-to-zero mode) is not stored in the
// legacy fenv.

fenv_t default_fenv;
int rtn = fegetenv(&default_fenv);
assert(rtn == 0, "fegetnv must succeed");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo: fetgetnv -> fegetenv

#endif // IA32

void * result = ::dlopen(filename, RTLD_LAZY);

@@ -1841,19 +1848,18 @@ void * os::Linux::dlopen_helper(const char *filename, char *ebuf,
event.set_errorMessage(nullptr);
event.commit();
#endif

#ifndef IA32
// Quickly test to make sure denormals are correctly handled.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I recommend using "subnormal" rather than "denormal" for general terminology on this point. While "denormal" was used in the original IEEE 754 standard from 1985, subsequent iterations of the standard using "subnormal" The term "subnormal" has also been used for the last several editions of JLS and JVMS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've long avoided "subnormal" because

subnormal in British English, adjective
3. [old-fashioned, offensive] a person of low intelligence

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All done. Good to go?

static const double unity
= jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
static const volatile double thresh
= jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;
if (unity + thresh == unity || -unity - thresh == -unity) {
if (StubRoutines::FTZ_mode_enabled()) {
// We just dlopen()ed a library that mangled the floating-point
// flags. Silently fix things now.
int rtn = fesetenv(&default_fenv);
assert(rtn == 0, "fesetenv must succeed");
assert(unity + thresh != unity && -unity - thresh != -unity,
"fsetenv didn't work");
assert(! StubRoutines::FTZ_mode_enabled(), "fsetenv didn't work");
}
#endif // IA32

}
return result;
}
27 changes: 27 additions & 0 deletions src/hotspot/share/runtime/stubRoutines.cpp
Original file line number Diff line number Diff line change
@@ -180,6 +180,11 @@ address StubRoutines::_cont_thaw = nullptr;
address StubRoutines::_cont_returnBarrier = nullptr;
address StubRoutines::_cont_returnBarrierExc = nullptr;

const double StubRoutines::_large_denormal
= jdouble_cast(0x0030000000000000); // 0x1.0p-1020;
const volatile double StubRoutines::_small_denormal
= jdouble_cast(0x0000000000000003); // 0x0.0000000000003p-1022;

JFR_ONLY(RuntimeStub* StubRoutines::_jfr_write_checkpoint_stub = nullptr;)
JFR_ONLY(address StubRoutines::_jfr_write_checkpoint = nullptr;)
JFR_ONLY(RuntimeStub* StubRoutines::_jfr_return_lease_stub = nullptr;)
@@ -307,6 +312,28 @@ void compiler_stubs_init(bool in_compiler_thread) {
}
}

// Check for Flush-To-Zero mode

// On some processors faster execution can be achieved by returning
// zero for extremely small results, rather than an IEEE-754 denormal
// number. This mode is not compatible with the Java Language
// Standard.
bool StubRoutines::FTZ_mode_enabled() {
// Quickly test to make sure denormals are correctly handled.

// We need the addition of _large_denormal and _small_denormal to be
// performed at runtime. Making _small_denormal volatile ensures
// that the following expression isn't evaluated at compile time:

// _small_denormal is the smallest denormal number that has two bits
// set. _large_denormal is a number such that, when _small_denormal
// is added it it, must be rounded according to the mode. These two
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/it it, must/to it, it must/

// tests detect the rounding mode in use. If denormals are turned
// off (i.e. denormals-are-zero) FTZ mode is in use.
return (_large_denormal + _small_denormal == _large_denormal
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a possible future expansion, if there are cases where foreign or native code sets the rounding mode to something other than round to nearest, expressions in the same vein can be used to detect that case and restore that other aspect of the control word.

|| -_large_denormal - _small_denormal == -_large_denormal);
}

//
// Default versions of arraycopy functions
//
11 changes: 11 additions & 0 deletions src/hotspot/share/runtime/stubRoutines.hpp
Original file line number Diff line number Diff line change
@@ -258,6 +258,9 @@ class StubRoutines: AllStatic {
static address _cont_returnBarrier;
static address _cont_returnBarrierExc;

static const double _large_denormal;
static const volatile double _small_denormal;

JFR_ONLY(static RuntimeStub* _jfr_write_checkpoint_stub;)
JFR_ONLY(static address _jfr_write_checkpoint;)
JFR_ONLY(static RuntimeStub* _jfr_return_lease_stub;)
@@ -480,6 +483,14 @@ class StubRoutines: AllStatic {
static void arrayof_jlong_copy (HeapWord* src, HeapWord* dest, size_t count);
static void arrayof_oop_copy (HeapWord* src, HeapWord* dest, size_t count);
static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);

static address small_denormal_addr() {
return (address)&_small_denormal;
}
static address large_denormal_addr() {
return (address)&_large_denormal;
}
static bool FTZ_mode_enabled();
};

#endif // SHARE_RUNTIME_STUBROUTINES_HPP
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, Red Hat, Inc. All rights reserved.
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, Red Hat, Inc. All rights reserved.
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
14 changes: 9 additions & 5 deletions test/hotspot/jtreg/compiler/floatingpoint/libfast-math.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, Red Hat, Inc. All rights reserved.
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -21,8 +21,6 @@
* questions.
*/

#include <assert.h>
#include <fenv.h>
#include "jni.h"

// See GCC bug 55522:
@@ -34,19 +32,25 @@
// This breaks Java's floating point arithmetic.

#if defined(__GNUC__)

// On systems on which GCC bug 55522 has been fixed, this constructor
// serves to reproduce that bug for the purposes of testing HotSpot.
static void __attribute__((constructor)) set_flush_to_zero(void) {

#if defined(__x86_64__) && defined(SSE)
#if defined(__x86_64__)

#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */
#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */
unsigned int mxcsr = __builtin_ia32_stmxcsr ();
mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
__builtin_ia32_ldmxcsr (mxcsr);

#elif defined(__aarch64__)

#define _FPU_FPCR_FZ (unsigned long)0x1000000
#define _FPU_SETCW(fpcr) \
{ __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr)); }
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr));

/* Flush to zero, round to nearest, IEEE exceptions disabled. */
_FPU_SETCW (_FPU_FPCR_FZ);