Skip to content

Commit 2e1ced6

Browse files
rgiulietti jddarcy
authored and committed Oct 7, 2024
8341639: Add float16ToShortBits(), signum(), copySign() to Float16
Reviewed-by: darcy
1 parent dd5d236 commit 2e1ced6

File tree

3 files changed

+175
-4
lines changed

3 files changed

+175
-4
lines changed
 

‎src/java.base/share/classes/java/lang/Float16.java

+63-4
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,7 @@ public final class Float16
7676
private static final long serialVersionUID = 16; // Not needed for a value class?
7777

7878
// Functionality for future consideration:
79-
// float16ToShortBits that normalizes NaNs, c.f. floatToIntBits vs floatToRawIntBits
80-
// copysign
8179
// IEEEremainder / remainder operator remainder
82-
// signum
8380

8481
/**
8582
* Returns a {@code Float16} instance wrapping IEEE 754 binary16
@@ -577,7 +574,8 @@ public double doubleValue() {
577574

578575
/**
579576
* Returns a representation of the specified floating-point value
580-
* according to the IEEE 754 floating-point binary16 bit layout.
577+
* according to the IEEE 754 floating-point binary16 bit layout,
578+
* preserving Not-a-Number (NaN) values.
581579
*
582580
* @param f16 a {@code Float16} floating-point number.
583581
* @return the bits that represent the floating-point number.
@@ -589,6 +587,23 @@ public static short float16ToRawShortBits(Float16 f16) {
589587
return f16.value;
590588
}
591589

590+
/**
591+
* Returns a representation of the specified floating-point value
592+
* according to the IEEE 754 floating-point binary16 bit layout.
593+
*
594+
* @param fp16 a {@code Float16} floating-point number.
595+
* @return the bits that represent the floating-point number.
596+
*
597+
* @see Float#floatToIntBits(float)
598+
* @see Double#doubleToLongBits(double)
599+
*/
600+
public static short float16ToShortBits(Float16 fp16) {
601+
if (!isNaN(fp16)) {
602+
return float16ToRawShortBits(fp16);
603+
}
604+
return 0x7e00;
605+
}
606+
592607
/**
593608
* Returns the {@code Float16} value corresponding to a given bit
594609
* representation.
@@ -1313,4 +1328,48 @@ public static Float16 scalb(Float16 v, int scaleFactor) {
13131328
* Double.longBitsToDouble((long) (scaleFactor + DoubleConsts.EXP_BIAS) << Double.PRECISION - 1));
13141329
}
13151330

1331+
/**
1332+
* Returns the first floating-point argument with the sign of the
1333+
* second floating-point argument.
1334+
* This method does not require NaN {@code sign}
1335+
* arguments to be treated as positive values; implementations are
1336+
* permitted to treat some NaN arguments as positive and other NaN
1337+
* arguments as negative to allow greater performance.
1338+
*
1339+
* @apiNote
1340+
* This method corresponds to the copySign operation defined in
1341+
* IEEE 754.
1342+
*
1343+
* @param magnitude the parameter providing the magnitude of the result
1344+
* @param sign the parameter providing the sign of the result
1345+
* @return a value with the magnitude of {@code magnitude}
1346+
* and the sign of {@code sign}.
1347+
*/
1348+
public static Float16 copySign(Float16 magnitude, Float16 sign) {
1349+
return shortBitsToFloat16((short) ((float16ToRawShortBits(sign) &
1350+
(Float16Consts.SIGN_BIT_MASK)) |
1351+
(float16ToRawShortBits(magnitude) &
1352+
(Float16Consts.EXP_BIT_MASK |
1353+
Float16Consts.SIGNIF_BIT_MASK))));
1354+
}
1355+
1356+
/**
1357+
* Returns the signum function of the argument; zero if the argument
1358+
* is zero, 1.0 if the argument is greater than zero, -1.0 if the
1359+
* argument is less than zero.
1360+
*
1361+
* <p>Special Cases:
1362+
* <ul>
1363+
* <li> If the argument is NaN, then the result is NaN.
1364+
* <li> If the argument is positive zero or negative zero, then the
1365+
* result is the same as the argument.
1366+
* </ul>
1367+
*
1368+
* @param f the floating-point value whose signum is to be returned
1369+
* @return the signum function of the argument
1370+
*/
1371+
public static Float16 signum(Float16 f) {
1372+
return (f.floatValue() == 0.0f || isNaN(f)) ? f : copySign(valueOf(1), f);
1373+
}
1374+
13161375
}

‎test/jdk/java/lang/Math/IeeeRecommendedTests.java

+97
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,69 @@ public static int testDoubleBooleanMethods() {
871871

872872
/* ******************** copySign tests******************************** */
873873

874+
public static int testFloat16CopySign() {
875+
int failures = 0;
876+
877+
// testCases[0] are logically positive numbers;
878+
// testCases[1] are negative numbers.
879+
float testCases [][] = {
880+
{+0.0f,
881+
Float16.MIN_VALUE.floatValue(),
882+
Float16_MAX_SUBNORMALmm.floatValue(),
883+
Float16_MAX_SUBNORMAL.floatValue(),
884+
Float16.MIN_NORMAL.floatValue(),
885+
1.0f,
886+
3.0f,
887+
Float16_MAX_VALUEmm.floatValue(),
888+
Float16.MAX_VALUE.floatValue(),
889+
infinityF16.floatValue(),
890+
},
891+
{-infinityF16.floatValue(),
892+
-Float16.MAX_VALUE.floatValue(),
893+
-3.0f,
894+
-1.0f,
895+
-Float16.MIN_NORMAL.floatValue(),
896+
-Float16_MAX_SUBNORMALmm.floatValue(),
897+
-Float16_MAX_SUBNORMAL.floatValue(),
898+
-Float16.MIN_VALUE.floatValue(),
899+
-0.0f}
900+
};
901+
902+
float NaNs[] = {Float16.shortBitsToFloat16((short) 0x7e00).floatValue(), // "positive" NaN
903+
Float16.shortBitsToFloat16((short) 0xfe00).floatValue()}; // "negative" NaN
904+
905+
// Tests shared between raw and non-raw versions
906+
for(int i = 0; i < 2; i++) {
907+
for(int j = 0; j < 2; j++) {
908+
for(int m = 0; m < testCases[i].length; m++) {
909+
for(int n = 0; n < testCases[j].length; n++) {
910+
// copySign(magnitude, sign)
911+
failures+=Tests.test("Float16.copySign(Float16,Float16)",
912+
Float16.valueOf(testCases[i][m]),Float16.valueOf(testCases[j][n]),
913+
Float16.copySign(Float16.valueOf(testCases[i][m]), Float16.valueOf(testCases[j][n])),
914+
Float16.valueOf((j==0?1.0f:-1.0f)*Math.abs(testCases[i][m])) );
915+
}
916+
}
917+
}
918+
}
919+
920+
// For rawCopySign, NaN may effectively have either sign bit
921+
// while for copySign NaNs are treated as if they always have
922+
// a zero sign bit (i.e. as positive numbers)
923+
for(int i = 0; i < 2; i++) {
924+
for(int j = 0; j < NaNs.length; j++) {
925+
for(int m = 0; m < testCases[i].length; m++) {
926+
// copySign(magnitude, sign)
927+
928+
failures += (Float16.abs(Float16.copySign(Float16.valueOf(testCases[i][m]), Float16.valueOf(NaNs[j]))).floatValue() ==
929+
Float16.abs(Float16.valueOf(testCases[i][m])).floatValue()) ? 0:1;
930+
}
931+
}
932+
}
933+
934+
return failures;
935+
}
936+
874937
public static int testFloatCopySign() {
875938
int failures = 0;
876939

@@ -1892,6 +1955,38 @@ public static int testDoubleUlp() {
18921955
return failures;
18931956
}
18941957

1958+
public static int testFloat16Signum() {
1959+
int failures = 0;
1960+
float testCases [][] = {
1961+
{NaNf16.floatValue(), NaNf16.floatValue()},
1962+
{-infinityF16.floatValue(), -1.0f},
1963+
{-Float16.MAX_VALUE.floatValue(), -1.0f},
1964+
{-Float16.MIN_NORMAL.floatValue(), -1.0f},
1965+
{-1.0f, -1.0f},
1966+
{-2.0f, -1.0f},
1967+
{-Float16_MAX_SUBNORMAL.floatValue(), -1.0f},
1968+
{-Float16.MIN_VALUE.floatValue(), -1.0f},
1969+
{-0.0f, -0.0f},
1970+
{+0.0f, +0.0f},
1971+
{Float16.MIN_VALUE.floatValue(), 1.0f},
1972+
{Float16_MAX_SUBNORMALmm.floatValue(), 1.0f},
1973+
{Float16_MAX_SUBNORMAL.floatValue(), 1.0f},
1974+
{Float16.MIN_NORMAL.floatValue(), 1.0f},
1975+
{1.0f, 1.0f},
1976+
{2.0f, 1.0f},
1977+
{Float16_MAX_VALUEmm.floatValue(), 1.0f},
1978+
{Float16.MAX_VALUE.floatValue(), 1.0f},
1979+
{infinityF16.floatValue(), 1.0f}
1980+
};
1981+
1982+
for(int i = 0; i < testCases.length; i++) {
1983+
failures+=Tests.test("Float16.signum(Float16)",
1984+
Float16.valueOf(testCases[i][0]), Float16.signum(Float16.valueOf(testCases[i][0])), Float16.valueOf(testCases[i][1]));
1985+
}
1986+
1987+
return failures;
1988+
}
1989+
18951990
public static int testFloatSignum() {
18961991
int failures = 0;
18971992
float testCases [][] = {
@@ -1981,6 +2076,7 @@ public static void main(String... argv) {
19812076
failures += testFloatBooleanMethods();
19822077
failures += testDoubleBooleanMethods();
19832078

2079+
failures += testFloat16CopySign();
19842080
failures += testFloatCopySign();
19852081
failures += testDoubleCopySign();
19862082

@@ -1991,6 +2087,7 @@ public static void main(String... argv) {
19912087
failures += testFloatUlp();
19922088
failures += testDoubleUlp();
19932089

2090+
failures += testFloat16Signum();
19942091
failures += testFloatSignum();
19952092
failures += testDoubleSignum();
19962093

‎test/jdk/java/lang/Math/Tests.java

+15
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,21 @@ public static int test(String testName,
500500
}
501501
}
502502

503+
public static int test(String testName,
504+
Float16 input1, Float16 input2,
505+
Float16 result, Float16 expected) {
506+
if (Float16.compare(expected, result ) != 0) {
507+
System.err.println("Failure for " + testName + ":\n" +
508+
"\tFor inputs " + input1 + "\t(" + toHexString(input1) + ") and "
509+
+ input2 + "\t(" + toHexString(input2) + ")\n" +
510+
"\texpected " + expected + "\t(" + toHexString(expected) + ")\n" +
511+
"\tgot " + result + "\t(" + toHexString(result) + ").");
512+
return 1;
513+
} else {
514+
return 0;
515+
}
516+
}
517+
503518
public static int test(String testName,
504519
double input1, int input2,
505520
double result, double expected) {

0 commit comments

Comments
 (0)
Please sign in to comment.