
Commit d7273ac

Author: Hamlin Li
Committed: Mar 7, 2024

8320646: RISC-V: C2 VectorCastHF2F
8320647: RISC-V: C2 VectorCastF2HF

Reviewed-by: luhenry, fyang

1 parent: 53c4714

9 files changed: +378 −7 lines
 

src/hotspot/cpu/riscv/assembler_riscv.hpp (+17 −2)
@@ -1191,7 +1191,7 @@ static Assembler::SEW elemtype_to_sew(BasicType etype) {
   void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1, \
             VMA vma = mu, VTA vta = tu, bool vill = false) { \
     unsigned insn = 0; \
-    guarantee(is_uimm5(imm), "imm is invalid"); \
+    guarantee(is_uimm5(imm), "uimm is invalid"); \
     patch((address)&insn, 6, 0, op); \
     patch((address)&insn, 14, 12, funct3); \
     patch((address)&insn, 19, 15, imm); \

@@ -1327,7 +1327,7 @@ enum VectorMask {
   // r_vm
 #define INSN(NAME, op, funct3, funct6) \
   void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \
-    guarantee(is_uimm5(imm), "imm is invalid"); \
+    guarantee(is_uimm5(imm), "uimm is invalid"); \
     patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
   }

@@ -1340,6 +1340,9 @@ enum VectorMask {
   INSN(vslideup_vi, 0b1010111, 0b011, 0b001110);
   INSN(vslidedown_vi, 0b1010111, 0b011, 0b001111);

+  // Vector Narrowing Integer Right Shift Instructions
+  INSN(vnsra_wi, 0b1010111, 0b011, 0b101101);
+
 #undef INSN

 #define INSN(NAME, op, funct3, funct6) \

@@ -1505,6 +1508,9 @@ enum VectorMask {
   INSN(vmulh_vx, 0b1010111, 0b110, 0b100111);
   INSN(vmul_vx, 0b1010111, 0b110, 0b100101);

+  // Vector Widening Integer Add/Subtract
+  INSN(vwadd_vx, 0b1010111, 0b110, 0b110001);
+
   // Vector Integer Min/Max Instructions
   INSN(vmax_vx, 0b1010111, 0b100, 0b000111);
   INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110);

@@ -1538,6 +1544,8 @@ enum VectorMask {
   // Vector Single-Width Integer Add and Subtract
   INSN(vsub_vx, 0b1010111, 0b100, 0b000010);
   INSN(vadd_vx, 0b1010111, 0b100, 0b000000);
+
+  // Vector Integer reverse subtract
   INSN(vrsub_vx, 0b1010111, 0b100, 0b000011);

   // Vector Slide Instructions

@@ -1600,16 +1608,23 @@ enum VectorMask {
     patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
   }

+  // Vector Integer Comparison Instructions
   INSN(vmsgt_vi, 0b1010111, 0b011, 0b011111);
   INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110);
   INSN(vmsle_vi, 0b1010111, 0b011, 0b011101);
   INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100);
   INSN(vmsne_vi, 0b1010111, 0b011, 0b011001);
   INSN(vmseq_vi, 0b1010111, 0b011, 0b011000);
+
+  // Vector Bitwise Logical Instructions
   INSN(vxor_vi, 0b1010111, 0b011, 0b001011);
   INSN(vor_vi, 0b1010111, 0b011, 0b001010);
   INSN(vand_vi, 0b1010111, 0b011, 0b001001);
+
+  // Vector Single-Width Integer Add and Subtract
   INSN(vadd_vi, 0b1010111, 0b011, 0b000000);
+
+  // Vector Integer reverse subtract
   INSN(vrsub_vi, 0b1010111, 0b011, 0b000011);

 #undef INSN

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp (+114 −3)
@@ -2064,8 +2064,8 @@ static void float16_to_float_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Fl
 void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) {
   auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path);

-  // in riscv, NaN needs a special process as fcvt does not work in that case.
-  // in riscv, Inf does not need a special process as fcvt can handle it correctly.
+  // On riscv, NaN needs a special process as fcvt does not work in that case.
+  // On riscv, Inf does not need a special process as fcvt can handle it correctly.
   // but we consider to get the slow path to process NaN and Inf at the same time,
   // as both of them are rare cases, and if we try to get the slow path to handle
   // only NaN case it would sacrifise the performance for normal cases,

@@ -2112,7 +2112,7 @@ static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Re
 void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
   auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 130, float_to_float16_slow_path);

-  // in riscv, NaN needs a special process as fcvt does not work in that case.
+  // On riscv, NaN needs a special process as fcvt does not work in that case.

   // check whether it's a NaN.
   // replace fclass with feq as performance optimization.

@@ -2127,6 +2127,117 @@ void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatR
   bind(stub->continuation());
 }

+static void float16_to_float_v_slow_path(C2_MacroAssembler& masm, C2GeneralStub<VectorRegister, VectorRegister, uint>& stub) {
+#define __ masm.
+  VectorRegister dst = stub.data<0>();
+  VectorRegister src = stub.data<1>();
+  uint vector_length = stub.data<2>();
+  __ bind(stub.entry());
+
+  // following instructions mainly focus on NaN, as riscv does not handle
+  // NaN well with vfwcvt_f_f_v, but the code also works for Inf at the same time.
+  //
+  // construct NaN's in 32 bits from the NaN's in 16 bits,
+  // we need the payloads of non-canonical NaNs to be preserved.
+
+  // adjust vector type to 2 * SEW.
+  __ vsetvli_helper(T_FLOAT, vector_length, Assembler::m1);
+  // widen and sign-extend src data.
+  __ vsext_vf2(dst, src, Assembler::v0_t);
+  __ mv(t0, 0x7f800000);
+  // sign-bit was already set via sign-extension if necessary.
+  __ vsll_vi(dst, dst, 13, Assembler::v0_t);
+  __ vor_vx(dst, dst, t0, Assembler::v0_t);
+
+  __ j(stub.continuation());
+#undef __
+}
+
+// j.l.Float.float16ToFloat
+void C2_MacroAssembler::float16_to_float_v(VectorRegister dst, VectorRegister src, uint vector_length) {
+  auto stub = C2CodeStub::make<VectorRegister, VectorRegister, uint>
+              (dst, src, vector_length, 24, float16_to_float_v_slow_path);
+  assert_different_registers(dst, src);
+
+  // On riscv, NaN needs a special process as vfwcvt_f_f_v does not work in that case.
+  // On riscv, Inf does not need a special process as vfwcvt_f_f_v can handle it correctly.
+  // but we consider to get the slow path to process NaN and Inf at the same time,
+  // as both of them are rare cases, and if we try to get the slow path to handle
+  // only NaN case it would sacrifise the performance for normal cases,
+  // i.e. non-NaN and non-Inf cases.
+
+  vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2);
+
+  // check whether there is a NaN or +/- Inf.
+  mv(t0, 0x7c00);
+  vand_vx(v0, src, t0);
+  // v0 will be used as mask in slow path.
+  vmseq_vx(v0, v0, t0);
+  vcpop_m(t0, v0);
+
+  // For non-NaN or non-Inf cases, just use built-in instructions.
+  vfwcvt_f_f_v(dst, src);
+
+  // jump to stub processing NaN and Inf cases if there is any of them in the vector-wide.
+  bnez(t0, stub->entry());
+
+  bind(stub->continuation());
+}
+
+static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
+                                         C2GeneralStub<VectorRegister, VectorRegister, VectorRegister>& stub) {
+#define __ masm.
+  VectorRegister dst = stub.data<0>();
+  VectorRegister src = stub.data<1>();
+  VectorRegister tmp = stub.data<2>();
+  __ bind(stub.entry());
+
+  // mul is already set to mf2 in float_to_float16_v.
+
+  // preserve the payloads of non-canonical NaNs.
+  __ vnsra_wi(dst, src, 13, Assembler::v0_t);
+
+  // preserve the sign bit.
+  __ vnsra_wi(tmp, src, 26, Assembler::v0_t);
+  __ vsll_vi(tmp, tmp, 10, Assembler::v0_t);
+  __ mv(t0, 0x3ff);
+  __ vor_vx(tmp, tmp, t0, Assembler::v0_t);
+
+  // get the result by merging sign bit and payloads of preserved non-canonical NaNs.
+  __ vand_vv(dst, dst, tmp, Assembler::v0_t);
+
+  __ j(stub.continuation());
+#undef __
+}
+
+// j.l.Float.float16ToFloat
+void C2_MacroAssembler::float_to_float16_v(VectorRegister dst, VectorRegister src, VectorRegister vtmp,
+                                           Register tmp, uint vector_length) {
+  assert_different_registers(dst, src, vtmp);
+
+  auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
+              (dst, src, vtmp, 28, float_to_float16_v_slow_path);
+
+  // On riscv, NaN needs a special process as vfncvt_f_f_w does not work in that case.
+
+  vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
+
+  // check whether there is a NaN.
+  // replace v_fclass with vmseq_vv as performance optimization.
+  vmfne_vv(v0, src, src);
+  vcpop_m(t0, v0);
+
+  vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2, tmp);
+
+  // For non-NaN cases, just use built-in instructions.
+  vfncvt_f_f_w(dst, src);
+
+  // jump to stub processing NaN cases.
+  bnez(t0, stub->entry());
+
+  bind(stub->continuation());
+}
+
 void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
   vsetvli_helper(bt, vlen);
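Note on the NaN handling above: the two vector slow paths boil down to a small amount of integer bit manipulation. Below is a minimal scalar sketch in Java, not part of this commit (class and method names are illustrative), assuming the raw NaN bit pattern survives the float round-trip in this little demo:

    // Hypothetical scalar model of the two vector slow paths above.
    public class Float16NaNBits {
        // HF2F slow path: sign-extend the 16-bit value, shift exponent and payload up by 13,
        // then force the FP32 exponent to all-ones
        // (mirrors vsext_vf2 + vsll_vi 13 + vor_vx 0x7f800000).
        static float halfToFloatNaNOrInf(short h) {
            return Float.intBitsToFloat((h << 13) | 0x7f800000);
        }

        // F2HF slow path: narrow with arithmetic shifts and a mask so that the sign bit
        // and the NaN payload survive
        // (mirrors vnsra_wi 13, then vnsra_wi 26 + vsll_vi 10 + vor_vx 0x3ff + vand_vv).
        static short floatToHalfNaN(float f) {
            int bits = Float.floatToRawIntBits(f);
            int payload = bits >> 13;                 // like vnsra.wi 13: arithmetic shift keeps the sign
            int mask = ((bits >> 26) << 10) | 0x3ff;  // like vnsra.wi 26 / vsll.vi 10 / vor.vx 0x3ff:
                                                      // for a NaN input: all-ones if negative, 0x7fff if positive
            return (short) (payload & mask);          // like vand.vv: clears bit 15 when the sign bit was 0
        }

        public static void main(String[] args) {
            short h = (short) 0xfe01;                 // a negative half-precision NaN with a non-zero payload
            float f = halfToFloatNaNOrInf(h);
            System.out.printf("0x%04x -> 0x%08x -> 0x%04x%n",
                              h & 0xffff, Float.floatToRawIntBits(f), floatToHalfNaN(f) & 0xffff);
        }
    }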

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp (+3)
@@ -188,6 +188,9 @@
   void expand_bits_i_v(Register dst, Register src, Register mask);
   void expand_bits_l_v(Register dst, Register src, Register mask);

+  void float16_to_float_v(VectorRegister dst, VectorRegister src, uint vector_length);
+  void float_to_float16_v(VectorRegister dst, VectorRegister src, VectorRegister vtmp, Register tmp, uint vector_length);
+
   void string_equals_v(Register r1, Register r2,
                        Register result, Register cnt1);

src/hotspot/cpu/riscv/globals_riscv.hpp (+1)
@@ -115,6 +115,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
   product(bool, UseZtso, false, EXPERIMENTAL, "Assume Ztso memory model") \
   product(bool, UseZihintpause, false, EXPERIMENTAL, \
           "Use Zihintpause instructions") \
+  product(bool, UseZvfh, false, EXPERIMENTAL, "Use Zvfh instructions") \
   product(bool, UseZvkn, false, EXPERIMENTAL, \
           "Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt") \
   product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
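UseZvfh is declared EXPERIMENTAL and defaults to false, so enabling it by hand follows the usual HotSpot pattern for experimental flags (usage sketch, not part of this commit):

    java -XX:+UnlockExperimentalVMOptions -XX:+UseZvfh ...

When the Zvfh extension is detected at startup, the UPDATE_DEFAULT(UseZvfh) entry added to vm_version_riscv.hpp below is what flips the default on automatically.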

src/hotspot/cpu/riscv/riscv_v.ad (+34)
@@ -73,6 +73,9 @@ source %{
         return false;
       }
       break;
+    case Op_VectorCastHF2F:
+    case Op_VectorCastF2HF:
+      return UseZvfh;
     default:
       break;
   }

@@ -3660,6 +3663,37 @@ instruct vsignum_reg(vReg dst, vReg zero, vReg one, vRegMask_V0 v0) %{
   ins_pipe(pipe_slow);
 %}

+// ---------------- Convert Half Floating to Floating Vector Operations ----------------
+
+// half precision -> single
+
+instruct vconvHF2F(vReg dst, vReg src, vRegMask_V0 v0) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
+  match(Set dst (VectorCastHF2F src));
+  effect(TEMP_DEF dst, TEMP v0);
+  format %{ "vfwcvt.f.f.v $dst, $src\t# convert half to single precision" %}
+  ins_encode %{
+    __ float16_to_float_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+                          Matcher::vector_length(this));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// single precision -> half
+
+instruct vconvF2HF(vReg dst, vReg src, vReg vtmp, vRegMask_V0 v0, iRegINoSp tmp) %{
+  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
+  match(Set dst (VectorCastF2HF src));
+  effect(TEMP_DEF dst, TEMP v0, TEMP vtmp, TEMP tmp);
+  format %{ "vfncvt.f.f.w $dst, $src\t# convert single to half precision" %}
+  ins_encode %{
+    __ float_to_float16_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
+                          as_VectorRegister($vtmp$$reg), $tmp$$Register,
+                          Matcher::vector_length(this));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ Vector Load Gather ---------------------------

 instruct gather_load(vReg dst, indirect mem, vReg idx) %{
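These two rules are what C2 selects when it auto-vectorizes scalar conversion loops over Float.floatToFloat16 / Float.float16ToFloat, i.e. the kernels exercised by the jtreg tests further below, for example:

    // Loop shape that becomes a VectorCastF2HF node on the vectorized path
    // (same pattern as in the tests added by this commit).
    for (int i = 0; i < finp.length; i++) {
        sout[i] = Float.floatToFloat16(finp[i]);
    }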

src/hotspot/cpu/riscv/vm_version_riscv.hpp (+1)
@@ -152,6 +152,7 @@ class VM_Version : public Abstract_VM_Version {
   decl(ext_Ztso , "Ztso" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZtso)) \
   decl(ext_Zihintpause , "Zihintpause" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZihintpause)) \
   decl(ext_Zacas , "Zacas" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZacas)) \
+  decl(ext_Zvfh , "Zvfh" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvfh)) \
   decl(mvendorid , "VendorId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \
   decl(marchid , "ArchId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \
   decl(mimpid , "ImpId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \

src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp (+2)
@@ -252,6 +252,8 @@ void VM_Version::rivos_features() {
   ext_Ztso.enable_feature();
   ext_Zihintpause.enable_feature();

+  ext_Zvfh.enable_feature();
+
   unaligned_access.enable_feature(MISALIGNED_FAST);
   satp_mode.enable_feature(VM_SV48);

test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java (+4 −2)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it

@@ -26,7 +26,9 @@
  * @bug 8294588
  * @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs
  * @requires vm.compiler2.enabled
- * @requires (os.simpleArch == "x64" & (vm.cpu.features ~= ".*avx512f.*" | vm.cpu.features ~= ".*f16c.*")) | os.arch == "aarch64"
+ * @requires (os.simpleArch == "x64" & (vm.cpu.features ~= ".*avx512f.*" | vm.cpu.features ~= ".*f16c.*")) |
+ *           os.arch == "aarch64" |
+ *           (os.arch == "riscv64" & vm.cpu.features ~= ".*zvfh.*")
 * @library /test/lib /
 * @run driver compiler.vectorization.TestFloatConversionsVector
 */
test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVectorNaN.java (new file, +202)

@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8320646
+ * @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs, with NaN
+ * @requires vm.compiler2.enabled
+ * @requires (os.arch == "riscv64" & vm.cpu.features ~= ".*zvfh.*")
+ * @library /test/lib /
+ * @run driver compiler.vectorization.TestFloatConversionsVectorNaN
+ */
+
+package compiler.vectorization;
+
+import java.util.HexFormat;
+
+import compiler.lib.ir_framework.*;
+import jdk.test.lib.Asserts;
+
+public class TestFloatConversionsVectorNaN {
+    private static final int ARRLEN = 1024;
+    private static final int ITERS = 11000;
+    private static float [] finp;
+    private static short [] sout;
+    private static short [] sinp;
+    private static float [] fout;
+
+    public static void main(String args[]) {
+        TestFramework.runWithFlags("-XX:-TieredCompilation",
+                                   "-XX:CompileThresholdScaling=0.3");
+        System.out.println("PASSED");
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"})
+    public void test_float_float16(short[] sout, float[] finp) {
+        for (int i = 0; i < finp.length; i++) {
+            sout[i] = Float.floatToFloat16(finp[i]);
+        }
+    }
+
+    @Run(test = {"test_float_float16"}, mode = RunMode.STANDALONE)
+    public void kernel_test_float_float16() {
+        int errno = 0;
+        finp = new float[ARRLEN];
+        sout = new short[ARRLEN];
+
+        // Setup
+        for (int i = 0; i < ARRLEN; i++) {
+            if (i%39 == 0) {
+                int x = 0x7f800000 + ((i/39) << 13);
+                x = (i%2 == 0) ? x : (x | 0x80000000);
+                finp[i] = Float.intBitsToFloat(x);
+            } else {
+                finp[i] = (float) i * 1.4f;
+            }
+        }
+        int ranges[][] = {
+            {128, 64},
+            {256, 19},
+            {384-19, 19},
+            {512-19, 17},
+            {640+19, 19},
+            {768+19, 32},
+            {896-19, 32}
+        };
+        for (int range[] : ranges) {
+            int start = range[0];
+            int offset = range[1];
+            for (int i = start; i < start+offset; i++) {
+                int x = 0x7f800000 + (i << 13);
+                finp[i] = Float.intBitsToFloat(x);
+            }
+        }
+
+        // Test
+        for (int i = 0; i < ITERS; i++) {
+            test_float_float16(sout, finp);
+        }
+
+        // Verifying the result
+        for (int i = 0; i < ARRLEN; i++) {
+            errno += assertEquals(i, finp[i], Float.floatToFloat16(finp[i]), sout[i]);
+        }
+
+        if (errno > 0) {
+            throw new RuntimeException("errors occur");
+        }
+    }
+
+    static int assertEquals(int idx, float f, short expected, short actual) {
+        HexFormat hf = HexFormat.of();
+        String msg = "floatToFloat16 wrong result: idx: " + idx + ", \t" + f +
+                     ",\t expected: " + hf.toHexDigits(expected) +
+                     ",\t actual: " + hf.toHexDigits(actual);
+        if ((expected & 0x7c00) != 0x7c00) {
+            if (expected != actual) {
+                System.err.println(msg);
+                return 1;
+            }
+        } else if ((expected & 0x3ff) != 0) {
+            if (((actual & 0x7c00) != 0x7c00) || (actual & 0x3ff) == 0) {
+                System.err.println(msg);
+                return 1;
+            }
+        }
+        return 0;
+    }
+
+    @Test
+    @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"})
+    public void test_float16_float(float[] fout, short[] sinp) {
+        for (int i = 0; i < sinp.length; i++) {
+            fout[i] = Float.float16ToFloat(sinp[i]);
+        }
+    }
+
+    @Run(test = {"test_float16_float"}, mode = RunMode.STANDALONE)
+    public void kernel_test_float16_float() {
+        int errno = 0;
+        sinp = new short[ARRLEN];
+        fout = new float[ARRLEN];
+
+        // Setup
+        for (int i = 0; i < ARRLEN; i++) {
+            if (i%39 == 0) {
+                int x = 0x7c00 + i;
+                x = (i%2 == 0) ? x : (x | 0x8000);
+                sinp[i] = (short)x;
+            } else {
+                sinp[i] = (short)i;
+            }
+        }
+
+        int ranges[][] = {
+            {128, 64},
+            {256, 19},
+            {384-19, 19},
+            {512-19, 17},
+            {640+19, 19},
+            {768+19, 32},
+            {896-19, 32}
+        };
+        for (int range[] : ranges) {
+            int start = range[0];
+            int offset = range[1];
+            for (int i = start; i < start+offset; i++) {
+                int x = 0x7c00 + i;
+                x = (i%2 == 0) ? x : (x | 0x8000);
+                sinp[i] = (short)x;
+            }
+        }
+
+        // Test
+        for (int i = 0; i < ITERS; i++) {
+            test_float16_float(fout, sinp);
+        }
+
+        // Verifying the result
+        for (int i = 0; i < ARRLEN; i++) {
+            errno += assertEquals(i, sinp[i], Float.float16ToFloat(sinp[i]), fout[i]);
+        }
+
+        if (errno > 0) {
+            throw new RuntimeException("errors occur");
+        }
+    }
+
+    static int assertEquals(int idx, short s, float expected, float actual) {
+        String msg = "float16ToFloat wrong result: idx: " + idx + ", \t" + s +
+                     ",\t expected: " + expected + ",\t" + Integer.toHexString(Float.floatToIntBits(expected)) +
+                     ",\t actual: " + actual + ",\t" + Integer.toHexString(Float.floatToIntBits(actual));
+        if (!Float.isNaN(expected) || !Float.isNaN(actual)) {
+            if (expected != actual) {
+                System.err.println(msg);
+                return 1;
+            }
+        }
+        return 0;
+    }
+}
