@@ -2064,8 +2064,8 @@ static void float16_to_float_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Fl
2064
2064
void C2_MacroAssembler::float16_to_float (FloatRegister dst, Register src, Register tmp) {
2065
2065
auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20 , float16_to_float_slow_path);
2066
2066
2067
- // in riscv, NaN needs a special process as fcvt does not work in that case.
2068
- // in riscv, Inf does not need a special process as fcvt can handle it correctly.
2067
+ // On riscv, NaN needs a special process as fcvt does not work in that case.
2068
+ // On riscv, Inf does not need a special process as fcvt can handle it correctly.
2069
2069
// but we consider to get the slow path to process NaN and Inf at the same time,
2070
2070
// as both of them are rare cases, and if we try to get the slow path to handle
2071
2071
// only NaN case it would sacrifice the performance for normal cases,
@@ -2112,7 +2112,7 @@ static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Re
2112
2112
void C2_MacroAssembler::float_to_float16 (Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
2113
2113
auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 130 , float_to_float16_slow_path);
2114
2114
2115
- // in riscv, NaN needs a special process as fcvt does not work in that case.
2115
+ // On riscv, NaN needs a special process as fcvt does not work in that case.
2116
2116
2117
2117
// check whether it's a NaN.
2118
2118
// replace fclass with feq as performance optimization.
@@ -2127,6 +2127,117 @@ void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatR
2127
2127
bind (stub->continuation ());
2128
2128
}
2129
2129
2130
+ static void float16_to_float_v_slow_path (C2_MacroAssembler& masm, C2GeneralStub<VectorRegister, VectorRegister, uint >& stub) {
2131
+ #define __ masm.
2132
+ VectorRegister dst = stub.data <0 >();
2133
+ VectorRegister src = stub.data <1 >();
2134
+ uint vector_length = stub.data <2 >();
2135
+ __ bind (stub.entry ());
2136
+
2137
+ // Slow path of float16_to_float_v. The following instructions mainly deal with NaN,
2138
+ // as riscv does not handle NaN well with vfwcvt_f_f_v; the same code also works for Inf.
2139
+ // Every vector instruction below is masked with Assembler::v0_t, so only the elements flagged in v0 are rewritten.
2140
+ // Construct the 32-bit NaNs from the 16-bit NaNs;
2141
+ // the payloads of non-canonical NaNs need to be preserved.
2142
+
2143
+ // Adjust the vector type to 2 * SEW (T_FLOAT elements, multiplier m1).
2144
+ __ vsetvli_helper (T_FLOAT, vector_length, Assembler::m1);
2145
+ // Widen and sign-extend the src data, so bit 31 carries the sign of the 16-bit value.
2146
+ __ vsext_vf2 (dst, src, Assembler::v0_t );
2147
+ __ mv (t0, 0x7f800000 ); // exponent field (bits 30..23) of a 32-bit float, all ones
2148
+ // The sign bit was already set via sign-extension if necessary.
2149
+ __ vsll_vi (dst, dst, 13 , Assembler::v0_t ); // 13 = 23 - 10: move the 10-bit fraction to the top of the 23-bit fraction
2150
+ __ vor_vx (dst, dst, t0, Assembler::v0_t ); // force the 32-bit exponent field to all ones (NaN / Inf)
2151
+
2152
+ __ j (stub.continuation ());
2153
+ #undef __
2154
+ }
2155
+
2156
+ // j.l.Float.float16ToFloat
2157
+ void C2_MacroAssembler::float16_to_float_v (VectorRegister dst, VectorRegister src, uint vector_length) {
2158
+ auto stub = C2CodeStub::make<VectorRegister, VectorRegister, uint >
2159
+ (dst, src, vector_length, 24 , float16_to_float_v_slow_path); // NOTE(review): 24 is presumably the stub's maximum code size - confirm against C2CodeStub::make
2160
+ assert_different_registers (dst, src);
2161
+
2162
+ // On riscv, NaN needs special processing as vfwcvt_f_f_v does not work in that case.
2163
+ // On riscv, Inf does not need special processing as vfwcvt_f_f_v can handle it correctly,
2164
+ // but we let the slow path process NaN and Inf at the same time,
2165
+ // as both of them are rare cases, and making the slow path handle
2166
+ // only the NaN case would sacrifice the performance of the normal cases,
2167
+ // i.e. the non-NaN and non-Inf cases.
2168
+
2169
+ vsetvli_helper (BasicType::T_SHORT, vector_length, Assembler::mf2);
2170
+
2171
+ // Check whether there is a NaN or +/- Inf: both have all exponent bits set.
2172
+ mv (t0, 0x7c00 ); // exponent field (bits 14..10) of a 16-bit float, all ones
2173
+ vand_vx (v0, src, t0);
2174
+ // v0 will be used as the mask in the slow path.
2175
+ vmseq_vx (v0, v0, t0);
2176
+ vcpop_m (t0, v0); // t0 is non-zero iff at least one element was flagged in v0
2177
+
2178
+ // For elements that are neither NaN nor Inf, just use the built-in instruction.
2179
+ vfwcvt_f_f_v (dst, src);
2180
+
2181
+ // Jump to the stub that processes the NaN and Inf elements if the vector contains any of them.
2182
+ bnez (t0, stub->entry ());
2183
+
2184
+ bind (stub->continuation ());
2185
+ }
2186
+
2187
+ static void float_to_float16_v_slow_path (C2_MacroAssembler& masm,
2188
+ C2GeneralStub<VectorRegister, VectorRegister, VectorRegister>& stub) {
2189
+ #define __ masm.
2190
+ VectorRegister dst = stub.data <0 >();
2191
+ VectorRegister src = stub.data <1 >();
2192
+ VectorRegister tmp = stub.data <2 >();
2193
+ __ bind (stub.entry ());
2194
+
2195
+ // Slow path of float_to_float16_v; the multiplier is already set to mf2 (with T_SHORT elements) in float_to_float16_v.
2196
+
2197
+ // Preserve the payloads of non-canonical NaNs: narrow bits 28..13 of each 32-bit source element into dst.
2198
+ __ vnsra_wi (dst, src, 13 , Assembler::v0_t );
2199
+
2200
+ // Preserve the sign bit: build a 16-bit mask in tmp with the sign in bit 15 and, for NaN inputs, every lower bit set.
2201
+ __ vnsra_wi (tmp, src, 26 , Assembler::v0_t ); // arithmetic shift: bit 5 of tmp now holds the sign (bit 31 of src)
2202
+ __ vsll_vi (tmp, tmp, 10 , Assembler::v0_t ); // move the sign up to bit 15
2203
+ __ mv (t0, 0x3ff ); // low 10 bits: the fraction field of a 16-bit float
2204
+ __ vor_vx (tmp, tmp, t0, Assembler::v0_t );
2205
+
2206
+ // Get the result by merging the sign bit and the preserved payloads of non-canonical NaNs.
2207
+ __ vand_vv (dst, dst, tmp, Assembler::v0_t );
2208
+
2209
+ __ j (stub.continuation ());
2210
+ #undef __
2211
+ }
2212
+
2213
+ // j.l.Float.floatToFloat16
2214
+ void C2_MacroAssembler::float_to_float16_v (VectorRegister dst, VectorRegister src, VectorRegister vtmp,
2215
+ Register tmp, uint vector_length) {
2216
+ assert_different_registers (dst, src, vtmp);
2217
+
2218
+ auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2219
+ (dst, src, vtmp, 28 , float_to_float16_v_slow_path); // NOTE(review): 28 is presumably the stub's maximum code size - confirm against C2CodeStub::make
2220
+
2221
+ // Vectorized float -> float16 conversion. On riscv, NaN needs special processing as vfncvt_f_f_w does not work in that case.
2222
+
2223
+ vsetvli_helper (BasicType::T_FLOAT, vector_length, Assembler::m1);
2224
+
2225
+ // Check whether there is a NaN.
2226
+ // Use vmfne_vv (src != src holds only for NaN) instead of a vector fclass as a performance optimization.
2227
+ vmfne_vv (v0, src, src);
2228
+ vcpop_m (t0, v0); // t0 is non-zero iff at least one element was flagged in v0
2229
+
2230
+ vsetvli_helper (BasicType::T_SHORT, vector_length, Assembler::mf2, tmp); // NOTE(review): tmp is presumably passed so that t0 (tested below) is not clobbered - confirm against vsetvli_helper
2231
+
2232
+ // For non-NaN cases, just use the built-in instruction.
2233
+ vfncvt_f_f_w (dst, src);
2234
+
2235
+ // Jump to the stub that processes the NaN elements if the vector contains any of them.
2236
+ bnez (t0, stub->entry ());
2237
+
2238
+ bind (stub->continuation ());
2239
+ }
2240
+
2130
2241
void C2_MacroAssembler::signum_fp_v (VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
2131
2242
vsetvli_helper (bt, vlen);
2132
2243
0 commit comments