8338126: C2 SuperWord: VectorCastF2HF / vcvtps2ph produces wrong results for vector length 2

Sandhya Viswanathan · Sandhya Viswanathan · commit 153ad911f9fa · 2024-10-21T14:58:43.000Z
Reviewed-by: thartmann, jbhateja, epeter
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
@@ -3686,6 +3686,7 @@ instruct vconvF2HF(vec dst, vec src) %{
 %}
 
 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
+  predicate(n->as_StoreVector()->memory_size() >= 16);
   match(Set mem (StoreVector mem (VectorCastF2HF src)));
   format %{ "vcvtps2ph $mem,$src" %}
   ins_encode %{
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
@@ -109,7 +109,8 @@ public class IREncodingPrinter {
         "sve",
         // Riscv64
         "rvv",
-        "zvbb"
+        "zvbb",
+        "zvfh"
     ));
 
     public IREncodingPrinter() {
diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java
@@ -26,9 +26,6 @@
  * @bug 8294588
  * @summary Auto-vectorize Float.floatToFloat16, Float.float16ToFloat APIs
  * @requires vm.compiler2.enabled
- * @requires (os.simpleArch == "x64" & (vm.cpu.features ~= ".*avx512f.*" | vm.cpu.features ~= ".*f16c.*")) |
- *           os.arch == "aarch64" |
- *           (os.arch == "riscv64" & vm.cpu.features ~= ".*zvfh.*")
  * @library /test/lib /
  * @run driver compiler.vectorization.TestFloatConversionsVector
  */
@@ -53,7 +50,9 @@ public static void main(String args[]) {
     }
 
     @Test
-    @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"})
+    @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"},
+                  applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"},
+                  applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"})
     public void test_float_float16(short[] sout, float[] finp) {
         for (int i = 0; i < finp.length; i++) {
             sout[i] = Float.floatToFloat16(finp[i]);
@@ -67,7 +66,16 @@ public void test_float_float16_strided(short[] sout, float[] finp) {
         }
     }
 
-    @Run(test = {"test_float_float16", "test_float_float16_strided"}, mode = RunMode.STANDALONE)
+    @Test
+    public void test_float_float16_short_vector(short[] sout, float[] finp) { // added with 8338126 (wrong results for vector length 2); has no @IR rule of its own
+        for (int i = 0; i < finp.length; i+= 4) { // advances by 4, but only the first two elements of each group of four are converted below
+            sout[i+0] = Float.floatToFloat16(finp[i+0]);
+            sout[i+1] = Float.floatToFloat16(finp[i+1]); // two adjacent stores per iteration - presumably so the conversion is packed as a 2-element vector; confirm against SuperWord output
+        } // sout[i+2] and sout[i+3] are never written by this method
+    }
+
+    @Run(test = {"test_float_float16", "test_float_float16_strided",
+                 "test_float_float16_short_vector"}, mode = RunMode.STANDALONE)
     public void kernel_test_float_float16() {
         finp = new float[ARRLEN];
         sout = new short[ARRLEN];
@@ -93,10 +101,21 @@ public void kernel_test_float_float16() {
         for (int i = 0; i < ARRLEN/2; i++) {
             Asserts.assertEquals(Float.floatToFloat16(finp[i*2]), sout[i*2]);
         }
+
+        for (int i = 0; i < ITERS; i++) {
+            test_float_float16_short_vector(sout, finp);
+        }
+
+        // Verifying the result
+        for (int i = 0; i < ARRLEN; i++) {
+            Asserts.assertEquals(Float.floatToFloat16(finp[i]), sout[i]);
+        }
     }
 
     @Test
-    @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"})
+    @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE + "min(max_float, max_short)", "> 0"},
+                  applyIfPlatformOr = {"x64", "true", "aarch64", "true", "riscv64", "true"},
+                  applyIfCPUFeatureOr = {"f16c", "true", "avx512f", "true", "zvfh", "true", "asimd", "true", "sve", "true"})
     public void test_float16_float(float[] fout, short[] sinp) {
         for (int i = 0; i < sinp.length; i++) {
             fout[i] = Float.float16ToFloat(sinp[i]);