diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
index 63e00765792..1ec5a2b1bcd 100644
--- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
+++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
@@ -116,12 +116,13 @@ public class VectorSupport {
 
     // BasicType codes, for primitives only:
     public static final int
-        T_FLOAT   = 6,
-        T_DOUBLE  = 7,
-        T_BYTE    = 8,
-        T_SHORT   = 9,
-        T_INT     = 10,
-        T_LONG    = 11;
+        T_HALFFLOAT = 5,
+        T_FLOAT     = 6,
+        T_DOUBLE    = 7,
+        T_BYTE      = 8,
+        T_SHORT     = 9,
+        T_INT       = 10,
+        T_LONG      = 11;
 
     /* ============================================================================ */
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
index 4987546dc36..f7b86536f19 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
@@ -298,7 +298,15 @@ AbstractVector<E> dummyVector() {
         return makeDummyVector();
     }
     private AbstractVector<E> makeDummyVector() {
-        Object za = Array.newInstance(elementType(), laneCount);
+        Object za;
+        // FIXME: Remove the following special handling for
+        // Halffloat after Valhalla integration, when Halffloat
+        // will become a primitive class.
+        if (elementType() == Halffloat.class) {
+           za = Array.newInstance(short.class, laneCount);
+        } else {
+           za = Array.newInstance(elementType(), laneCount);
+        }
         return dummyVector = vectorFactory.apply(za);
         // This is the only use of vectorFactory.
         // All other factory requests are routed
@@ -610,6 +618,8 @@ AbstractSpecies<?> computeSpecies(LaneType laneType,
             s = IntVector.species(shape); break;
         case LaneType.SK_LONG:
             s = LongVector.species(shape); break;
+        case LaneType.SK_HALFFLOAT:
+            s = HalffloatVector.species(shape); break;
         }
         if (s == null) {
             // NOTE: The result of this method is guaranteed to be
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
index 1ffdb058a27..60f620ebc5f 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
@@ -250,6 +250,15 @@ public DoubleVector reinterpretAsDoubles() {
         return (DoubleVector) asVectorRaw(LaneType.DOUBLE);
     }
 
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public HalffloatVector reinterpretAsHalffloats() {
+        return (HalffloatVector) asVectorRaw(LaneType.HALFFLOAT);
+    }
+
     /**
      * {@inheritDoc} <!--workaround-->
      */
@@ -521,6 +530,7 @@ AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) {
             return FloatVector.fromByteBuffer(rsp.check(float.class), bb, 0, bo, m.check(float.class)).check0(rsp);
         case LaneType.SK_DOUBLE:
             return DoubleVector.fromByteBuffer(rsp.check(double.class), bb, 0, bo, m.check(double.class)).check0(rsp);
+        // FIXME: Add a case for LaneType.SK_HALFFLOAT; until then Halffloat species hit the AssertionError below.
         default:
             throw new AssertionError(rsp.toString());
         }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat.java
new file mode 100644
index 00000000000..78eb3d687e6
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat.java
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 1994, 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import jdk.internal.vm.annotation.IntrinsicCandidate;
+
+/**
+ * An IEEE 754 binary16 ("half precision") floating-point value, held as its raw
+ * 16-bit pattern in a {@code short}: 1 sign bit, 5 exponent bits, 10 fraction bits.
+ * @author abc
+ * @version 1.0
+ * @since  10/01/2021
+ */
+@SuppressWarnings("serial")
+public final class Halffloat extends Number implements Comparable<Halffloat>{
+    /** Bit pattern of the largest finite value, 65504. */
+    public static final short MAX_VALUE = 0x7bff;
+    /** Bit pattern of the smallest positive normal value, 2^-14. */
+    public static final short MIN_VALUE = 0x400;
+    /** Bit pattern of positive infinity. */
+    public static final short POSITIVE_INFINITY = 0x7c00;
+    /** Bit pattern of negative infinity. */
+    public static final short NEGATIVE_INFINITY = (short)0xfc00;
+    /** Bit pattern that {@link #valueOf(float)} returns for a NaN input. */
+    public static final short NaN = (short)0xffff;
+    /** Floats whose magnitude exceeds this value (65520) convert to an infinity. */
+    private static final float MAX_FLOAT_VALUE = 0x1.ffep+15f;
+    /** Smallest normal magnitude (2^-14); smaller magnitudes are zero or subnormal. */
+    private static final float MIN_NORMAL_FLOAT = 0x1p-14f;
+    /** Number of bits in a halffloat. */
+    public static final int SIZE = 16;
+    /** Number of bytes in a halffloat. */
+    public static final int BYTES = SIZE / Byte.SIZE;
+    /** Raw binary16 bits of this value. */
+    private final short value;
+
+    /**
+    * Returns a new Halffloat holding the given raw bits.
+    * @param f the raw binary16 bit pattern to wrap
+    * @return a {@code Halffloat} whose stored bits are {@code f}
+    */
+    public static Halffloat valueOf(short f) {
+        return new Halffloat(f);
+    }
+
+    /**
+    * Creates a Halffloat from a raw bit pattern.
+    * @param value the binary16 bits, stored unchanged
+    */
+    public Halffloat(short value) {
+        this.value = value;
+    }
+
+    /**
+    * Creates a Halffloat from a float, converted with {@link #valueOf(float)}.
+    * @param f float value assigned to halffloat
+    */
+    public Halffloat(float f) {
+        this.value = valueOf(f);
+    }
+
+    /**
+     * Returns this value widened to {@code float} (the widening is exact);
+     * every NaN bit pattern yields {@code Float.NaN}.
+     * @return the float equal to the binary16 value held by this object
+     */
+    @Override
+    public float floatValue() {
+        int sign = (value & 0x8000) << 16;
+        int exp = (value >> 10) & 0x1f;
+        int mant = value & 0x03ff;
+        if (exp == 0x1f) {
+            // All-ones exponent: infinity when the fraction is zero, otherwise NaN.
+            return mant == 0 ? Float.intBitsToFloat(sign | 0x7f800000) : Float.NaN;
+        }
+        if (exp == 0) {
+            // Zero or subnormal: the fraction counts units of 2^-24; OR the sign back in.
+            return Float.intBitsToFloat(sign | Float.floatToRawIntBits(mant * 0x1p-24f));
+        }
+        // Normal: re-bias the exponent (15 -> 127) and left-align the 10-bit fraction.
+        return Float.intBitsToFloat(sign | ((exp + 112) << 23) | (mant << 13));
+    }
+
+    /**
+     * Returns the binary16 bit pattern for a float; excess precision is truncated.
+     * @param f float value to be converted into halffloat
+     * @return short holding the halffloat bits for {@code f} (NaN, infinities, zero and subnormals included)
+     */
+    public static short valueOf(float f) {
+        if (Float.isNaN(f)) return Halffloat.NaN;
+        if (f > Halffloat.MAX_FLOAT_VALUE) return Halffloat.POSITIVE_INFINITY;
+        if (f < -Halffloat.MAX_FLOAT_VALUE) return Halffloat.NEGATIVE_INFINITY;
+        int val = Float.floatToIntBits(f);
+        int sign = (val >> 16) & 0x8000;
+        // Below 2^-14 the exponent field is 0 and the fraction counts units of 2^-24.
+        if (Math.abs(f) < MIN_NORMAL_FLOAT) return (short)(sign | (int)(Math.abs(f) * 0x1p24f));
+        return (short)(sign | ((((val & 0x7f800000) - 0x38000000) >> 13) & 0x7c00) | ((val >> 13) & 0x03ff));
+    }
+
+    /** Returns {@link #floatValue()} widened to {@code double}. */
+    @Override
+    public double doubleValue() {
+        return (double) floatValue();
+    }
+
+    /** Returns the raw stored bits sign-extended to {@code long}, not a numeric conversion. */
+    @Override
+    public long longValue() {
+        return (long) value;
+    }
+
+    /** Returns the raw stored bits sign-extended to {@code int}, not a numeric conversion. */
+    @Override
+    public int intValue() {
+        return (int) value;
+    }
+
+    /**
+     * Identity conversion: a halffloat lane value is already its raw bit pattern.
+     * @param bits raw binary16 bits
+     * @return {@code bits}, unchanged
+    */
+    public static short shortBitsToHalffloat(short bits) {
+        return bits;
+    }
+    /**
+     * Identity conversion: returns the raw bit pattern without any NaN canonicalization.
+     * @param bits raw binary16 bits
+     * @return {@code bits}, unchanged
+    */
+    public static short shortToRawShortBits(short bits) {
+        return bits;
+    }
+    /**
+     * Identity conversion: returns the bit pattern exactly as given.
+     * @param bits raw binary16 bits
+     * @return {@code bits}, unchanged
+    */
+    public static short shortToShortBits(short bits) {
+        return bits;
+    }
+
+    /**
+     * Compares two halffloats numerically via {@link Float#compare(float, float)}.
+     * @param hf value to be compared
+     * @return the result of {@code Float.compare} on the two float values
+     */
+    @Override
+    public int compareTo(Halffloat hf) {
+        return Float.compare(floatValue(), hf.floatValue());
+    }
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java
new file mode 100644
index 00000000000..b1150fc8a08
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat128Vector.java
@@ -0,0 +1,919 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat128Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_128;
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class<Halffloat128Vector> VCLASS = Halffloat128Vector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+    Halffloat128Vector(short[] v) {
+        super(v);
+    }
+
+    // For compatibility as Halffloat128Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat128Vector(Object v) {
+        this((short[]) v);
+    }
+
+    static final Halffloat128Vector ZERO = new Halffloat128Vector(new short[VLENGTH]);
+    static final Halffloat128Vector IOTA = new Halffloat128Vector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Halffloat> elementType() { return Halffloat.class; }
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Halffloat.SIZE; }
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    short[] vec() {
+        return (short[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final Halffloat128Vector broadcast(short e) {
+        return (Halffloat128Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat128Vector broadcast(long e) {
+        return (Halffloat128Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    Halffloat128Mask maskFromArray(boolean[] bits) {
+        return new Halffloat128Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle iotaShuffle() { return Halffloat128Shuffle.IOTA; }
+
+    @ForceInline
+    Halffloat128Shuffle iotaShuffle(int start, int step, boolean wrap) {
+      if (wrap) {
+        return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+      } else {
+        return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+    }
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat128Shuffle(reorder); }
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat128Shuffle(indexes, i); }
+
+    @Override
+    @ForceInline
+    Halffloat128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat128Shuffle(fn); }
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat128Vector vectorFactory(short[] vec) {
+        return new Halffloat128Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte128Vector asByteVectorRaw() {
+        return (Byte128Vector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector uOp(FUnOp f) {
+        return (Halffloat128Vector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector uOp(VectorMask<Halffloat> m, FUnOp f) {
+        return (Halffloat128Vector)
+            super.uOpTemplate((Halffloat128Mask)m, f);  // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector bOp(Vector<Halffloat> v, FBinOp f) {
+        return (Halffloat128Vector) super.bOpTemplate((Halffloat128Vector)v, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector bOp(Vector<Halffloat> v,
+                     VectorMask<Halffloat> m, FBinOp f) {
+        return (Halffloat128Vector)
+            super.bOpTemplate((Halffloat128Vector)v, (Halffloat128Mask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
+        return (Halffloat128Vector)
+            super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2,
+                              f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat128Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
+                     VectorMask<Halffloat> m, FTriOp f) {
+        return (Halffloat128Vector)
+            super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2,
+                              (Halffloat128Mask)m, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Unary op) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Unary op, VectorMask<Halffloat> m) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Binary op, Vector<Halffloat> v) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v, (Halffloat128Mask) m);  // specialize
+    }
+
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat128Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat128Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {
+        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v1, v2, (Halffloat128Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat128Vector addIndex(int scale) {
+        return (Halffloat128Vector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Halffloat> m) {
+        return super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        return (long) super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Halffloat> m) {
+        return (long) super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialized
+    }
+
+    @ForceInline
+    public VectorShuffle<Halffloat> toShuffle() {
+        return super.toShuffleTemplate(Halffloat128Shuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final Halffloat128Mask test(Test op) {
+        return super.testTemplate(Halffloat128Mask.class, op);  // specialize
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, Vector<Halffloat> v) {
+        return super.compareTemplate(Halffloat128Mask.class, op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, short s) {
+        return super.compareTemplate(Halffloat128Mask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat128Mask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat128Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return super.compareTemplate(Halffloat128Mask.class, op, v, (Halffloat128Mask) m);
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat128Vector)
+            super.blendTemplate(Halffloat128Mask.class,
+                                (Halffloat128Vector) v,
+                                (Halffloat128Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector slice(int origin, Vector<Halffloat> v) {
+        return (Halffloat128Vector) super.sliceTemplate(origin, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector slice(int origin) {
+        return (Halffloat128Vector) super.sliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector unslice(int origin, Vector<Halffloat> w, int part) {
+        return (Halffloat128Vector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {
+        return (Halffloat128Vector)
+            super.unsliceTemplate(Halffloat128Mask.class,
+                                  origin, w, part,
+                                  (Halffloat128Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector unslice(int origin) {
+        return (Halffloat128Vector) super.unsliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector rearrange(VectorShuffle<Halffloat> s) {
+        return (Halffloat128Vector)
+            super.rearrangeTemplate(Halffloat128Shuffle.class,
+                                    (Halffloat128Shuffle) s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector rearrange(VectorShuffle<Halffloat> shuffle,
+                                  VectorMask<Halffloat> m) {
+        return (Halffloat128Vector)
+            super.rearrangeTemplate(Halffloat128Shuffle.class,
+                                    Halffloat128Mask.class,
+                                    (Halffloat128Shuffle) shuffle,
+                                    (Halffloat128Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector rearrange(VectorShuffle<Halffloat> s,
+                                  Vector<Halffloat> v) {
+        return (Halffloat128Vector)
+            super.rearrangeTemplate(Halffloat128Shuffle.class,
+                                    (Halffloat128Shuffle) s,
+                                    (Halffloat128Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector selectFrom(Vector<Halffloat> v) {
+        return (Halffloat128Vector)
+            super.selectFromTemplate((Halffloat128Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat128Vector selectFrom(Vector<Halffloat> v,
+                                   VectorMask<Halffloat> m) {
+        return (Halffloat128Vector)
+            super.selectFromTemplate((Halffloat128Vector) v,
+                                     (Halffloat128Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    public short lane(int i) {
+        short bits;
+        switch(i) {
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            case 4: bits = laneHelper(4); break;
+            case 5: bits = laneHelper(5); break;
+            case 6: bits = laneHelper(6); break;
+            case 7: bits = laneHelper(7); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Halffloat.shortBitsToHalffloat(bits);
+    }
+
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) -> {
+                     short[] vecarr = vec.vec();
+                     return (long)Halffloat.shortToShortBits(vecarr[ix]);
+                     });
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat128Vector withLane(int i, short e) {
+        switch(i) {
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    public Halffloat128Vector withLaneHelper(int i, short e) {
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Halffloat.shortToShortBits(e),
+                                (v, ix, bits) -> {
+                                    short[] res = v.vec().clone();
+                                    res[ix] = Halffloat.shortBitsToHalffloat((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat128Mask extends AbstractMask<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat128Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        Halffloat128Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        Halffloat128Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat128Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat128Mask(res);
+        }
+
+        @Override
+        Halffloat128Mask bOp(VectorMask<Halffloat> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat128Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat128Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat128Vector toVector() {
+            return (Halffloat128Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override  // Converts this mask to species dsp; throws IllegalArgumentException if the lane counts differ.
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));  // plain-Java path: rebuild from a boolean[] copy
+        }
+
+        @Override  // Lane-wise equality, computed as this XOR (NOT mask); mask must be a non-null Halffloat128Mask.
+        @ForceInline
+        public Halffloat128Mask eq(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return xor(m.not());
+        }
+
+        // Unary operations
+
+        @Override  // Lane-wise negation, computed as this XOR the all-true mask.
+        @ForceInline
+        public Halffloat128Mask not() {
+            return xor(maskAll(true));
+        }
+
+        // Binary operations
+
+        @Override  // Lane-wise AND with mask, which must be a non-null Halffloat128Mask.
+        @ForceInline
+        public Halffloat128Mask and(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat128Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));  // plain-Java path via bOp
+        }
+
+        @Override  // Lane-wise OR with mask, which must be a non-null Halffloat128Mask.
+        @ForceInline
+        public Halffloat128Mask or(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat128Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));  // plain-Java path via bOp
+        }
+
+        @ForceInline  // Lane-wise XOR with mask; eq() and not() in this class are built on it.
+        /* package-private */
+        Halffloat128Mask xor(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat128Mask m = (Halffloat128Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat128Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));  // plain-Java path via bOp
+        }
+
+        // Mask Query operations
+
+        @Override  // VECTOR_OP_MASK_TRUECOUNT reduction, narrowed to int.
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));  // plain-Java path over the boolean[]
+        }
+
+        @Override  // VECTOR_OP_MASK_FIRSTTRUE reduction, narrowed to int.
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));  // plain-Java path over the boolean[]
+        }
+
+        @Override  // VECTOR_OP_MASK_LASTTRUE reduction, narrowed to int.
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));  // plain-Java path over the boolean[]
+        }
+
+        @Override  // VECTOR_OP_MASK_TOLONG reduction; rejects masks with more than Long.SIZE lanes.
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat128Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));  // plain-Java path over the boolean[]
+        }
+
+        // Reductions
+
+        @Override  // Calls VectorSupport.test with BT_ne, passing this mask and an all-true mask.
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat128Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat128Mask)m).getBits()));  // plain-Java path ignores the 2nd mask
+        }
+
+        @Override  // Calls VectorSupport.test with BT_overflow, passing this mask and an all-true mask.
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat128Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat128Mask)m).getBits()));  // plain-Java path ignores the 2nd mask
+        }
+
+        @ForceInline  // Returns the mask whose lanes all equal bit; bit is passed to the intrinsic as -1 or 0.
+        /*package-private*/
+        static Halffloat128Mask maskAll(boolean bit) {
+            return VectorSupport.broadcastCoerced(Halffloat128Mask.class, short.class, VLENGTH,
+                                                  (bit ? -1 : 0), null,
+                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));  // plain-Java path: shared constants
+        }
+        private static final Halffloat128Mask  TRUE_MASK = new Halffloat128Mask(true);  // returned by maskAll's lambda for a nonzero value
+        private static final Halffloat128Mask FALSE_MASK = new Halffloat128Mask(false); // returned by maskAll's lambda for zero
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat128Shuffle extends AbstractShuffle<Halffloat> {  // shuffle whose toVector() yields a Halffloat128Vector
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat128Shuffle(byte[] reorder) {  // all four constructors only forward to AbstractShuffle with VLENGTH
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat128Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat128Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat128Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override  // Returns the enclosing vector class's VSPECIES constant.
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat128Shuffle IOTA = new Halffloat128Shuffle(IDENTITY);  // built from the IDENTITY operator
+
+        @Override  // Converts the shuffle to a vector; the lambda is the plain-Java path via toVectorTemplate().
+        @ForceInline
+        public Halffloat128Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat128Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat128Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));
+        }
+
+        @Override  // Re-creates this shuffle for species s from an int[] copy; lane counts must match.
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        @ForceInline  // Composes two shuffles: result[i] = this.reorder()[shuffle.reorder()[i]].
+        @Override
+        public Halffloat128Shuffle rearrange(VectorShuffle<Halffloat> shuffle) {
+            Halffloat128Shuffle s = (Halffloat128Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat128Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline  // Unmasked short[] variant: forwards a and offset unchanged to the template.
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline  // Masked short[] variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
+    }
+
+
+    @ForceInline  // Unmasked char[] variant: forwards a and offset unchanged to the template.
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset) {
+        return super.fromCharArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline  // Masked char[] variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromCharArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
+    }
+
+
+    @ForceInline  // Unmasked byte[] variant: forwards a and offset unchanged to the template.
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset) {
+        return super.fromByteArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline  // Masked byte[] variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
+    }
+
+    @ForceInline  // Unmasked ByteBuffer variant: forwards bb and offset unchanged to the template.
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
+        return super.fromByteBuffer0Template(bb, offset);  // specialize
+    }
+
+    @ForceInline  // Masked ByteBuffer variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteBuffer0Template(Halffloat128Mask.class, bb, offset, (Halffloat128Mask) m);  // specialize
+    }
+
+    @ForceInline  // Unmasked short[] variant: forwards a and offset unchanged to the template.
+    @Override
+    final
+    void intoArray0(short[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline  // Masked short[] variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);
+    }
+
+
+
+    @ForceInline  // Unmasked byte[] variant: forwards a and offset unchanged to the template.
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset) {
+        super.intoByteArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline  // Masked byte[] variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoByteArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
+    }
+
+    @ForceInline  // Masked ByteBuffer variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        super.intoByteBuffer0Template(Halffloat128Mask.class, bb, offset, (Halffloat128Mask) m);
+    }
+
+    @ForceInline  // Masked char[] variant: m must be a Halffloat128Mask (the cast fails otherwise).
+    @Override
+    final
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoCharArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);
+    }
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java
new file mode 100644
index 00000000000..02269c2f4d8
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat256Vector.java
@@ -0,0 +1,935 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat256Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =  // species shared by the vector, mask and shuffle classes in this file
+        (HalffloatSpecies) HalffloatVector.SPECIES_256;
+
+    static final VectorShape VSHAPE =  // returned by shape()
+        VSPECIES.vectorShape();
+
+    static final Class<Halffloat256Vector> VCLASS = Halffloat256Vector.class;  // class token passed to VectorSupport calls
+
+    static final int VSIZE = VSPECIES.vectorBitSize();  // vector size in bits; byteSize() divides it by Byte.SIZE
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+    Halffloat256Vector(short[] v) {  // passes v straight to the superclass constructor; no copy is made here
+        super(v);
+    }
+
+    // For compatibility as Halffloat256Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat256Vector(Object v) {  // v must actually be a short[]; the cast fails otherwise
+        this((short[]) v);
+    }
+
+    static final Halffloat256Vector ZERO = new Halffloat256Vector(new short[VLENGTH]);  // every lane holds the short value 0
+    static final Halffloat256Vector IOTA = new Halffloat256Vector(VSPECIES.iotaArray());  // lanes come from the species' iotaArray()
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();  // return value is discarded; called only for its caching effect
+        VSPECIES.withLanes(LaneType.BYTE);  // likewise discarded
+    }
+
+    // Specialized extractors
+
+    @ForceInline  // Returns the class constant VSPECIES.
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline  // Always Halffloat.class.
+    @Override
+    public final Class<Halffloat> elementType() { return Halffloat.class; }
+
+    @ForceInline  // Returns Halffloat.SIZE; presumably the lane width in bits, as with Short.SIZE (confirm in Halffloat).
+    @Override
+    public final int elementSize() { return Halffloat.SIZE; }
+
+    @ForceInline  // Returns the class constant VSHAPE (VSPECIES.vectorShape()).
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline  // Returns the lane count VLENGTH (VSPECIES.laneCount()).
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline  // Returns VSIZE (VSPECIES.vectorBitSize()).
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    @ForceInline  // Returns the bit size VSIZE divided by Byte.SIZE.
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    /*package-private*/
+    @ForceInline  // Returns the payload itself, viewed as short[]; no copy is made.
+    final @Override
+    short[] vec() {
+        return (short[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override  // Narrows super.broadcastTemplate(short) to Halffloat256Vector.
+    @ForceInline
+    public final Halffloat256Vector broadcast(short e) {
+        return (Halffloat256Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override  // Narrows super.broadcastTemplate(long) to Halffloat256Vector.
+    @ForceInline
+    public final Halffloat256Vector broadcast(long e) {
+        return (Halffloat256Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override  // Builds a Halffloat256Mask from bits; that constructor copies the lanes via prepare(bits, 0).
+    @ForceInline
+    Halffloat256Mask maskFromArray(boolean[] bits) {
+        return new Halffloat256Mask(bits);
+    }
+
+    @Override  // Returns the shared Halffloat256Shuffle.IOTA constant.
+    @ForceInline
+    Halffloat256Shuffle iotaShuffle() { return Halffloat256Shuffle.IOTA; }
+
+    @ForceInline  // Shuffle whose index i is i*step + start; wrap selects wrapping and is passed to shuffleIota as 1 or 0.
+    Halffloat256Shuffle iotaShuffle(int start, int step, boolean wrap) {
+      if (wrap) {
+        return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));  // plain-Java path, wrapped
+      } else {
+        return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));  // plain-Java path, no wrapping
+      }
+    }
+
+    @Override  // Wraps reorder in a new Halffloat256Shuffle.
+    @ForceInline
+    Halffloat256Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat256Shuffle(reorder); }
+
+    @Override  // Builds a new Halffloat256Shuffle from indexes and the int argument i.
+    @ForceInline
+    Halffloat256Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat256Shuffle(indexes, i); }
+
+    @Override  // Builds a new Halffloat256Shuffle from the index function fn.
+    @ForceInline
+    Halffloat256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat256Shuffle(fn); }
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline  // vec is handed to the constructor as-is, so the new vector shares the array.
+    final @Override
+    Halffloat256Vector vectorFactory(short[] vec) {
+        return new Halffloat256Vector(vec);
+    }
+
+    @ForceInline  // Narrows super.asByteVectorRawTemplate() to Byte256Vector.
+    final @Override
+    Byte256Vector asByteVectorRaw() {
+        return (Byte256Vector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline  // Forwards laneType unchanged to super.asVectorRawTemplate.
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline  // Unmasked unary op: narrows super.uOpTemplate(f) to Halffloat256Vector.
+    final @Override
+    Halffloat256Vector uOp(FUnOp f) {
+        return (Halffloat256Vector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline  // Masked unary op: m must be a Halffloat256Mask (the cast fails otherwise).
+    final @Override
+    Halffloat256Vector uOp(VectorMask<Halffloat> m, FUnOp f) {
+        return (Halffloat256Vector)
+            super.uOpTemplate((Halffloat256Mask)m, f);  // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline  // Unmasked binary op: v must be a Halffloat256Vector (the cast fails otherwise).
+    final @Override
+    Halffloat256Vector bOp(Vector<Halffloat> v, FBinOp f) {
+        return (Halffloat256Vector) super.bOpTemplate((Halffloat256Vector)v, f);  // specialize
+    }
+
+    @ForceInline  // Masked binary op: v and m must be this file's 256-bit vector and mask classes.
+    final @Override
+    Halffloat256Vector bOp(Vector<Halffloat> v,
+                     VectorMask<Halffloat> m, FBinOp f) {
+        return (Halffloat256Vector)
+            super.bOpTemplate((Halffloat256Vector)v, (Halffloat256Mask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline  // Unmasked ternary op: v1 and v2 must be Halffloat256Vector (the casts fail otherwise).
+    final @Override
+    Halffloat256Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
+        return (Halffloat256Vector)
+            super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2,
+                              f);  // specialize
+    }
+
+    @ForceInline  // Masked ternary op: v1, v2 and m must be this file's 256-bit vector and mask classes.
+    final @Override
+    Halffloat256Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
+                     VectorMask<Halffloat> m, FTriOp f) {
+        return (Halffloat256Vector)
+            super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2,
+                              (Halffloat256Mask)m, f);  // specialize
+    }
+
+    @ForceInline  // Forwards v, m and f unchanged to super.rOpTemplate and returns its short result.
+    final @Override
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override  // Forwards conv, rsp and part unchanged to super.convertShapeTemplate.
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override  // Forwards toSpecies and part unchanged to super.reinterpretShapeTemplate.
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override  // Unmasked unary lanewise: narrows super.lanewiseTemplate(op) to Halffloat256Vector.
+    @ForceInline
+    public Halffloat256Vector lanewise(Unary op) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op);  // specialize
+    }
+
+    @Override  // Masked unary lanewise: m must be a Halffloat256Mask (the cast fails otherwise).
+    @ForceInline
+    public Halffloat256Vector lanewise(Unary op, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override  // Unmasked binary lanewise: v is forwarded without a cast.
+    @ForceInline
+    public Halffloat256Vector lanewise(Binary op, Vector<Halffloat> v) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override  // Masked binary lanewise: m must be a Halffloat256Mask (the cast fails otherwise).
+    @ForceInline
+    public Halffloat256Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    /* Unmasked ternary lanewise; note the method is public final, not package-private. */
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override  // Masked ternary lanewise: m must be a Halffloat256Mask (the cast fails otherwise).
+    @ForceInline
+    public final
+    Halffloat256Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v1, v2, (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override  // Narrows super.addIndexTemplate(scale) to Halffloat256Vector.
+    @ForceInline
+    public final
+    Halffloat256Vector addIndex(int scale) {
+        return (Halffloat256Vector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override  // Unmasked reduction: returns the short result of super.reduceLanesTemplate(op).
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override  // Masked reduction: m must be a Halffloat256Mask (the cast fails otherwise).
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Halffloat> m) {
+        return super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialized
+    }
+
+    @Override  // Same call as reduceLanes(op), with the short result widened to long.
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        return (long) super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override  // Same call as reduceLanes(op, m), with the short result widened to long.
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Halffloat> m) {
+        return (long) super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialized
+    }
+
+    @ForceInline @Override  // Implements Vector.toShuffle(); the shuffle is built by the template for Halffloat256Shuffle.
+    public VectorShuffle<Halffloat> toShuffle() {
+        return super.toShuffleTemplate(Halffloat256Shuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override  // Passes Halffloat256Mask.class and op to super.testTemplate.
+    @ForceInline
+    public final Halffloat256Mask test(Test op) {
+        return super.testTemplate(Halffloat256Mask.class, op);  // specialize
+    }
+
+    // Specialized comparisons
+
+    @Override  // Vector-vs-vector comparison: passes Halffloat256Mask.class, op and v to the template.
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Vector<Halffloat> v) {
+        return super.compareTemplate(Halffloat256Mask.class, op, v);  // specialize
+    }
+
+    @Override  // Vector-vs-short comparison: passes Halffloat256Mask.class, op and s to the template.
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, short s) {
+        return super.compareTemplate(Halffloat256Mask.class, op, s);  // specialize
+    }
+
+    @Override  // Vector-vs-long comparison: passes Halffloat256Mask.class, op and s to the template.
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat256Mask.class, op, s);  // specialize
+    }
+
+    @Override  // Masked comparison: m must be a Halffloat256Mask (the cast fails otherwise).
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return super.compareTemplate(Halffloat256Mask.class, op, v, (Halffloat256Mask) m);
+    }
+
+
+    @Override  // v and m must be this file's 256-bit vector and mask classes (the casts fail otherwise).
+    @ForceInline
+    public Halffloat256Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.blendTemplate(Halffloat256Mask.class,
+                                (Halffloat256Vector) v,
+                                (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override  // Two-vector slice: narrows super.sliceTemplate(origin, v) to Halffloat256Vector.
+    @ForceInline
+    public Halffloat256Vector slice(int origin, Vector<Halffloat> v) {
+        return (Halffloat256Vector) super.sliceTemplate(origin, v);  // specialize
+    }
+
+    @Override  // Single-vector slice: narrows super.sliceTemplate(origin) to Halffloat256Vector.
+    @ForceInline
+    public Halffloat256Vector slice(int origin) {
+        return (Halffloat256Vector) super.sliceTemplate(origin);  // specialize
+    }
+
+    @Override  // Unmasked unslice: narrows super.unsliceTemplate(origin, w, part) to Halffloat256Vector.
+    @ForceInline
+    public Halffloat256Vector unslice(int origin, Vector<Halffloat> w, int part) {
+        return (Halffloat256Vector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override  // Masked unslice: m must be a Halffloat256Mask (the cast fails otherwise).
+    @ForceInline
+    public Halffloat256Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.unsliceTemplate(Halffloat256Mask.class,
+                                  origin, w, part,
+                                  (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override  // Single-argument unslice: narrows super.unsliceTemplate(origin) to Halffloat256Vector.
+    @ForceInline
+    public Halffloat256Vector unslice(int origin) {
+        return (Halffloat256Vector) super.unsliceTemplate(origin);  // specialize
+    }
+
+    @Override  // s must be a Halffloat256Shuffle (the cast fails otherwise).
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Halffloat> s) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    (Halffloat256Shuffle) s);  // specialize
+    }
+
+    @Override  // Masked rearrange: shuffle and m must be this file's 256-bit shuffle and mask classes.
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Halffloat> shuffle,
+                                  VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    Halffloat256Mask.class,
+                                    (Halffloat256Shuffle) shuffle,
+                                    (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override  // Two-vector rearrange: s and v must be this file's 256-bit shuffle and vector classes.
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Halffloat> s,
+                                  Vector<Halffloat> v) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    (Halffloat256Shuffle) s,
+                                    (Halffloat256Vector) v);  // specialize
+    }
+
+    @Override  // Unmasked selectFrom: v must be a Halffloat256Vector (the cast fails otherwise).
+    @ForceInline
+    public Halffloat256Vector selectFrom(Vector<Halffloat> v) {
+        return (Halffloat256Vector)
+            super.selectFromTemplate((Halffloat256Vector) v);  // specialize
+    }
+
+    @Override  // Masked selectFrom: v and m must be this file's 256-bit vector and mask classes.
+    @ForceInline
+    public Halffloat256Vector selectFrom(Vector<Halffloat> v,
+                                   VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.selectFromTemplate((Halffloat256Vector) v,
+                                     (Halffloat256Mask) m);  // specialize
+    }
+
+
+    @ForceInline  // Returns lane i; any i outside 0..15 throws IllegalArgumentException.
+    @Override
+    public short lane(int i) {
+        short bits;
+        switch(i) {  // every case calls laneHelper with a literal index; presumably so the intrinsic sees a constant
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            case 4: bits = laneHelper(4); break;
+            case 5: bits = laneHelper(5); break;
+            case 6: bits = laneHelper(6); break;
+            case 7: bits = laneHelper(7); break;
+            case 8: bits = laneHelper(8); break;
+            case 9: bits = laneHelper(9); break;
+            case 10: bits = laneHelper(10); break;
+            case 11: bits = laneHelper(11); break;
+            case 12: bits = laneHelper(12); break;
+            case 13: bits = laneHelper(13); break;
+            case 14: bits = laneHelper(14); break;
+            case 15: bits = laneHelper(15); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Halffloat.shortBitsToHalffloat(bits);  // converts the raw bits from laneHelper back to the lane value
+    }
+
+    public short laneHelper(int i) {  // extracts lane i as raw short bits; does no range check of its own
+        return (short) VectorSupport.extract(
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) -> {  // plain-Java path: read the lane straight from the backing array
+                     short[] vecarr = vec.vec();
+                     return (long)Halffloat.shortToShortBits(vecarr[ix]);  // widened to long; the (short) cast above narrows it back
+                     });
+    }
+
+    @ForceInline  // Returns a vector with lane i replaced by e; any i outside 0..15 throws IllegalArgumentException.
+    @Override
+    public Halffloat256Vector withLane(int i, short e) {
+        switch(i) {  // every case calls withLaneHelper with a literal index; presumably so the intrinsic sees a constant
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            case 8: return withLaneHelper(8, e);
+            case 9: return withLaneHelper(9, e);
+            case 10: return withLaneHelper(10, e);
+            case 11: return withLaneHelper(11, e);
+            case 12: return withLaneHelper(12, e);
+            case 13: return withLaneHelper(13, e);
+            case 14: return withLaneHelper(14, e);
+            case 15: return withLaneHelper(15, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    public Halffloat256Vector withLaneHelper(int i, short e) {  // inserts e at lane i; does no range check of its own
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Halffloat.shortToShortBits(e),
+                                (v, ix, bits) -> {  // plain-Java path
+                                    short[] res = v.vec().clone();  // clone so the source vector's array is not modified
+                                    res[ix] = Halffloat.shortBitsToHalffloat((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat256Mask extends AbstractMask<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM; same value as the enclosing class's VLENGTH
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat256Mask(boolean[] bits) {  // takes the lanes from the start of bits (offset 0)
+            this(bits, 0);
+        }
+
+        Halffloat256Mask(boolean[] bits, int offset) {  // stores a private copy of the lanes made by prepare(bits, offset)
+            super(prepare(bits, offset));
+        }
+
+        Halffloat256Mask(boolean val) {  // every lane is set to val
+            super(prepare(val));
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            // Copies laneCount() values of bits, starting at offset, into a fresh array so the mask
+            // never aliases the caller's array. A null or too-short source fails as the element loop did
+            // (NullPointerException / index out of bounds), now raised by System.arraycopy.
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            System.arraycopy(bits, offset, newBits, 0, newBits.length);
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {  // returns a fresh laneCount()-long array filled with val
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline  // Returns the enclosing class's VSPECIES constant.
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline  // Returns the payload itself, viewed as boolean[]; no copy is made.
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat256Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat256Mask(res);
+        }
+
+        @Override
+        Halffloat256Mask bOp(VectorMask<Halffloat> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat256Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat256Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat256Vector toVector() {
+            return (Halffloat256Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask eq(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return xor(m.not());
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask not() {
+            return xor(maskAll(true));
+        }
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask and(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask or(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @ForceInline
+        /* package-private */
+        Halffloat256Mask xor(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat256Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat256Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat256Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat256Mask)m).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat256Mask maskAll(boolean bit) {
+            return VectorSupport.broadcastCoerced(Halffloat256Mask.class, short.class, VLENGTH,
+                                                  (bit ? -1 : 0), null,
+                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final Halffloat256Mask  TRUE_MASK = new Halffloat256Mask(true);
+        private static final Halffloat256Mask FALSE_MASK = new Halffloat256Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat256Shuffle extends AbstractShuffle<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat256Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat256Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat256Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat256Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat256Shuffle IOTA = new Halffloat256Shuffle(IDENTITY);
+
+        @Override
+        @ForceInline
+        public Halffloat256Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat256Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat256Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat256Shuffle rearrange(VectorShuffle<Halffloat> shuffle) {
+            Halffloat256Shuffle s = (Halffloat256Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat256Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset) {
+        return super.fromCharArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromCharArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset) {
+        return super.fromByteArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
+        return super.fromByteBuffer0Template(bb, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteBuffer0Template(Halffloat256Mask.class, bb, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset) {
+        super.intoByteArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoByteArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        super.intoByteBuffer0Template(Halffloat256Mask.class, bb, offset, (Halffloat256Mask) m);
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoCharArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);
+    }
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
new file mode 100644
index 00000000000..4478c74d31f
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
@@ -0,0 +1,935 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat512Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_512;
+    // VSHAPE, VSIZE and VLENGTH below are all read from VSPECIES, so it has to be initialized first.
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();
+
+    static final Class<Halffloat512Vector> VCLASS = Halffloat512Vector.class;
+
+    static final int VSIZE = VSPECIES.vectorBitSize();
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+    Halffloat512Vector(short[] v) {
+        super(v);
+    }
+
+    // Object-typed overload so that Halffloat512Vector::new can be stored
+    // into species.vectorFactory; the argument must actually be a short[].
+    Halffloat512Vector(Object v) {
+        this((short[]) v);
+    }
+
+    static final Halffloat512Vector ZERO = new Halffloat512Vector(new short[VLENGTH]);  // every lane holds 0 bits
+    static final Halffloat512Vector IOTA = new Halffloat512Vector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches while the class initializes.
+        // Keep this list short: doing too much here
+        // causes NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @Override
+    @ForceInline
+    public final HalffloatSpecies vspecies() {
+        // Constant per concrete class: always the 512-bit species held in VSPECIES.
+        // ISSUE: this should probably be a @Stable field inside
+        // AbstractVector rather than a megamorphic method.
+        return VSPECIES;
+    }
+
+    @Override
+    @ForceInline
+    public final Class<Halffloat> elementType() { return ETYPE; }  // ETYPE is Halffloat.class
+
+    @Override
+    @ForceInline
+    public final int elementSize() { return Halffloat.SIZE; }
+
+    @Override
+    @ForceInline
+    public final VectorShape shape() { return VSHAPE; }
+
+    @Override
+    @ForceInline
+    public final int length() { return VLENGTH; }  // lane count of the species
+
+    @Override
+    @ForceInline
+    public final int bitSize() { return VSIZE; }
+
+    @Override
+    @ForceInline
+    public final int byteSize() { return VSIZE / Byte.SIZE; }  // bits to bytes
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    short[] vec() {
+        return (short[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @ForceInline
+    @Override
+    public final Halffloat512Vector broadcast(short e) {
+        return (Halffloat512Vector) super.broadcastTemplate(e);  // narrow the template result to this class
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Vector broadcast(long e) {
+        return (Halffloat512Vector) super.broadcastTemplate(e);  // long overload, same narrowing
+    }
+
+    @ForceInline
+    @Override
+    Halffloat512Mask maskFromArray(boolean[] bits) {
+        return new Halffloat512Mask(bits, 0);  // what the one-argument constructor delegates to
+    }
+
+    @ForceInline
+    @Override
+    Halffloat512Shuffle iotaShuffle() { return Halffloat512Shuffle.IOTA; }
+
+    @ForceInline
+    Halffloat512Shuffle iotaShuffle(int start, int step, boolean wrap) {
+        // The literal 0/1 argument mirrors wrap; only the wrapping variant routes indexes through wrapToRange.
+        if (!wrap) {
+            return (Halffloat512Shuffle) VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                    (len, first, stride, sp) -> sp.shuffleFromOp(i -> (i*stride + first)));
+        }
+        return (Halffloat512Shuffle) VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (len, first, stride, sp) -> sp.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*stride + first, len))));
+    }
+
+    @ForceInline
+    @Override
+    Halffloat512Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat512Shuffle(reorder); }  // byte[] form
+
+    @ForceInline
+    @Override
+    Halffloat512Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat512Shuffle(indexes, i); }  // int[] + offset form
+
+    @ForceInline
+    @Override
+    Halffloat512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat512Shuffle(fn); }  // index-function form
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat512Vector vectorFactory(short[] vec) {
+        return new Halffloat512Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte512Vector asByteVectorRaw() {
+        return (Byte512Vector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector uOp(FUnOp f) {
+        return (Halffloat512Vector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector uOp(VectorMask<Halffloat> m, FUnOp f) {
+        return (Halffloat512Vector)
+            super.uOpTemplate((Halffloat512Mask)m, f);  // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector bOp(Vector<Halffloat> v, FBinOp f) {
+        return (Halffloat512Vector) super.bOpTemplate((Halffloat512Vector)v, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector bOp(Vector<Halffloat> v,
+                     VectorMask<Halffloat> m, FBinOp f) {
+        return (Halffloat512Vector)
+            super.bOpTemplate((Halffloat512Vector)v, (Halffloat512Mask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
+        return (Halffloat512Vector)
+            super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2,
+                              f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat512Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
+                     VectorMask<Halffloat> m, FTriOp f) {
+        return (Halffloat512Vector)
+            super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2,
+                              (Halffloat512Mask)m, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Unary op) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Unary op, VectorMask<Halffloat> m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Binary op, Vector<Halffloat> v) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v, (Halffloat512Mask) m);  // specialize
+    }
+
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {
+        return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v1, v2, (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat512Vector addIndex(int scale) {
+        return (Halffloat512Vector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Halffloat> m) {
+        return super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        return (long) super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Halffloat> m) {
+        return (long) super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m);  // specialized
+    }
+
+    @ForceInline
+    public VectorShuffle<Halffloat> toShuffle() {
+        return super.toShuffleTemplate(Halffloat512Shuffle.class); // specialize: passes this vector's shuffle class to the template
+    }
+
+    // Specialized unary testing
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask test(Test op) {
+        return super.testTemplate(Halffloat512Mask.class, op);  // mask class passed explicitly to the template
+    }
+
+    // Specialized comparisons
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, Vector<Halffloat> v) {
+        return super.compareTemplate(Halffloat512Mask.class, op, v);  // vector operand
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, short s) {
+        return super.compareTemplate(Halffloat512Mask.class, op, s);  // short scalar operand
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat512Mask.class, op, s);  // long scalar operand
+    }
+
+    @ForceInline
+    @Override
+    public final Halffloat512Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return super.compareTemplate(Halffloat512Mask.class, op, v, (Halffloat512Mask) m);  // masked: m narrowed to the concrete mask class
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat512Vector)
+            super.blendTemplate(Halffloat512Mask.class,
+                                (Halffloat512Vector) v,
+                                (Halffloat512Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector slice(int origin, Vector<Halffloat> v) {
+        return (Halffloat512Vector) super.sliceTemplate(origin, v);  // two-vector form
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector slice(int origin) {
+        return (Halffloat512Vector) super.sliceTemplate(origin);  // single-vector form
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin, Vector<Halffloat> w, int part) {
+        return (Halffloat512Vector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {
+        return (Halffloat512Vector)
+            super.unsliceTemplate(Halffloat512Mask.class,
+                                  origin, w, part,
+                                  (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector unslice(int origin) {
+        return (Halffloat512Vector) super.unsliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle<Halffloat> s) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    (Halffloat512Shuffle) s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle<Halffloat> shuffle,
+                                  VectorMask<Halffloat> m) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    Halffloat512Mask.class,
+                                    (Halffloat512Shuffle) shuffle,
+                                    (Halffloat512Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector rearrange(VectorShuffle<Halffloat> s,
+                                  Vector<Halffloat> v) {
+        return (Halffloat512Vector)
+            super.rearrangeTemplate(Halffloat512Shuffle.class,
+                                    (Halffloat512Shuffle) s,
+                                    (Halffloat512Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector selectFrom(Vector<Halffloat> v) {
+        return (Halffloat512Vector)
+            super.selectFromTemplate((Halffloat512Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat512Vector selectFrom(Vector<Halffloat> v,
+                                   VectorMask<Halffloat> m) {
+        return (Halffloat512Vector)
+            super.selectFromTemplate((Halffloat512Vector) v,
+                                     (Halffloat512Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    public short lane(int i) {  // returns lane i; a 512-bit vector of 16-bit lanes has 32 of them, so 0..31 are valid
+        short bits;
+        switch(i) {  // one case per lane, so every laneHelper call is made with a literal index
+            case 0:  bits = laneHelper(0);  break;    case 16: bits = laneHelper(16); break;
+            case 1:  bits = laneHelper(1);  break;    case 17: bits = laneHelper(17); break;
+            case 2:  bits = laneHelper(2);  break;    case 18: bits = laneHelper(18); break;
+            case 3:  bits = laneHelper(3);  break;    case 19: bits = laneHelper(19); break;
+            case 4:  bits = laneHelper(4);  break;    case 20: bits = laneHelper(20); break;
+            case 5:  bits = laneHelper(5);  break;    case 21: bits = laneHelper(21); break;
+            case 6:  bits = laneHelper(6);  break;    case 22: bits = laneHelper(22); break;
+            case 7:  bits = laneHelper(7);  break;    case 23: bits = laneHelper(23); break;
+            case 8:  bits = laneHelper(8);  break;    case 24: bits = laneHelper(24); break;
+            case 9:  bits = laneHelper(9);  break;    case 25: bits = laneHelper(25); break;
+            case 10: bits = laneHelper(10); break;    case 26: bits = laneHelper(26); break;
+            case 11: bits = laneHelper(11); break;    case 27: bits = laneHelper(27); break;
+            case 12: bits = laneHelper(12); break;    case 28: bits = laneHelper(28); break;
+            case 13: bits = laneHelper(13); break;    case 29: bits = laneHelper(29); break;
+            case 14: bits = laneHelper(14); break;    case 30: bits = laneHelper(30); break;
+            case 15: bits = laneHelper(15); break;    case 31: bits = laneHelper(31); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Halffloat.shortBitsToHalffloat(bits);  // NOTE(review): lanes 16..31 assume Halffloat.SIZE == 16 - confirm
+    }
+
+    public short laneHelper(int i) {  // raw short bits of lane i; the lambda reads them from the lane array
+        return (short) VectorSupport.extract(
+                VCLASS, ETYPE, VLENGTH,
+                this, i,
+                (self, lane) -> {
+                    short[] lanes = self.vec();
+                    return (long) Halffloat.shortToShortBits(lanes[lane]);
+                });
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat512Vector withLane(int i, short e) {  // vector with lane i replaced by e; valid indexes are 0..31
+        switch(i) {  // one case per lane, so every withLaneHelper call is made with a literal index
+            case 0:  return withLaneHelper(0, e);     case 16: return withLaneHelper(16, e);
+            case 1:  return withLaneHelper(1, e);     case 17: return withLaneHelper(17, e);
+            case 2:  return withLaneHelper(2, e);     case 18: return withLaneHelper(18, e);
+            case 3:  return withLaneHelper(3, e);     case 19: return withLaneHelper(19, e);
+            case 4:  return withLaneHelper(4, e);     case 20: return withLaneHelper(20, e);
+            case 5:  return withLaneHelper(5, e);     case 21: return withLaneHelper(21, e);
+            case 6:  return withLaneHelper(6, e);     case 22: return withLaneHelper(22, e);
+            case 7:  return withLaneHelper(7, e);     case 23: return withLaneHelper(23, e);
+            case 8:  return withLaneHelper(8, e);     case 24: return withLaneHelper(24, e);
+            case 9:  return withLaneHelper(9, e);     case 25: return withLaneHelper(25, e);
+            case 10: return withLaneHelper(10, e);    case 26: return withLaneHelper(26, e);
+            case 11: return withLaneHelper(11, e);    case 27: return withLaneHelper(27, e);
+            case 12: return withLaneHelper(12, e);    case 28: return withLaneHelper(28, e);
+            case 13: return withLaneHelper(13, e);    case 29: return withLaneHelper(29, e);
+            case 14: return withLaneHelper(14, e);    case 30: return withLaneHelper(30, e);
+            case 15: return withLaneHelper(15, e);    case 31: return withLaneHelper(31, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    public Halffloat512Vector withLaneHelper(int i, short e) {
+        return VectorSupport.insert(
+                VCLASS, ETYPE, VLENGTH,
+                this, i, (long) Halffloat.shortToShortBits(e),
+                (self, lane, rawBits) -> {
+                    short[] copy = self.vec().clone();  // work on a clone so the receiver's lanes stay untouched
+                    copy[lane] = Halffloat.shortBitsToHalffloat((short) rawBits);
+                    return self.vectorFactory(copy);
+                });
+    }
+
+    // Mask
+
+    static final class Halffloat512Mask extends AbstractMask<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat512Mask(boolean[] bits) {
+            this(bits, 0);  // read the flags from the start of the array
+        }
+
+        Halffloat512Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));  // hands a fresh copy to the superclass, not the caller's array
+        }
+
+        Halffloat512Mask(boolean val) {
+            super(prepare(val));  // uniform mask: every lane set to val
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        @Override
+        Halffloat512Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat512Mask(res);
+        }
+
+        @Override
+        Halffloat512Mask bOp(VectorMask<Halffloat> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat512Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat512Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat512Vector toVector() {
+            return (Halffloat512Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask eq(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return xor(m.not());
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask not() {
+            return xor(maskAll(true));
+        }
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask and(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat512Mask or(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @ForceInline
+        /* package-private */
+        Halffloat512Mask xor(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat512Mask m = (Halffloat512Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat512Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat512Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat512Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat512Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat512Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat512Mask)m).getBits()));
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat512Mask maskAll(boolean bit) {
+            return VectorSupport.broadcastCoerced(Halffloat512Mask.class, short.class, VLENGTH,
+                                                  (bit ? -1 : 0), null,
+                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final Halffloat512Mask  TRUE_MASK = new Halffloat512Mask(true);
+        private static final Halffloat512Mask FALSE_MASK = new Halffloat512Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat512Shuffle extends AbstractShuffle<Halffloat> {  // lane-reordering table paired with Halffloat512Vector
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat512Shuffle(byte[] reorder) {  // all four constructors pass VLENGTH plus their argument(s) to AbstractShuffle
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat512Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat512Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat512Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat512Shuffle IOTA = new Halffloat512Shuffle(IDENTITY);  // built from the IDENTITY function
+
+        @Override
+        @ForceInline
+        public Halffloat512Vector toVector() {  // vector form of this shuffle, obtained through VectorSupport.shuffleToVector
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat512Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat512Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));  // lambda argument: superclass template
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {  // same indexes re-expressed for species s; lane counts must match
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat512Shuffle rearrange(VectorShuffle<Halffloat> shuffle) {  // composition: result[i] = this[shuffle[i]]
+            Halffloat512Shuffle s = (Halffloat512Shuffle) shuffle;  // any other shuffle class fails the cast
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat512Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset) {  // unmasked short[] variant
+        return super.fromArray0Template(a, offset);  // specialize: (a, offset) forwarded unchanged to the superclass template
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {  // masked short[] variant; m must be a Halffloat512Mask
+        return super.fromArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);  // specialize: adds the mask class token
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset) {  // unmasked char[] variant
+        return super.fromCharArray0Template(a, offset);  // specialize: (a, offset) forwarded unchanged to the superclass template
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {  // masked char[] variant; m must be a Halffloat512Mask
+        return super.fromCharArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);  // specialize: adds the mask class token
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset) {  // unmasked byte[] variant
+        return super.fromByteArray0Template(a, offset);  // specialize: (a, offset) forwarded unchanged to the superclass template
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {  // masked byte[] variant; m must be a Halffloat512Mask
+        return super.fromByteArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);  // specialize: adds the mask class token
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {  // unmasked ByteBuffer variant
+        return super.fromByteBuffer0Template(bb, offset);  // specialize: (bb, offset) forwarded unchanged to the superclass template
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {  // masked ByteBuffer variant; m must be a Halffloat512Mask
+        return super.fromByteBuffer0Template(Halffloat512Mask.class, bb, offset, (Halffloat512Mask) m);  // specialize: adds the mask class token
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset) {  // unmasked short[] variant
+        super.intoArray0Template(a, offset);  // specialize: (a, offset) forwarded unchanged to the superclass template
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {  // masked short[] variant; m must be a Halffloat512Mask
+        super.intoArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);  // adds the mask class token
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset) {  // unmasked byte[] variant
+        super.intoByteArray0Template(a, offset);  // specialize: (a, offset) forwarded unchanged to the superclass template
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {  // masked byte[] variant; m must be a Halffloat512Mask
+        super.intoByteArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);  // specialize: adds the mask class token
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {  // masked ByteBuffer variant; m must be a Halffloat512Mask
+        super.intoByteBuffer0Template(Halffloat512Mask.class, bb, offset, (Halffloat512Mask) m);  // adds the mask class token
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {  // masked char[] variant; m must be a Halffloat512Mask
+        super.intoCharArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m);  // adds the mask class token
+    }
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
new file mode 100644
index 00000000000..ccc9ea45ac9
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java
@@ -0,0 +1,911 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class Halffloat64Vector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_64;  // the species every constant below is derived from
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();  // returned by shape()
+
+    static final Class<Halffloat64Vector> VCLASS = Halffloat64Vector.class;  // class token passed to VectorSupport calls
+
+    static final int VSIZE = VSPECIES.vectorBitSize();  // returned by bitSize(); byteSize() divides it by Byte.SIZE
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+    Halffloat64Vector(short[] v) {  // v is handed to the superclass constructor as-is; no copy is made here
+        super(v);
+    }
+
+    // For compatibility as Halffloat64Vector::new,
+    // stored into species.vectorFactory.
+    Halffloat64Vector(Object v) {  // v must actually be a short[]; anything else fails the cast below
+        this((short[]) v);
+    }
+
+    static final Halffloat64Vector ZERO = new Halffloat64Vector(new short[VLENGTH]);  // every lane holds zero bits
+    static final Halffloat64Vector IOTA = new Halffloat64Vector(VSPECIES.iotaArray());  // lanes taken from the species' iota array
+
+    static {  // runs once at class initialization
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();  // result discarded; called only for its caching effect
+        VSPECIES.withLanes(LaneType.BYTE);  // result discarded; called only for its caching effect
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {  // always the class-level VSPECIES constant
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Halffloat> elementType() { return Halffloat.class; }  // constant; same class object as ETYPE
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Halffloat.SIZE; }  // constant taken from Halffloat (presumably a bit count -- confirm in Halffloat)
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }  // the species' vectorShape(), cached in VSHAPE
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }  // the species' laneCount(), cached in VLENGTH
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }  // the species' vectorBitSize(), cached in VSIZE
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }  // bit size converted to bytes
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    short[] vec() {  // the payload array itself, not a copy; lane values are stored as short
+        return (short[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final Halffloat64Vector broadcast(short e) {  // short overload
+        return (Halffloat64Vector) super.broadcastTemplate(e);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Vector broadcast(long e) {  // long overload; resolves to the long form of broadcastTemplate
+        return (Halffloat64Vector) super.broadcastTemplate(e);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Mask maskFromArray(boolean[] bits) {  // new mask built by the Halffloat64Mask(boolean[]) constructor
+        return new Halffloat64Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle iotaShuffle() { return Halffloat64Shuffle.IOTA; }  // shared constant, no allocation
+
+    @ForceInline  // NOTE(review): unlike the no-arg overload this one carries no @Override -- confirm whether it overrides a supertype method
+    Halffloat64Shuffle iotaShuffle(int start, int step, boolean wrap) {  // shuffle whose index i is i*step + start, optionally wrapped
+      if (wrap) {  // wrapping variant: last int argument is 1 and the lambda wraps each index
+        return (Halffloat64Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+      } else {  // non-wrapping variant: last int argument is 0 and indexes are used as computed
+        return (Halffloat64Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+    }
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat64Shuffle(reorder); }  // wraps via the byte[] constructor
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat64Shuffle(indexes, i); }  // wraps via the (int[], int) constructor
+
+    @Override
+    @ForceInline
+    Halffloat64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat64Shuffle(fn); }  // wraps via the IntUnaryOperator constructor
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    Halffloat64Vector vectorFactory(short[] vec) {  // vec goes straight to the constructor; no copy is made here
+        return new Halffloat64Vector(vec);
+    }
+
+    @ForceInline
+    final @Override
+    Byte64Vector asByteVectorRaw() {  // byte-lane view with the 64-bit shape's byte vector class
+        return (Byte64Vector) super.asByteVectorRawTemplate();  // specialize: narrows the template result to Byte64Vector
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {  // result type stays wildcarded; no narrowing cast here
+        return super.asVectorRawTemplate(laneType);  // specialize: laneType forwarded unchanged
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector uOp(FUnOp f) {  // unmasked unary form
+        return (Halffloat64Vector) super.uOpTemplate(f);  // specialize: narrows the template result to this shape
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector uOp(VectorMask<Halffloat> m, FUnOp f) {  // masked unary form; m must be a Halffloat64Mask
+        return (Halffloat64Vector)
+            super.uOpTemplate((Halffloat64Mask)m, f);  // specialize: mask downcast before the template call
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector bOp(Vector<Halffloat> v, FBinOp f) {  // unmasked binary form; v must be a Halffloat64Vector
+        return (Halffloat64Vector) super.bOpTemplate((Halffloat64Vector)v, f);  // specialize: operand downcast before the template call
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector bOp(Vector<Halffloat> v,
+                     VectorMask<Halffloat> m, FBinOp f) {  // masked binary form; v and m must be the 64-bit Halffloat classes
+        return (Halffloat64Vector)
+            super.bOpTemplate((Halffloat64Vector)v, (Halffloat64Mask)m,
+                              f);  // specialize: operand and mask downcast before the template call
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {  // unmasked ternary form; v1 and v2 must be Halffloat64Vector
+        return (Halffloat64Vector)
+            super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2,
+                              f);  // specialize: both operands downcast before the template call
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat64Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
+                     VectorMask<Halffloat> m, FTriOp f) {  // masked ternary form; v1, v2 and m must be the 64-bit Halffloat classes
+        return (Halffloat64Vector)
+            super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2,
+                              (Halffloat64Mask)m, f);  // specialize: operands and mask downcast before the template call
+    }
+
+    @ForceInline
+    final @Override
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {  // returns a scalar short; m is passed on without a downcast
+        return super.rOpTemplate(v, m, f);  // specialize: all three arguments forwarded unchanged
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
+                           VectorSpecies<F> rsp, int part) {  // result type follows the target species rsp
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize: all three arguments forwarded unchanged
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {  // result type follows the target species toSpecies
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize: both arguments forwarded unchanged
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Unary op) {  // unmasked unary form
+        return (Halffloat64Vector) super.lanewiseTemplate(op);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Unary op, VectorMask<Halffloat> m) {  // masked unary form; m must be a Halffloat64Mask
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m);  // specialize: adds the mask class token
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Binary op, Vector<Halffloat> v) {  // unmasked binary form; v is passed on without a downcast
+        return (Halffloat64Vector) super.lanewiseTemplate(op, v);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {  // masked binary form; m must be a Halffloat64Mask
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v, (Halffloat64Mask) m);  // specialize: adds the mask class token
+    }
+
+
+    /*NOTE(review): this marker used to read "package-private", but the method below is declared public final*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {  // unmasked ternary form; v1 and v2 are passed on without a downcast
+        return (Halffloat64Vector) super.lanewiseTemplate(op, v1, v2);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {  // masked ternary form; m must be a Halffloat64Mask
+        return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v1, v2, (Halffloat64Mask) m);  // specialize: adds the mask class token
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat64Vector addIndex(int scale) {  // scale is forwarded unchanged
+        return (Halffloat64Vector) super.addIndexTemplate(scale);  // specialize: narrows the template result to this shape
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op) {  // unmasked reduction; the result is a scalar short
+        return super.reduceLanesTemplate(op);  // specialized: op forwarded unchanged
+    }
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Halffloat> m) {  // masked reduction; m must be a Halffloat64Mask
+        return super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m);  // specialized: adds the mask class token
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {  // same reduction as reduceLanes(op), returned as long
+        return (long) super.reduceLanesTemplate(op);  // specialized: the short result is widened by the cast
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Halffloat> m) {  // masked form; m must be a Halffloat64Mask
+        return (long) super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m);  // specialized: the short result is widened by the cast
+    }
+
+    @ForceInline  // NOTE(review): no @Override here, unlike the neighbouring public methods -- confirm whether one is intended
+    public VectorShuffle<Halffloat> toShuffle() {  // shuffle form of this vector, produced by the superclass template
+        return super.toShuffleTemplate(Halffloat64Shuffle.class); // specialize: passes the shuffle class token
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask test(Test op) {  // the result is a mask of this shape
+        return super.testTemplate(Halffloat64Mask.class, op);  // specialize: passes the mask class token
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Vector<Halffloat> v) {  // vector operand; v is passed on without a downcast
+        return super.compareTemplate(Halffloat64Mask.class, op, v);  // specialize: passes the mask class token
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, short s) {  // scalar short operand
+        return super.compareTemplate(Halffloat64Mask.class, op, s);  // specialize: passes the mask class token
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, long s) {  // scalar long operand; resolves to the long form of compareTemplate
+        return super.compareTemplate(Halffloat64Mask.class, op, s);  // specialize: passes the mask class token
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat64Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {  // masked form; m must be a Halffloat64Mask
+        return super.compareTemplate(Halffloat64Mask.class, op, v, (Halffloat64Mask) m);  // passes the mask class token and the downcast mask
+    }
+
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {  // v and m must be the 64-bit Halffloat classes
+        return (Halffloat64Vector)
+            super.blendTemplate(Halffloat64Mask.class,
+                                (Halffloat64Vector) v,
+                                (Halffloat64Mask) m);  // specialize: class token plus downcast operands
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector slice(int origin, Vector<Halffloat> v) {  // two-vector form; v is passed on without a downcast
+        return (Halffloat64Vector) super.sliceTemplate(origin, v);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector slice(int origin) {  // single-vector form
+        return (Halffloat64Vector) super.sliceTemplate(origin);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin, Vector<Halffloat> w, int part) {  // unmasked form with a background vector w
+        return (Halffloat64Vector) super.unsliceTemplate(origin, w, part);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {  // masked form; m must be a Halffloat64Mask
+        return (Halffloat64Vector)
+            super.unsliceTemplate(Halffloat64Mask.class,
+                                  origin, w, part,
+                                  (Halffloat64Mask) m);  // specialize: class token plus downcast mask
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector unslice(int origin) {  // single-argument form
+        return (Halffloat64Vector) super.unsliceTemplate(origin);  // specialize: narrows the template result to this shape
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle<Halffloat> s) {  // unmasked form; s must be a Halffloat64Shuffle
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    (Halffloat64Shuffle) s);  // specialize: class token plus downcast shuffle
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle<Halffloat> shuffle,
+                                  VectorMask<Halffloat> m) {  // masked form; shuffle and m must be the 64-bit Halffloat classes
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    Halffloat64Mask.class,
+                                    (Halffloat64Shuffle) shuffle,
+                                    (Halffloat64Mask) m);  // specialize: both class tokens plus downcast arguments
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector rearrange(VectorShuffle<Halffloat> s,
+                                  Vector<Halffloat> v) {  // two-vector form; s and v must be the 64-bit Halffloat classes
+        return (Halffloat64Vector)
+            super.rearrangeTemplate(Halffloat64Shuffle.class,
+                                    (Halffloat64Shuffle) s,
+                                    (Halffloat64Vector) v);  // specialize: class token plus downcast arguments
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector selectFrom(Vector<Halffloat> v) {  // unmasked form; v must be a Halffloat64Vector
+        return (Halffloat64Vector)
+            super.selectFromTemplate((Halffloat64Vector) v);  // specialize: operand downcast before the template call
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat64Vector selectFrom(Vector<Halffloat> v,
+                                   VectorMask<Halffloat> m) {  // masked form; v and m must be the 64-bit Halffloat classes
+        return (Halffloat64Vector)
+            super.selectFromTemplate((Halffloat64Vector) v,
+                                     (Halffloat64Mask) m);  // specialize: operand and mask downcast before the template call
+    }
+
+
+    @ForceInline
+    @Override
+    public short lane(int i) {  // value of lane i; accepts 0 <= i <= 3
+        short bits;
+        switch(i) {  // each case forwards its own literal index (presumably so the helper sees a constant -- confirm against the template)
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);  // any index outside 0..3
+        }
+        return Halffloat.shortBitsToHalffloat(bits);  // helper result passed through the Halffloat bits conversion
+    }
+
+    public short laneHelper(int i) {
+        // VectorSupport.extract reports lane i in widened form; narrow it back to short.
+        return (short) VectorSupport.extract(
+            VCLASS, ETYPE, VLENGTH,
+            this, i,
+            // Lambda argument: read the lane directly out of the backing short[].
+            (src, lane) -> (long) Halffloat.shortToShortBits(src.vec()[lane])
+        );
+    }
+
+    @ForceInline
+    @Override
+    public Halffloat64Vector withLane(int i, short e) {  // lane i replaced by e via withLaneHelper; accepts 0 <= i <= 3
+        switch(i) {  // each case forwards its own literal index (presumably so the helper sees a constant -- confirm against the template)
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);  // any index outside 0..3
+        }
+    }
+
+    public Halffloat64Vector withLaneHelper(int i, short e) {
+        // The replacement lane is handed to VectorSupport.insert as raw bits widened to long.
+        return VectorSupport.insert(VCLASS, ETYPE, VLENGTH,
+            this, i, (long) Halffloat.shortToShortBits(e),
+            (src, lane, rawBits) -> {  // lambda argument: copy the lanes, overwrite one, re-wrap
+                short[] lanes = src.vec().clone();
+                lanes[lane] = Halffloat.shortBitsToHalffloat((short) rawBits);
+                return src.vectorFactory(lanes);
+            });
+    }
+
+    // Mask
+
+    static final class Halffloat64Mask extends AbstractMask<Halffloat> {  // mask class whose vspecies() is the enclosing VSPECIES
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat64Mask(boolean[] bits) {
+            this(bits, 0);  // same as the offset form, starting at index 0
+        }
+
+        Halffloat64Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));  // passes a fresh copy upward, not the caller's array
+        }
+
+        Halffloat64Mask(boolean val) {
+            super(prepare(val));  // every lane set to val
+        }
+
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];  // reads bits[offset .. offset + laneCount - 1]
+            }
+            return newBits;
+        }
+
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();  // the payload is viewed as boolean[]
+        }
+
+        @Override
+        Halffloat64Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {  // f receives the lane index and that lane's bit
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat64Mask(res);
+        }
+
+        @Override
+        Halffloat64Mask bOp(VectorMask<Halffloat> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat64Mask)m).getBits();  // m must be a Halffloat64Mask or the cast throws
+            for (int i = 0; i < res.length; i++) {  // f receives the lane index and both masks' bits
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat64Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat64Vector toVector() {
+            return (Halffloat64Vector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())  // lane counts must agree before converting
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())  // lane counts must agree before converting
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));  // lambda rebuilds the mask from its boolean array
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask eq(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return xor(m.not());  // a ^ !b is true exactly in the lanes where a == b
+        }
+
+        // Unary operations
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask not() {
+            return xor(maskAll(true));  // XOR against an all-true mask flips every lane
+        }
+
+        // Binary operations
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask and(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));  // lambda: per-lane a & b via bOp
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat64Mask or(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));  // lambda: per-lane a | b via bOp
+        }
+
+        @ForceInline
+        /* package-private */
+        Halffloat64Mask xor(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat64Mask m = (Halffloat64Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat64Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));  // lambda: per-lane a ^ b via bOp
+        }
+
+        // Mask Query operations
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));  // lambda works on the boolean array
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));  // lambda works on the boolean array
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));  // lambda works on the boolean array
+        }
+
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {  // more lanes than a long has bits
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat64Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));  // lambda works on the boolean array
+        }
+
+        // Reductions
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat64Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat64Mask)m).getBits()));  // second lambda argument is unused
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat64Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat64Mask)m).getBits()));  // second lambda argument is unused
+        }
+
+        @ForceInline
+        /*package-private*/
+        static Halffloat64Mask maskAll(boolean bit) {
+            return VectorSupport.broadcastCoerced(Halffloat64Mask.class, short.class, VLENGTH,
+                                                  (bit ? -1 : 0), null,
+                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));  // lambda returns one of the two shared constants
+        }
+        private static final Halffloat64Mask  TRUE_MASK = new Halffloat64Mask(true);
+        private static final Halffloat64Mask FALSE_MASK = new Halffloat64Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat64Shuffle extends AbstractShuffle<Halffloat> {  // shuffle class whose vspecies() is the enclosing VSPECIES
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        Halffloat64Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);  // every constructor forwards VLENGTH plus its own argument(s)
+        }
+
+        public Halffloat64Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat64Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat64Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat64Shuffle IOTA = new Halffloat64Shuffle(IDENTITY);  // built from the IDENTITY operator
+
+        @Override
+        @ForceInline
+        public Halffloat64Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat64Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat64Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));  // lambda defers to toVectorTemplate()
+        }
+
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())  // lane counts must agree before converting
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);  // rebuilt from the int[] form, starting at index 0
+        }
+
+        @ForceInline
+        @Override
+        public Halffloat64Shuffle rearrange(VectorShuffle<Halffloat> shuffle) {
+            Halffloat64Shuffle s = (Halffloat64Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {  // r[i] = reorder1[reorder2[i]]
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat64Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize: unmasked form, arguments forwarded unchanged
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset) {
+        return super.fromCharArray0Template(a, offset);  // specialize: unmasked form, arguments forwarded unchanged
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromCharArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset) {
+        return super.fromByteArray0Template(a, offset);  // specialize: unmasked form, arguments forwarded unchanged
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
+        return super.fromByteBuffer0Template(bb, offset);  // specialize: unmasked form, arguments forwarded unchanged
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteBuffer0Template(Halffloat64Mask.class, bb, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize: unmasked form, arguments forwarded unchanged
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset) {
+        super.intoByteArray0Template(a, offset);  // specialize: unmasked form, arguments forwarded unchanged
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoByteArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        super.intoByteBuffer0Template(Halffloat64Mask.class, bb, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoCharArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m);  // specialize: m must be a Halffloat64Mask
+    }
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
new file mode 100644
index 00000000000..7dbbb93de0a
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java
@@ -0,0 +1,904 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntUnaryOperator;
+
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+@SuppressWarnings("cast")  // warning: redundant cast
+final class HalffloatMaxVector extends HalffloatVector {
+    static final HalffloatSpecies VSPECIES =
+        (HalffloatSpecies) HalffloatVector.SPECIES_MAX;  // also read by the nested HalffloatMaxMask
+
+    static final VectorShape VSHAPE =
+        VSPECIES.vectorShape();  // returned by shape()
+
+    static final Class<HalffloatMaxVector> VCLASS = HalffloatMaxVector.class;  // class literal passed to VectorSupport calls
+
+    static final int VSIZE = VSPECIES.vectorBitSize();  // size in bits; returned by bitSize()
+
+    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
+
+    static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+    HalffloatMaxVector(short[] v) {
+        super(v);  // v is handed to the superclass as-is; no copy is made here
+    }
+
+    // For compatibility as HalffloatMaxVector::new,
+    // stored into species.vectorFactory.
+    HalffloatMaxVector(Object v) {
+        this((short[]) v);  // ClassCastException if v is non-null and not a short[]
+    }
+
+    static final HalffloatMaxVector ZERO = new HalffloatMaxVector(new short[VLENGTH]);  // backed by a zero-filled short[VLENGTH]
+    static final HalffloatMaxVector IOTA = new HalffloatMaxVector(VSPECIES.iotaArray());  // backed by VSPECIES.iotaArray()
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();  // result is discarded; called only for its caching effect
+        VSPECIES.withLanes(LaneType.BYTE);  // result is discarded; called only for its caching effect
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE:  This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;  // the static species constant of this class
+    }
+
+    @ForceInline
+    @Override
+    public final Class<Halffloat> elementType() { return Halffloat.class; }  // same class literal as ETYPE
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Halffloat.SIZE; }  // NOTE(review): presumably the lane width in bits - confirm against Halffloat
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }  // cached VSPECIES.vectorShape()
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }  // cached VSPECIES.laneCount()
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }  // cached VSPECIES.vectorBitSize()
+
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }  // VSIZE is a bit count; dividing by Byte.SIZE gives bytes
+
+    /*package-private*/
+    @ForceInline
+    final @Override
+    short[] vec() {
+        return (short[])getPayload();  // the payload is viewed as short[]; no copy is made here
+    }
+
+    // Virtualized constructors
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxVector broadcast(short e) {
+        return (HalffloatMaxVector) super.broadcastTemplate(e);  // specialize: template result narrowed to this class
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxVector broadcast(long e) {
+        return (HalffloatMaxVector) super.broadcastTemplate(e);  // specialize: long overload, result narrowed to this class
+    }
+
+    @Override
+    @ForceInline
+    HalffloatMaxMask maskFromArray(boolean[] bits) {
+        return new HalffloatMaxMask(bits);  // the mask constructor copies from bits
+    }
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle iotaShuffle() { return HalffloatMaxShuffle.IOTA; }  // shared constant; no new shuffle is created
+
+    @ForceInline
+    HalffloatMaxShuffle iotaShuffle(int start, int step, boolean wrap) {
+      if (wrap) {  // wrapping variant: passes 1 and wraps each computed index
+        return (HalffloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+      } else {  // non-wrapping variant: passes 0 and uses i*lstep + lstart as-is
+        return (HalffloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+      }
+    }
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle shuffleFromBytes(byte[] reorder) { return new HalffloatMaxShuffle(reorder); }  // byte[] constructor
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle shuffleFromArray(int[] indexes, int i) { return new HalffloatMaxShuffle(indexes, i); }  // int[] plus offset constructor
+
+    @Override
+    @ForceInline
+    HalffloatMaxShuffle shuffleFromOp(IntUnaryOperator fn) { return new HalffloatMaxShuffle(fn); }  // IntUnaryOperator constructor
+
+    // Make a vector of the same species but the given elements:
+    @ForceInline
+    final @Override
+    HalffloatMaxVector vectorFactory(short[] vec) {
+        return new HalffloatMaxVector(vec);  // vec is passed straight to the constructor; no copy is made here
+    }
+
+    @ForceInline
+    final @Override
+    ByteMaxVector asByteVectorRaw() {
+        return (ByteMaxVector) super.asByteVectorRawTemplate();  // specialize: template result narrowed to ByteMaxVector
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize: laneType forwarded unchanged, no cast on the result
+    }
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector uOp(FUnOp f) {
+        return (HalffloatMaxVector) super.uOpTemplate(f);  // specialize: unmasked unary template
+    }
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector uOp(VectorMask<Halffloat> m, FUnOp f) {
+        return (HalffloatMaxVector)
+            super.uOpTemplate((HalffloatMaxMask)m, f);  // specialize: m must be a HalffloatMaxMask
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector bOp(Vector<Halffloat> v, FBinOp f) {
+        return (HalffloatMaxVector) super.bOpTemplate((HalffloatMaxVector)v, f);  // specialize: v must be a HalffloatMaxVector
+    }
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector bOp(Vector<Halffloat> v,
+                     VectorMask<Halffloat> m, FBinOp f) {
+        return (HalffloatMaxVector)
+            super.bOpTemplate((HalffloatMaxVector)v, (HalffloatMaxMask)m,
+                              f);  // specialize: v and m are cast to this file's concrete types
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
+        return (HalffloatMaxVector)
+            super.tOpTemplate((HalffloatMaxVector)v1, (HalffloatMaxVector)v2,
+                              f);  // specialize: both operands must be HalffloatMaxVector
+    }
+
+    @ForceInline
+    final @Override
+    HalffloatMaxVector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
+                     VectorMask<Halffloat> m, FTriOp f) {
+        return (HalffloatMaxVector)
+            super.tOpTemplate((HalffloatMaxVector)v1, (HalffloatMaxVector)v2,
+                              (HalffloatMaxMask)m, f);  // specialize: operands and mask are cast to this file's concrete types
+    }
+
+    @ForceInline
+    final @Override
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize: all arguments forwarded unchanged, m is not cast
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize: arguments forwarded unchanged
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize: arguments forwarded unchanged
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class.  A call to add()
+    // will inline to a call to lanewise(ADD, v), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Unary op) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op);  // specialize: unmasked unary form
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Unary op, VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m);  // specialize: m must be a HalffloatMaxMask
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Binary op, Vector<Halffloat> v) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, v);  // specialize: unmasked binary form, v is not cast here
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v, (HalffloatMaxMask) m);  // specialize: m must be a HalffloatMaxMask
+    }
+
+
+    // Unmasked ternary form (declared public final, not package-private).
+    @Override
+    @ForceInline
+    public final
+    HalffloatMaxVector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    HalffloatMaxVector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v1, v2, (HalffloatMaxMask) m);  // specialize: m must be a HalffloatMaxMask
+    }
+
+    @Override
+    @ForceInline
+    public final
+    HalffloatMaxVector addIndex(int scale) {
+        return (HalffloatMaxVector) super.addIndexTemplate(scale);  // specialize: template result narrowed to this class
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized: unmasked reduction, result returned as short
+    }
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Halffloat> m) {
+        return super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m);  // specialized: m must be a HalffloatMaxMask
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        return (long) super.reduceLanesTemplate(op);  // specialized; NOTE(review): widens the template's short result - confirm raw-bits widening is intended
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Halffloat> m) {
+        return (long) super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m);  // specialized; NOTE(review): widens the template's short result - confirm intent
+    }
+
+    @ForceInline
+    public VectorShuffle<Halffloat> toShuffle() {
+        return super.toShuffleTemplate(HalffloatMaxShuffle.class); // specialize: passes this file's shuffle class literal
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask test(Test op) {
+        return super.testTemplate(HalffloatMaxMask.class, op);  // specialize: passes this file's mask class literal
+    }
+
+    // Specialized comparisons
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, Vector<Halffloat> v) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, v);  // specialize: vector operand form
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, short s) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, s);  // specialize: short scalar operand form
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, long s) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, s);  // specialize: long scalar operand form
+    }
+
+    @Override
+    @ForceInline
+    public final HalffloatMaxMask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return super.compareTemplate(HalffloatMaxMask.class, op, v, (HalffloatMaxMask) m);  // specialize: m must be a HalffloatMaxMask
+    }
+
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector)
+            super.blendTemplate(HalffloatMaxMask.class,
+                                (HalffloatMaxVector) v,
+                                (HalffloatMaxMask) m);  // specialize: v and m are cast to this file's concrete types
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector slice(int origin, Vector<Halffloat> v) {
+        return (HalffloatMaxVector) super.sliceTemplate(origin, v);  // specialize: two-vector form
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector slice(int origin) {
+        return (HalffloatMaxVector) super.sliceTemplate(origin);  // specialize: single-vector form
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector unslice(int origin, Vector<Halffloat> w, int part) {
+        return (HalffloatMaxVector) super.unsliceTemplate(origin, w, part);  // specialize: unmasked form with background vector w
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector)
+            super.unsliceTemplate(HalffloatMaxMask.class,
+                                  origin, w, part,
+                                  (HalffloatMaxMask) m);  // specialize: m must be a HalffloatMaxMask
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector unslice(int origin) {
+        return (HalffloatMaxVector) super.unsliceTemplate(origin);  // specialize: origin-only form
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector rearrange(VectorShuffle<Halffloat> s) {
+        return (HalffloatMaxVector)
+            super.rearrangeTemplate(HalffloatMaxShuffle.class,
+                                    (HalffloatMaxShuffle) s);  // specialize: s must be a HalffloatMaxShuffle
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector rearrange(VectorShuffle<Halffloat> shuffle,
+                                  VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector)
+            super.rearrangeTemplate(HalffloatMaxShuffle.class,
+                                    HalffloatMaxMask.class,
+                                    (HalffloatMaxShuffle) shuffle,
+                                    (HalffloatMaxMask) m);  // specialize: shuffle and m are cast to this file's concrete types
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector rearrange(VectorShuffle<Halffloat> s,
+                                  Vector<Halffloat> v) {
+        return (HalffloatMaxVector)
+            super.rearrangeTemplate(HalffloatMaxShuffle.class,
+                                    (HalffloatMaxShuffle) s,
+                                    (HalffloatMaxVector) v);  // specialize: s and v are cast to this file's concrete types
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector selectFrom(Vector<Halffloat> v) {
+        return (HalffloatMaxVector)
+            super.selectFromTemplate((HalffloatMaxVector) v);  // specialize: v must be a HalffloatMaxVector
+    }
+
+    @Override
+    @ForceInline
+    public HalffloatMaxVector selectFrom(Vector<Halffloat> v,
+                                   VectorMask<Halffloat> m) {
+        return (HalffloatMaxVector)
+            super.selectFromTemplate((HalffloatMaxVector) v,
+                                     (HalffloatMaxMask) m);  // specialize: v and m are cast to this file's concrete types
+    }
+
+
+    @ForceInline
+    @Override
+    public short lane(int i) {
+        if (!(0 <= i && i < VLENGTH)) {  // only indexes in [0, VLENGTH) are accepted
+            throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        final short raw = laneHelper(i);
+        return Halffloat.shortBitsToHalffloat(raw);
+    }
+
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                VCLASS, ETYPE, VLENGTH,
+                this, i,
+                (v, idx) -> {  // lambda: reads lane idx out of the short[] returned by vec()
+                    short[] lanes = v.vec();
+                    return (long) Halffloat.shortToShortBits(lanes[idx]);
+                });
+    }
+
+    @ForceInline
+    @Override
+    public HalffloatMaxVector withLane(int i, short e) {
+        if (!(0 <= i && i < VLENGTH)) {  // only indexes in [0, VLENGTH) are accepted
+            throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return withLaneHelper(i, e);
+    }
+
+    public HalffloatMaxVector withLaneHelper(int i, short e) {
+        return VectorSupport.insert(
+                VCLASS, ETYPE, VLENGTH,
+                this, i, (long) Halffloat.shortToShortBits(e),
+                (vec, idx, laneBits) -> {  // lambda writes into a clone, never into vec's own array
+                    short[] lanes = vec.vec().clone();
+                    lanes[idx] = Halffloat.shortBitsToHalffloat((short) laneBits);
+                    return vec.vectorFactory(lanes);
+                });
+    }
+
+    // Mask
+
+    /**
+     * Mask for {@code HalffloatMaxVector}: a {@code boolean[]} payload holding
+     * {@code VSPECIES.laneCount()} entries.
+     */
+    static final class HalffloatMaxMask extends AbstractMask<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        // Takes the first VSPECIES.laneCount() entries of bits.
+        HalffloatMaxMask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        // Takes VSPECIES.laneCount() entries of bits, starting at offset.
+        HalffloatMaxMask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        // Creates a mask in which every lane is set to val.
+        HalffloatMaxMask(boolean val) {
+            super(prepare(val));
+        }
+
+        // Copies one lane count's worth of entries out of bits into a fresh
+        // array, so the mask does not share the caller's array.
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        // Builds a fresh array of lane-count length filled with val.
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE:  This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        // Exposes the payload as the boolean[] it was constructed with.
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        // Applies f to every lane (index, bit) and wraps the results in a new mask.
+        @Override
+        HalffloatMaxMask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new HalffloatMaxMask(res);
+        }
+
+        // Applies f lane by lane to this mask and m, which must be a HalffloatMaxMask.
+        @Override
+        HalffloatMaxMask bOp(VectorMask<Halffloat> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((HalffloatMaxMask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new HalffloatMaxMask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        HalffloatMaxVector toVector() {
+            return (HalffloatMaxVector) super.toVectorTemplate();  // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         */
+        // NOTE(review): no caller is visible in this class; cast() below
+        // repeats the same logic in its lambda — confirm whether this helper
+        // is still needed.
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return  dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        // Converts this mask to the mask type of dsp. The lane counts must
+        // match, otherwise IllegalArgumentException is thrown. The lambda
+        // rebuilds the mask from toArray() through the target species.
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        // Lane-wise equality, computed as this XOR (NOT mask).
+        @Override
+        @ForceInline
+        public HalffloatMaxMask eq(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return xor(m.not());
+        }
+
+        // Unary operations
+
+        // Lane-wise negation, computed as this XOR all-true.
+        @Override
+        @ForceInline
+        public HalffloatMaxMask not() {
+            return xor(maskAll(true));
+        }
+
+        // Binary operations
+
+        // In and/or/xor the trailing lambda is the Java implementation built
+        // on bOp. NOTE(review): presumably it runs only when VectorSupport
+        // has no intrinsic for the request — confirm against VectorSupport.
+
+        @Override
+        @ForceInline
+        public HalffloatMaxMask and(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public HalffloatMaxMask or(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @ForceInline
+        /* package-private */
+        HalffloatMaxMask xor(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            HalffloatMaxMask m = (HalffloatMaxMask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, HalffloatMaxMask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        // trueCount/firstTrue/lastTrue narrow the long returned by
+        // maskReductionCoerced to int; toLong returns it unchanged.
+
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        // Rejects masks with more than Long.SIZE lanes, since they cannot be
+        // packed into a single long.
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, HalffloatMaxMask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // Reductions
+
+        // Both reductions pass this mask together with an all-true mask to
+        // VectorSupport.test; the Java lambda ignores the second operand and
+        // inspects only this mask's bits.
+
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, HalffloatMaxMask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((HalffloatMaxMask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, HalffloatMaxMask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((HalffloatMaxMask)m).getBits()));
+        }
+
+        // Returns the all-true or all-false mask. The request carries -1 for
+        // true and 0 for false, and the lambda maps any non-zero value to the
+        // cached TRUE_MASK, otherwise FALSE_MASK.
+        @ForceInline
+        /*package-private*/
+        static HalffloatMaxMask maskAll(boolean bit) {
+            return VectorSupport.broadcastCoerced(HalffloatMaxMask.class, short.class, VLENGTH,
+                                                  (bit ? -1 : 0), null,
+                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        // Shared constant masks returned by the maskAll lambda.
+        private static final HalffloatMaxMask  TRUE_MASK = new HalffloatMaxMask(true);
+        private static final HalffloatMaxMask FALSE_MASK = new HalffloatMaxMask(false);
+
+    }
+
+    // Shuffle
+
+    /**
+     * Shuffle for {@code HalffloatMaxVector}, with {@code VLENGTH} lane
+     * indexes stored as bytes (see {@code reorder()}).
+     */
+    static final class HalffloatMaxShuffle extends AbstractShuffle<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        // All constructors forward to AbstractShuffle with this shape's VLENGTH.
+
+        HalffloatMaxShuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public HalffloatMaxShuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public HalffloatMaxShuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public HalffloatMaxShuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        // Shuffle built from IDENTITY.
+        static final HalffloatMaxShuffle IOTA = new HalffloatMaxShuffle(IDENTITY);
+
+        // Converts this shuffle to a vector through VectorSupport.shuffleToVector;
+        // the lambda is the Java implementation based on toVectorTemplate().
+        @Override
+        @ForceInline
+        public HalffloatMaxVector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, HalffloatMaxShuffle.class, this, VLENGTH,
+                                                    (s) -> ((HalffloatMaxVector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));
+        }
+
+        // Re-creates this shuffle for species s from its int[] form. The lane
+        // counts must match, otherwise IllegalArgumentException is thrown.
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        // Composes two shuffles: result lane i is this shuffle's entry at the
+        // index that the argument shuffle holds in lane i.
+        @ForceInline
+        @Override
+        public HalffloatMaxShuffle rearrange(VectorShuffle<Halffloat> shuffle) {
+            HalffloatMaxShuffle s = (HalffloatMaxShuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new HalffloatMaxShuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    // Specialization point: forwards to the shared template in the superclass.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset) {
+        HalffloatVector loaded = super.fromArray0Template(a, offset);  // specialize
+        return loaded;
+    }
+
+    // Masked variant: narrows m to this shape's mask class, then forwards to
+    // the shared template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        return super.fromArray0Template(HalffloatMaxMask.class, a, offset, laneMask);  // specialize
+    }
+
+
+    // Specialization point: forwards to the shared char[] template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset) {
+        HalffloatVector loaded = super.fromCharArray0Template(a, offset);  // specialize
+        return loaded;
+    }
+
+    // Masked variant: narrows m to this shape's mask class, then forwards to
+    // the shared char[] template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        return super.fromCharArray0Template(HalffloatMaxMask.class, a, offset, laneMask);  // specialize
+    }
+
+
+    // Specialization point: forwards to the shared byte[] template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset) {
+        HalffloatVector loaded = super.fromByteArray0Template(a, offset);  // specialize
+        return loaded;
+    }
+
+    // Masked variant: narrows m to this shape's mask class, then forwards to
+    // the shared byte[] template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        return super.fromByteArray0Template(HalffloatMaxMask.class, a, offset, laneMask);  // specialize
+    }
+
+    // Specialization point: forwards to the shared ByteBuffer template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
+        HalffloatVector loaded = super.fromByteBuffer0Template(bb, offset);  // specialize
+        return loaded;
+    }
+
+    // Masked variant: narrows m to this shape's mask class, then forwards to
+    // the shared ByteBuffer template.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        return super.fromByteBuffer0Template(HalffloatMaxMask.class, bb, offset, laneMask);  // specialize
+    }
+
+    // Specialization point: forwards the store to the shared template in the
+    // superclass.
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    // Masked variant: narrows m to this shape's mask class, then forwards
+    // the store to the shared template.
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        super.intoArray0Template(HalffloatMaxMask.class, a, offset, laneMask);
+    }
+
+
+
+    // Specialization point: forwards the byte[] store to the shared template
+    // in the superclass.
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset) {
+        super.intoByteArray0Template(a, offset);  // specialize
+    }
+
+    // Masked variant: narrows m to this shape's mask class, then forwards
+    // the byte[] store to the shared template.
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        super.intoByteArray0Template(HalffloatMaxMask.class, a, offset, laneMask);  // specialize
+    }
+
+    // Masked ByteBuffer store: narrows m to this shape's mask class, then
+    // forwards to the shared template.
+    @ForceInline
+    @Override
+    final
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        super.intoByteBuffer0Template(HalffloatMaxMask.class, bb, offset, laneMask);
+    }
+
+    // Masked char[] store: narrows m to this shape's mask class, then
+    // forwards to the shared template.
+    @ForceInline
+    @Override
+    final
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        HalffloatMaxMask laneMask = (HalffloatMaxMask) m;
+        super.intoCharArray0Template(HalffloatMaxMask.class, a, offset, laneMask);
+    }
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java
new file mode 100644
index 00000000000..7a748f72294
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java
@@ -0,0 +1,4151 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.incubator.vector;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.ReadOnlyBufferException;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.function.UnaryOperator;
+
+import jdk.internal.misc.ScopedMemoryAccess;
+import jdk.internal.misc.Unsafe;
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.vector.VectorSupport;
+
+import static jdk.internal.vm.vector.VectorSupport.*;
+import static jdk.incubator.vector.VectorIntrinsics.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+// -- This file was mechanically generated: Do not edit! -- //
+
+/**
+ * A specialized {@link Vector} representing an ordered immutable sequence of
+ * {@code Halffloat} values, each held in a {@code short} lane.
+ */
+@SuppressWarnings("cast")  // warning: redundant cast
+public abstract class HalffloatVector extends AbstractVector<Halffloat> {
+
+    // Hands the lane array straight to the AbstractVector constructor.
+    HalffloatVector(short[] vec) {
+        super(vec);
+    }
+
+    // Passed as the last argument of VectorOperators.opCode by the opCode
+    // helpers of this class.
+    // NOTE(review): VO_NOFP presumably flags operators that are not valid
+    // for floating-point lanes — confirm against VectorOperators.
+    static final int FORBID_OPCODE_KIND = VO_NOFP;
+
+    @ForceInline
+    static int opCode(Operator op) {
+        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
+    }
+    // As opCode(op), but additionally requires the kind bits in requireKind.
+    @ForceInline
+    static int opCode(Operator op, int requireKind) {
+        return VectorOperators.opCode(op, requireKind | VO_OPCODE_VALID, FORBID_OPCODE_KIND);
+    }
+    // Thin forwarder to VectorOperators.opKind for the given operator and kind bit.
+    @ForceInline
+    static boolean opKind(Operator op, int bit) {
+        boolean kindMatches = VectorOperators.opKind(op, bit);
+        return kindMatches;
+    }
+
+    // Virtualized factories and operators,
+    // coded with portable definitions.
+    // These are all @ForceInline in case
+    // they need to be used performantly.
+    // The various shape-specific subclasses
+    // also specialize them by wrapping
+    // them in a call like this:
+    //    return (Byte128Vector)
+    //       super.bOp((Byte128Vector) o);
+    // The purpose of that is to forcibly inline
+    // the generic definition from this file
+    // into a sharply type- and size-specific
+    // wrapper in the subclass file, so that
+    // the JIT can specialize the code.
+    // The code is only inlined and expanded
+    // if it gets hot.  Think of it as a cheap
+    // and lazy version of C++ templates.
+
+    // Virtualized getter
+
+    /*package-private*/
+    // Returns the short[] holding this vector's lanes; the portable templates
+    // in this class read lanes through it.
+    abstract short[] vec();
+
+    // Virtualized constructors
+
+    /**
+     * Build a vector directly using my own constructor.
+     * It is an error if the array is aliased elsewhere.
+     *
+     * @param vec the lane array for the new vector; callers pass a freshly
+     *            allocated array
+     * @return a vector wrapping {@code vec}
+     */
+    /*package-private*/
+    abstract HalffloatVector vectorFactory(short[] vec);
+
+    /**
+     * Creates a mask over {@code bits} through this vector's species.
+     * The array must not be aliased elsewhere.
+     */
+    /*package-private*/
+    @ForceInline
+    final
+    AbstractMask<Halffloat> maskFactory(boolean[] bits) {
+        HalffloatSpecies species = vspecies();
+        return species.maskFactory(bits);
+    }
+
+    // Constant loader (takes dummy as vector arg)
+    // Produces the value for lane i from the lane index alone.
+    interface FVOp {
+        short apply(int i);
+    }
+
+    /*package-private*/
+    // Builds a vector of this vector's length whose lane k is f.apply(k).
+    @ForceInline
+    final
+    HalffloatVector vOp(FVOp f) {
+        short[] lanes = new short[length()];
+        int lane = 0;
+        while (lane < lanes.length) {
+            lanes[lane] = f.apply(lane);
+            lane++;
+        }
+        return vectorFactory(lanes);
+    }
+
+    // Masked form: lanes whose mask bit is unset keep the array default of zero.
+    @ForceInline
+    final
+    HalffloatVector vOp(VectorMask<Halffloat> m, FVOp f) {
+        short[] lanes = new short[length()];
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        for (int lane = 0; lane < lanes.length; lane++) {
+            if (!selected[lane]) {
+                continue;
+            }
+            lanes[lane] = f.apply(lane);
+        }
+        return vectorFactory(lanes);
+    }
+
+    // Unary operator
+
+    /*package-private*/
+    // Lane-wise unary function: maps lane index i and lane value a to the result lane.
+    interface FUnOp {
+        short apply(int i, short a);
+    }
+
+    /*package-private*/
+    // Unary lane-wise operator, implemented by each subclass.
+    abstract
+    HalffloatVector uOp(FUnOp f);
+    // Portable definition: applies f to every lane and wraps the results.
+    @ForceInline
+    final
+    HalffloatVector uOpTemplate(FUnOp f) {
+        short[] input = vec();
+        short[] output = new short[length()];
+        int lane = 0;
+        while (lane < output.length) {
+            output[lane] = f.apply(lane, input[lane]);
+            lane++;
+        }
+        return vectorFactory(output);
+    }
+
+    /*package-private*/
+    // Masked unary lane-wise operator, implemented by each subclass.
+    abstract
+    HalffloatVector uOp(VectorMask<Halffloat> m,
+                             FUnOp f);
+    // Portable definition: a null mask means "all lanes"; otherwise
+    // unselected lanes are copied through unchanged.
+    @ForceInline
+    final
+    HalffloatVector uOpTemplate(VectorMask<Halffloat> m,
+                                     FUnOp f) {
+        if (m == null) {
+            return uOpTemplate(f);
+        }
+        short[] input = vec();
+        short[] output = new short[length()];
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        for (int lane = 0; lane < output.length; lane++) {
+            if (selected[lane]) {
+                output[lane] = f.apply(lane, input[lane]);
+            } else {
+                output[lane] = input[lane];
+            }
+        }
+        return vectorFactory(output);
+    }
+
+    // Binary operator
+
+    /*package-private*/
+    // Lane-wise binary function: maps lane index i and operands a, b to the
+    // result lane. Also used as the combining step in rOpTemplate.
+    interface FBinOp {
+        short apply(int i, short a, short b);
+    }
+
+    /*package-private*/
+    // Binary lane-wise operator, implemented by each subclass.
+    abstract
+    HalffloatVector bOp(Vector<Halffloat> o,
+                             FBinOp f);
+    // Portable definition: combines matching lanes of this vector and o.
+    @ForceInline
+    final
+    HalffloatVector bOpTemplate(Vector<Halffloat> o,
+                                     FBinOp f) {
+        short[] output = new short[length()];
+        short[] left = this.vec();
+        short[] right = ((HalffloatVector)o).vec();
+        int lane = 0;
+        while (lane < output.length) {
+            output[lane] = f.apply(lane, left[lane], right[lane]);
+            lane++;
+        }
+        return vectorFactory(output);
+    }
+
+    /*package-private*/
+    // Masked binary lane-wise operator, implemented by each subclass.
+    abstract
+    HalffloatVector bOp(Vector<Halffloat> o,
+                             VectorMask<Halffloat> m,
+                             FBinOp f);
+    // Portable definition: a null mask means "all lanes"; otherwise
+    // unselected lanes keep this vector's value.
+    @ForceInline
+    final
+    HalffloatVector bOpTemplate(Vector<Halffloat> o,
+                                     VectorMask<Halffloat> m,
+                                     FBinOp f) {
+        if (m == null) {
+            return bOpTemplate(o, f);
+        }
+        short[] output = new short[length()];
+        short[] left = this.vec();
+        short[] right = ((HalffloatVector)o).vec();
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        for (int lane = 0; lane < output.length; lane++) {
+            if (selected[lane]) {
+                output[lane] = f.apply(lane, left[lane], right[lane]);
+            } else {
+                output[lane] = left[lane];
+            }
+        }
+        return vectorFactory(output);
+    }
+
+    // Ternary operator
+
+    /*package-private*/
+    // Lane-wise ternary function: maps lane index i and operands a, b, c to
+    // the result lane.
+    interface FTriOp {
+        short apply(int i, short a, short b, short c);
+    }
+
+    /*package-private*/
+    // Ternary lane-wise operator, implemented by each subclass.
+    abstract
+    HalffloatVector tOp(Vector<Halffloat> o1,
+                             Vector<Halffloat> o2,
+                             FTriOp f);
+    // Portable definition: combines matching lanes of this vector, o1 and o2.
+    @ForceInline
+    final
+    HalffloatVector tOpTemplate(Vector<Halffloat> o1,
+                                     Vector<Halffloat> o2,
+                                     FTriOp f) {
+        short[] output = new short[length()];
+        short[] first = this.vec();
+        short[] second = ((HalffloatVector)o1).vec();
+        short[] third = ((HalffloatVector)o2).vec();
+        int lane = 0;
+        while (lane < output.length) {
+            output[lane] = f.apply(lane, first[lane], second[lane], third[lane]);
+            lane++;
+        }
+        return vectorFactory(output);
+    }
+
+    /*package-private*/
+    // Masked ternary lane-wise operator, implemented by each subclass.
+    abstract
+    HalffloatVector tOp(Vector<Halffloat> o1,
+                             Vector<Halffloat> o2,
+                             VectorMask<Halffloat> m,
+                             FTriOp f);
+    // Portable definition: a null mask means "all lanes"; otherwise
+    // unselected lanes keep this vector's value.
+    @ForceInline
+    final
+    HalffloatVector tOpTemplate(Vector<Halffloat> o1,
+                                     Vector<Halffloat> o2,
+                                     VectorMask<Halffloat> m,
+                                     FTriOp f) {
+        if (m == null) {
+            return tOpTemplate(o1, o2, f);
+        }
+        short[] output = new short[length()];
+        short[] first = this.vec();
+        short[] second = ((HalffloatVector)o1).vec();
+        short[] third = ((HalffloatVector)o2).vec();
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        for (int lane = 0; lane < output.length; lane++) {
+            if (selected[lane]) {
+                output[lane] = f.apply(lane, first[lane], second[lane], third[lane]);
+            } else {
+                output[lane] = first[lane];
+            }
+        }
+        return vectorFactory(output);
+    }
+
+    // Reduction operator
+
+    /*package-private*/
+    // Masked reduction, implemented by each subclass.
+    abstract
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f);
+
+    // Portable definition: folds the selected lanes into v in lane order.
+    // A null mask folds every lane.
+    @ForceInline
+    final
+    short rOpTemplate(short v, VectorMask<Halffloat> m, FBinOp f) {
+        if (m == null) {
+            return rOpTemplate(v, f);
+        }
+        short[] lanes = vec();
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        short acc = v;
+        for (int lane = 0; lane < lanes.length; lane++) {
+            if (selected[lane]) {
+                acc = f.apply(lane, acc, lanes[lane]);
+            }
+        }
+        return acc;
+    }
+
+    // Unmasked reduction: folds every lane into v in lane order.
+    @ForceInline
+    final
+    short rOpTemplate(short v, FBinOp f) {
+        short[] lanes = vec();
+        short acc = v;
+        int lane = 0;
+        while (lane < lanes.length) {
+            acc = f.apply(lane, acc, lanes[lane]);
+            lane++;
+        }
+        return acc;
+    }
+
+    // Memory reference
+
+    /*package-private*/
+    // Lane loader: returns the value for lane i, given the memory object
+    // and the base offset passed to ldOp.
+    interface FLdOp<M> {
+        short apply(M memory, int offset, int i);
+    }
+
+    /*package-private*/
+    // Builds a vector by asking f for every lane; this vector's own lanes
+    // are not read, only its length.
+    @ForceInline
+    final
+    <M> HalffloatVector ldOp(M memory, int offset,
+                                  FLdOp<M> f) {
+        short[] loaded = new short[length()];
+        int lane = 0;
+        while (lane < loaded.length) {
+            loaded[lane] = f.apply(memory, offset, lane);
+            lane++;
+        }
+        return vectorFactory(loaded);
+    }
+
+    /*package-private*/
+    // Masked load: f is consulted only for selected lanes; the remaining
+    // lanes keep the array default of zero.
+    @ForceInline
+    final
+    <M> HalffloatVector ldOp(M memory, int offset,
+                                  VectorMask<Halffloat> m,
+                                  FLdOp<M> f) {
+        short[] loaded = new short[length()];
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        for (int lane = 0; lane < loaded.length; lane++) {
+            if (!selected[lane]) {
+                continue;
+            }
+            loaded[lane] = f.apply(memory, offset, lane);
+        }
+        return vectorFactory(loaded);
+    }
+
+    // Lane storer: receives lane i's value a together with the memory
+    // object and the base offset passed to stOp.
+    interface FStOp<M> {
+        void apply(M memory, int offset, int i, short a);
+    }
+
+    /*package-private*/
+    // Hands every lane of this vector to f, in lane order.
+    @ForceInline
+    final
+    <M> void stOp(M memory, int offset,
+                  FStOp<M> f) {
+        short[] lanes = vec();
+        int lane = 0;
+        while (lane < lanes.length) {
+            f.apply(memory, offset, lane, lanes[lane]);
+            lane++;
+        }
+    }
+
+    /*package-private*/
+    // Masked store: only lanes whose mask bit is set are handed to f.
+    @ForceInline
+    final
+    <M> void stOp(M memory, int offset,
+                  VectorMask<Halffloat> m,
+                  FStOp<M> f) {
+        short[] lanes = vec();
+        boolean[] selected = ((AbstractMask<Halffloat>)m).getBits();
+        for (int lane = 0; lane < lanes.length; lane++) {
+            if (!selected[lane]) {
+                continue;
+            }
+            f.apply(memory, offset, lane, lanes[lane]);
+        }
+    }
+
+    // Binary test
+
+    /*package-private*/
+    // Lane-wise comparison: decides the mask bit for lane i from the
+    // condition code cond and the operands a, b.
+    interface FBinTest {
+        boolean apply(int cond, int i, short a, short b);
+    }
+
+    /*package-private*/
+    // Evaluates f on every pair of matching lanes and collects the outcomes
+    // into a mask created through maskFactory.
+    @ForceInline
+    final
+    AbstractMask<Halffloat> bTest(int cond,
+                                  Vector<Halffloat> o,
+                                  FBinTest f) {
+        short[] left = vec();
+        short[] right = ((HalffloatVector)o).vec();
+        boolean[] outcome = new boolean[length()];
+        int lane = 0;
+        while (lane < length()) {
+            outcome[lane] = f.apply(cond, lane, left[lane], right[lane]);
+            lane++;
+        }
+        return maskFactory(outcome);
+    }
+
+
+    /*package-private*/
+    // Narrows the inherited species accessor to HalffloatSpecies, which the
+    // factories in this class rely on (maskFactory, broadcast).
+    @Override
+    abstract HalffloatSpecies vspecies();
+
+    /*package-private*/
+    // Widens the raw bits of e, as returned by Halffloat.shortToRawShortBits, to long.
+    @ForceInline
+    static long toBits(short e) {
+        long rawBits = Halffloat.shortToRawShortBits(e);
+        return rawBits;
+    }
+
+    /*package-private*/
+    // Keeps the low 16 bits of bits and converts them with
+    // Halffloat.shortBitsToHalffloat.
+    @ForceInline
+    static short fromBits(long bits) {
+        short narrowed = (short) bits;
+        return Halffloat.shortBitsToHalffloat(narrowed);
+    }
+
+    // Static factories (other than memory operations)
+
+    // Note: A surprising behavior in javadoc
+    // sometimes makes a lone /** {@inheritDoc} */
+    // comment drop the method altogether,
+    // apparently if the method mentions a
+    // parameter or return type of Vector<Halffloat>
+    // instead of Vector<E> as originally specified.
+    // Adding an empty HTML fragment appears to
+    // nudge javadoc into providing the desired
+    // inherited documentation.  We use the HTML
+    // comment <!--workaround--> for this.
+
+    /**
+     * Returns a vector of the given species
+     * where all lane elements are set to
+     * zero, the default primitive value.
+     *
+     * @param species species of the desired zero vector
+     * @return a zero vector
+     */
+    @ForceInline
+    public static HalffloatVector zero(VectorSpecies<Halffloat> species) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return VectorSupport.broadcastCoerced(vsp.vectorType(), Halffloat.class, species.length(),
+                        toBits((short)0), vsp,
+                        ((bits_, s_) -> s_.rvOp(i -> bits_)));
+    }
+
+    /**
+     * Returns a vector of the same species as this one
+     * where all lane elements are set to
+     * the primitive value {@code e}.
+     *
+     * The contents of the current vector are discarded;
+     * only the species is relevant to this operation.
+     *
+     * <p> This method returns the value of this expression:
+     * {@code HalffloatVector.broadcast(this.species(), e)}.
+     *
+     * @apiNote
+     * Unlike the similar method named {@code broadcast()}
+     * in the supertype {@code Vector}, this method does not
+     * need to validate its argument, and cannot throw
+     * {@code IllegalArgumentException}.  This method is
+     * therefore preferable to the supertype method.
+     *
+     * @param e the value to broadcast
+     * @return a vector where all lane elements are set to
+     *         the primitive value {@code e}
+     * @see #broadcast(VectorSpecies,long)
+     * @see Vector#broadcast(long)
+     * @see VectorSpecies#broadcast(long)
+     */
+    public abstract HalffloatVector broadcast(short e);  // implemented by each concrete subclass
+
+    /**
+     * Returns a vector of the given species in which
+     * every lane is set to the primitive value {@code e}.
+     *
+     * @param species species of the desired vector
+     * @param e the value to broadcast
+     * @return a vector where all lane elements are set to
+     *         the primitive value {@code e}
+     * @see #broadcast(long)
+     * @see Vector#broadcast(long)
+     * @see VectorSpecies#broadcast(long)
+     */
+    @ForceInline
+    public static HalffloatVector broadcast(VectorSpecies<Halffloat> species, short e) {
+        // The species object performs the broadcast itself.
+        return ((HalffloatSpecies) species).broadcast(e);
+    }
+
+    /*package-private*/
+    // Portable definition of broadcast(short): delegates to this vector's species.
+    @ForceInline
+    final HalffloatVector broadcastTemplate(short e) {
+        return vspecies().broadcast(e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * When working with vector subtypes like {@code HalffloatVector},
+     * {@linkplain #broadcast(short) the more strongly typed method}
+     * is typically selected.  It can be explicitly selected
+     * using a cast: {@code v.broadcast((short)e)}.
+     * The two expressions will produce numerically identical results.
+     */
+    @Override
+    public abstract HalffloatVector broadcast(long e);  // implemented by each concrete subclass
+
+    /**
+     * Returns a vector of the given species in which
+     * every lane is set to the primitive value {@code e}.
+     *
+     * The {@code long} value must be accurately representable
+     * by the {@code ETYPE} of the vector species, so that
+     * {@code e==(long)(ETYPE)e}.
+     *
+     * @param species species of the desired vector
+     * @param e the value to broadcast
+     * @return a vector where all lane elements are set to
+     *         the primitive value {@code e}
+     * @throws IllegalArgumentException
+     *         if the given {@code long} value cannot
+     *         be represented by the vector's {@code ETYPE}
+     * @see #broadcast(VectorSpecies,short)
+     * @see VectorSpecies#checkValue(long)
+     */
+    @ForceInline
+    public static HalffloatVector broadcast(VectorSpecies<Halffloat> species, long e) {
+        // The species object performs the broadcast itself.
+        return ((HalffloatSpecies) species).broadcast(e);
+    }
+
+    /*package-private*/
+    @ForceInline
+    final HalffloatVector broadcastTemplate(long e) {
+        return vspecies().broadcast(e);
+    }
+
+    // Unary lanewise support
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    // Marked @Override like the masked overload: the method is inherited
+    // (hence the inheritDoc above), so the compiler should verify that the
+    // signature still matches the supertype declaration.
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Unary op);
+
+    /*
+     * Template for the unmasked unary lane-wise operation.
+     * Presumably called from the concrete subclasses' lanewise(Unary)
+     * overrides (not visible in this part of the file).
+     * ZOMO is handled here directly; every other operator is passed to
+     * VectorSupport.unaryOp together with the scalar implementation
+     * looked up through UN_IMPL.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Unary op) {
+        if (opKind(op, VO_SPECIAL)) {
+            if (op == ZOMO) {
+                // Lanes that compare NE to 0 are replaced by broadcast(-1);
+                // all other lanes keep their current value.
+                // NOTE(review): the int literals -1 and 0 select the long
+                // overloads of broadcast/compare; confirm they denote the
+                // intended half-precision values rather than raw lane bits.
+                return blend(broadcast(-1), compare(NE, 0));
+            }
+        }
+        int opc = opCode(op);
+        // Unmasked form: no mask class and no mask are supplied.
+        return VectorSupport.unaryOp(
+            opc, getClass(), null, Halffloat.class, length(),
+            this, null,
+            UN_IMPL.find(op, opc, HalffloatVector::unaryOperations));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Unary op,
+                                  VectorMask<Halffloat> m);
+    /*
+     * Template for the masked unary lane-wise operation.
+     * The mask is first checked against maskClass and this vector.
+     * ZOMO is handled here directly; every other operator is passed to
+     * VectorSupport.unaryOp together with the mask and the scalar
+     * implementation looked up through UN_IMPL.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Unary op,
+                                          Class<? extends VectorMask<Halffloat>> maskClass,
+                                          VectorMask<Halffloat> m) {
+        m.check(maskClass, this);
+        if (opKind(op, VO_SPECIAL)) {
+            if (op == ZOMO) {
+                // The mask is applied to the comparison, so only lanes
+                // selected by the masked NE-to-0 compare are replaced
+                // by broadcast(-1).
+                return blend(broadcast(-1), compare(NE, 0, m));
+            }
+        }
+        int opc = opCode(op);
+        return VectorSupport.unaryOp(
+            opc, getClass(), maskClass, Halffloat.class, length(),
+            this, m,
+            UN_IMPL.find(op, opc, HalffloatVector::unaryOperations));
+    }
+
+    private static final
+    ImplCache<Unary, UnaryOperation<HalffloatVector, VectorMask<Halffloat>>>
+        UN_IMPL = new ImplCache<>(Unary.class, HalffloatVector.class);
+
+    /*
+     * Returns the scalar (lane-at-a-time) implementation for the unary
+     * operation code opc_, or null when there is none.
+     *
+     * Each lane is a short carrying the raw bits of a half-precision
+     * value (binaryOperations decodes lanes the same way, through
+     * Halffloat.valueOf(short)).  Negation and absolute value therefore
+     * act on the sign bit (bit 15) only; integer negation or
+     * Math.abs on the short would corrupt the exponent and mantissa.
+     */
+    private static UnaryOperation<HalffloatVector, VectorMask<Halffloat>> unaryOperations(int opc_) {
+        switch (opc_) {
+            case VECTOR_OP_NEG: return (v0, m) ->
+                    // Flip the sign bit.
+                    v0.uOp(m, (i, a) -> (short) (a ^ 0x8000));
+            case VECTOR_OP_ABS: return (v0, m) ->
+                    // Clear the sign bit.
+                    v0.uOp(m, (i, a) -> (short) (a & 0x7FFF));
+            default: return null;
+        }
+    }
+
+    // Binary lanewise support
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Binary,short)
+     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  Vector<Halffloat> v);
+    /*
+     * Template for the unmasked binary lane-wise operation.
+     * The argument is cast to HalffloatVector and checked against this
+     * vector.  FIRST_NONZERO is handled here directly; every other
+     * operator is passed to VectorSupport.binaryOp together with the
+     * scalar implementation looked up through BIN_IMPL.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Binary op,
+                                          Vector<Halffloat> v) {
+        HalffloatVector that = (HalffloatVector) v;
+        that.check(this);
+
+        if (opKind(op, VO_SPECIAL )) {
+            if (op == FIRST_NONZERO) {
+                // FIXME: Support this in the JIT.
+                // Mask of lanes whose integral view differs from 0.
+                VectorMask<Short> thisNZ
+                    = this.viewAsIntegralLanes().compare(NE, (short) 0);
+                // Zero the other operand in those lanes, so the OR below
+                // leaves this vector's value there and takes the other
+                // operand's value in the remaining lanes.
+                that = that.blend((short) 0, thisNZ.cast(vspecies()));
+                op = OR_UNCHECKED;
+                // FIXME: Support OR_UNCHECKED on float/double also!
+                // The OR is carried out on the integral views and the
+                // result is viewed as floating lanes again.
+                return this.viewAsIntegralLanes()
+                    .lanewise(op, that.viewAsIntegralLanes())
+                    .viewAsFloatingLanes();
+            }
+        }
+
+        int opc = opCode(op);
+        // Unmasked form: no mask class and no mask are supplied.
+        return VectorSupport.binaryOp(
+            opc, getClass(), null, Halffloat.class, length(),
+            this, that, null,
+            BIN_IMPL.find(op, opc, HalffloatVector::binaryOperations));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  Vector<Halffloat> v,
+                                  VectorMask<Halffloat> m);
+    /*
+     * Template for the masked binary lane-wise operation.
+     * The argument vector and the mask are checked against this vector.
+     * FIRST_NONZERO is handled here directly; every other operator is
+     * passed to VectorSupport.binaryOp together with the mask and the
+     * scalar implementation looked up through BIN_IMPL.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Binary op,
+                                          Class<? extends VectorMask<Halffloat>> maskClass,
+                                          Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        HalffloatVector that = (HalffloatVector) v;
+        that.check(this);
+        m.check(maskClass, this);
+
+        if (opKind(op, VO_SPECIAL )) {
+            if (op == FIRST_NONZERO) {
+                // Compute the unmasked result, then take it only in the
+                // lanes selected by m.
+                return blend(lanewise(op, v), m);
+            }
+        }
+
+        int opc = opCode(op);
+        return VectorSupport.binaryOp(
+            opc, getClass(), maskClass, Halffloat.class, length(),
+            this, that, m,
+            BIN_IMPL.find(op, opc, HalffloatVector::binaryOperations));
+    }
+
+    private static final
+    ImplCache<Binary, BinaryOperation<HalffloatVector, VectorMask<Halffloat>>>
+        BIN_IMPL = new ImplCache<>(Binary.class, HalffloatVector.class);
+
+    /*
+     * Returns the scalar (lane-at-a-time) implementation for the binary
+     * operation code opc_, or null when there is none.
+     *
+     * Each lane value is decoded with Halffloat.valueOf(short).floatValue(),
+     * the operation is carried out on floats, and the result is converted
+     * back with Halffloat.valueOf(float).
+     *
+     * DIV is included because the public div(...) methods of this class
+     * route to lanewise(DIV, ...) and therefore need a scalar implementation.
+     */
+    private static BinaryOperation<HalffloatVector, VectorMask<Halffloat>> binaryOperations(int opc_) {
+        switch (opc_) {
+            case VECTOR_OP_ADD: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_SUB: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() - Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_MUL: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_DIV: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() / Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_MAX: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_MIN: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue())));
+            default: return null;
+        }
+    }
+
+    // FIXME: Maybe all of the public final methods in this file (the
+    // simple ones that just call lanewise) should be pushed down to
+    // the X-VectorBits template.  They can't optimize properly at
+    // this level, and must rely on inlining.  Does it work?
+    // (If it works, of course keep the code here.)
+
+    /**
+     * Applies a binary operation lane-wise to this vector
+     * and the broadcast of a scalar.
+     *
+     * The selected operation is applied to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e))}.
+     *
+     * @param op the operation used to process lane values
+     * @param e the input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  short e) {
+        HalffloatVector scalarLanes = broadcast(e);
+        return lanewise(op, scalarLanes);
+    }
+
+    /**
+     * Applies a binary operation lane-wise to this vector
+     * and the broadcast of a scalar,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is the masked form: the selected operation is
+     * applied to each lane under the control of the mask.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e), m)}.
+     *
+     * @param op the operation used to process lane values
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  short e,
+                                  VectorMask<Halffloat> m) {
+        HalffloatVector scalarLanes = broadcast(e);
+        return lanewise(op, scalarLanes, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * When the receiver is statically typed as a vector subtype such as
+     * {@code HalffloatVector}, a {@code short} argument typically selects
+     * {@linkplain #lanewise(VectorOperators.Binary,short)
+     * the more strongly typed method}.
+     * That method can also be selected explicitly
+     * using a cast: {@code v.lanewise(op,(short)e)}.
+     * The two expressions will produce numerically identical results.
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  long e) {
+        // NOTE(review): the long is narrowed as an integral short, not
+        // converted to a half-precision value -- confirm this is intended.
+        short narrowed = (short) e;
+        boolean lossless = (narrowed == e);  // short is widened to long here
+        if (!lossless) {
+            vspecies().checkValue(e);  // for exception
+        }
+        return lanewise(op, narrowed);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * When the receiver is statically typed as a vector subtype such as
+     * {@code HalffloatVector}, a {@code short} argument typically selects
+     * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
+     * the more strongly typed method}.
+     * That method can also be selected explicitly
+     * using a cast: {@code v.lanewise(op,(short)e,m)}.
+     * The two expressions will produce numerically identical results.
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Binary op,
+                                  long e, VectorMask<Halffloat> m) {
+        // NOTE(review): the long is narrowed as an integral short, not
+        // converted to a half-precision value -- confirm this is intended.
+        short narrowed = (short) e;
+        boolean lossless = (narrowed == e);  // short is widened to long here
+        if (!lossless) {
+            vspecies().checkValue(e);  // for exception
+        }
+        return lanewise(op, narrowed, m);
+    }
+
+
+    // Ternary lanewise support
+
+    // Ternary operators come in eight variations:
+    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
+    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
+
+    // It is annoying to support all of these variations of masking
+    // and broadcast, but it would be more surprising not to continue
+    // the obvious pattern started by unary and binary.
+
+   /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,short,short)
+     * @see #lanewise(VectorOperators.Ternary,Vector,short)
+     * @see #lanewise(VectorOperators.Ternary,short,Vector)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Ternary op,
+                                                  Vector<Halffloat> v1,
+                                                  Vector<Halffloat> v2);
+    /*
+     * Template for the unmasked ternary lane-wise operation.
+     * Both argument vectors are cast to HalffloatVector and checked
+     * against this vector; the operator is then passed to
+     * VectorSupport.ternaryOp together with the scalar implementation
+     * looked up through TERN_IMPL.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Ternary op,
+                                          Vector<Halffloat> v1,
+                                          Vector<Halffloat> v2) {
+        HalffloatVector that = (HalffloatVector) v1;
+        HalffloatVector tother = (HalffloatVector) v2;
+        // It's a word: https://www.dictionary.com/browse/tother
+        // See also Chapter 11 of Dickens, Our Mutual Friend:
+        // "Totherest Governor," replied Mr Riderhood...
+        that.check(this);
+        tother.check(this);
+        int opc = opCode(op);
+        // Unmasked form: no mask class and no mask are supplied.
+        return VectorSupport.ternaryOp(
+            opc, getClass(), null, Halffloat.class, length(),
+            this, that, tother, null,
+            TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
+     */
+    @Override
+    public abstract
+    HalffloatVector lanewise(VectorOperators.Ternary op,
+                                  Vector<Halffloat> v1,
+                                  Vector<Halffloat> v2,
+                                  VectorMask<Halffloat> m);
+    /*
+     * Template for the masked ternary lane-wise operation.
+     * Both argument vectors and the mask are checked against this
+     * vector; the operator is then passed to VectorSupport.ternaryOp
+     * together with the mask and the scalar implementation looked up
+     * through TERN_IMPL.
+     */
+    @ForceInline
+    final
+    HalffloatVector lanewiseTemplate(VectorOperators.Ternary op,
+                                          Class<? extends VectorMask<Halffloat>> maskClass,
+                                          Vector<Halffloat> v1,
+                                          Vector<Halffloat> v2,
+                                          VectorMask<Halffloat> m) {
+        HalffloatVector that = (HalffloatVector) v1;
+        HalffloatVector tother = (HalffloatVector) v2;
+        // It's a word: https://www.dictionary.com/browse/tother
+        // See also Chapter 11 of Dickens, Our Mutual Friend:
+        // "Totherest Governor," replied Mr Riderhood...
+        that.check(this);
+        tother.check(this);
+        m.check(maskClass, this);
+
+        int opc = opCode(op);
+        return VectorSupport.ternaryOp(
+            opc, getClass(), maskClass, Halffloat.class, length(),
+            this, that, tother, m,
+            TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations));
+    }
+
+    private static final
+    ImplCache<Ternary, TernaryOperation<HalffloatVector, VectorMask<Halffloat>>>
+        TERN_IMPL = new ImplCache<>(Ternary.class, HalffloatVector.class);
+
+    /*
+     * Returns the scalar (lane-at-a-time) implementation for the ternary
+     * operation code opc_, or null when there is none.  Only FMA is
+     * provided: the three lane values are decoded to float, combined
+     * with Math.fma, and the result converted back to a lane value.
+     * NOTE(review): the float result is rounded a second time when it is
+     * converted back; confirm this matches the intended FMA rounding.
+     */
+    private static TernaryOperation<HalffloatVector, VectorMask<Halffloat>> ternaryOperations(int opc_) {
+        if (opc_ == VECTOR_OP_FMA) {
+            return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> {
+                float fa = Halffloat.valueOf(a).floatValue();
+                float fb = Halffloat.valueOf(b).floatValue();
+                float fc = Halffloat.valueOf(c).floatValue();
+                return Halffloat.valueOf(Math.fma(fa, fb, fc));
+            });
+        }
+        return null;
+    }
+
+    /**
+     * Applies a ternary operation lane-wise to this vector
+     * and the broadcasts of two scalars.
+     *
+     * The selected operation is applied to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the first input scalar
+     * @param e2 the second input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalars
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
+                                  short e1,
+                                  short e2) {
+        HalffloatVector first = broadcast(e1);
+        HalffloatVector second = broadcast(e2);
+        return lanewise(op, first, second);
+    }
+
+    /**
+     * Applies a ternary operation lane-wise to this vector
+     * and the broadcasts of two scalars,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is the masked form: the selected operation is
+     * applied to each lane under the control of the mask.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the first input scalar
+     * @param e2 the second input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vector and the scalars
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,short,short)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
+                                  short e1,
+                                  short e2,
+                                  VectorMask<Halffloat> m) {
+        HalffloatVector first = broadcast(e1);
+        HalffloatVector second = broadcast(e2);
+        return lanewise(op, first, second, m);
+    }
+
+    /**
+     * Applies a ternary operation lane-wise to this vector,
+     * another vector, and the broadcast of a scalar.
+     *
+     * The selected operation is applied to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, v1, this.broadcast(e2))}.
+     *
+     * @param op the operation used to combine lane values
+     * @param v1 the other input vector
+     * @param e2 the input scalar
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,short,short)
+     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
+                                  Vector<Halffloat> v1,
+                                  short e2) {
+        HalffloatVector second = broadcast(e2);
+        return lanewise(op, v1, second);
+    }
+
+    /**
+     * Applies a ternary operation lane-wise to this vector,
+     * another vector, and the broadcast of a scalar,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is the masked form: the selected operation is
+     * applied to each lane under the control of the mask.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param v1 the other input vector
+     * @param e2 the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,Vector,short)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
+                                  Vector<Halffloat> v1,
+                                  short e2,
+                                  VectorMask<Halffloat> m) {
+        HalffloatVector second = broadcast(e2);
+        return lanewise(op, v1, second, m);
+    }
+
+    /**
+     * Applies a ternary operation lane-wise to this vector,
+     * the broadcast of a scalar, and another vector.
+     *
+     * The selected operation is applied to each lane.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), v2)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the input scalar
+     * @param v2 the other input vector
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
+     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
+                                  short e1,
+                                  Vector<Halffloat> v2) {
+        HalffloatVector first = broadcast(e1);
+        return lanewise(op, first, v2);
+    }
+
+    /**
+     * Applies a ternary operation lane-wise to this vector,
+     * the broadcast of a scalar, and another vector,
+     * with selection of lane elements controlled by a mask.
+     *
+     * This is the masked form: the selected operation is
+     * applied to each lane under the control of the mask.
+     * The return value will be equal to this expression:
+     * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
+     *
+     * @param op the operation used to combine lane values
+     * @param e1 the input scalar
+     * @param v2 the other input vector
+     * @param m the mask controlling lane selection
+     * @return the result of applying the operation lane-wise
+     *         to the input vectors and the scalar
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     * @see #lanewise(VectorOperators.Ternary,short,Vector)
+     */
+    @ForceInline
+    public final
+    HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
+                                  short e1,
+                                  Vector<Halffloat> v2,
+                                  VectorMask<Halffloat> m) {
+        HalffloatVector first = broadcast(e1);
+        return lanewise(op, first, v2, m);
+    }
+
+    // (Thus endeth the Great and Mighty Ternary Ogdoad.)
+    // https://en.wikipedia.org/wiki/Ogdoad
+
+    /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
+    //
+    // These include masked and non-masked versions.
+    // This subclass adds broadcast (masked or not).
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #add(short)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector add(Vector<Halffloat> v) {
+        // Plain delegation to the unmasked binary lanewise form.
+        return this.lanewise(VectorOperators.ADD, v);
+    }
+
+    /**
+     * Adds the broadcast of an input scalar to this vector.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive addition operation ({@code +}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#ADD
+     *    ADD}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the result of adding each lane of this vector to the scalar
+     * @see #add(Vector)
+     * @see #broadcast(short)
+     * @see #add(short,VectorMask)
+     * @see VectorOperators#ADD
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final
+    HalffloatVector add(short e) {
+        // Plain delegation to the scalar binary lanewise form.
+        return this.lanewise(VectorOperators.ADD, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #add(short,VectorMask)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector add(Vector<Halffloat> v,
+                                          VectorMask<Halffloat> m) {
+        // Plain delegation to the masked binary lanewise form.
+        return this.lanewise(VectorOperators.ADD, v, m);
+    }
+
+    /**
+     * Adds this vector to the broadcast of an input scalar,
+     * selecting lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive addition operation ({@code +}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#ADD
+     *    ADD}{@code , e, m)}.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of adding each lane of this vector to the scalar
+     * @see #add(Vector,VectorMask)
+     * @see #broadcast(short)
+     * @see #add(short)
+     * @see VectorOperators#ADD
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector add(short e,
+                                          VectorMask<Halffloat> m) {
+        return lanewise(ADD, e, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #sub(short)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector sub(Vector<Halffloat> v) {
+        // Plain delegation to the unmasked binary lanewise form.
+        return this.lanewise(VectorOperators.SUB, v);
+    }
+
+    /**
+     * Subtracts an input scalar from this vector.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive subtraction operation ({@code -}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#SUB
+     *    SUB}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the result of subtracting the scalar from each lane of this vector
+     * @see #sub(Vector)
+     * @see #broadcast(short)
+     * @see #sub(short,VectorMask)
+     * @see VectorOperators#SUB
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector sub(short e) {
+        return lanewise(SUB, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #sub(short,VectorMask)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector sub(Vector<Halffloat> v,
+                                          VectorMask<Halffloat> m) {
+        // Plain delegation to the masked binary lanewise form.
+        return this.lanewise(VectorOperators.SUB, v, m);
+    }
+
+    /**
+     * Subtracts an input scalar from this vector
+     * under the control of a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive subtraction operation ({@code -}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#SUB
+     *    SUB}{@code , e, m)}.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of subtracting the scalar from each lane of this vector
+     * @see #sub(Vector,VectorMask)
+     * @see #broadcast(short)
+     * @see #sub(short)
+     * @see VectorOperators#SUB
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector sub(short e,
+                                          VectorMask<Halffloat> m) {
+        return lanewise(SUB, e, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #mul(short)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector mul(Vector<Halffloat> v) {
+        // Plain delegation to the unmasked binary lanewise form.
+        return this.lanewise(VectorOperators.MUL, v);
+    }
+
+    /**
+     * Multiplies each lane of this vector by the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive multiplication operation ({@code *}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#MUL
+     *    MUL}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the result of multiplying this vector by the given scalar
+     * @see #mul(Vector)
+     * @see #broadcast(short)
+     * @see #mul(short,VectorMask)
+     * @see VectorOperators#MUL
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector mul(short e) {
+        // Plain delegation to the scalar binary lanewise form.
+        return this.lanewise(VectorOperators.MUL, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #mul(short,VectorMask)
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector mul(Vector<Halffloat> v,
+                                          VectorMask<Halffloat> m) {
+        // Plain delegation to the masked binary lanewise form.
+        return this.lanewise(VectorOperators.MUL, v, m);
+    }
+
+    /**
+     * Multiplies this vector by the broadcast of an input scalar,
+     * selecting lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive multiplication operation ({@code *}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#MUL
+     *    MUL}{@code , e, m)}.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of multiplying each lane of this vector by the scalar
+     * @see #mul(Vector,VectorMask)
+     * @see #broadcast(short)
+     * @see #mul(short)
+     * @see VectorOperators#MUL
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector mul(short e,
+                                          VectorMask<Halffloat> m) {
+        return lanewise(MUL, e, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector div(Vector<Halffloat> v) {
+        // Plain delegation to the unmasked binary lanewise form.
+        return this.lanewise(VectorOperators.DIV, v);
+    }
+
+    /**
+     * Divides each lane of this vector by the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies
+     * the primitive division operation ({@code /}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#DIV
+     *    DIV}{@code , e)}.
+     *
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     *
+     * @param e the input scalar
+     * @return the result of dividing each lane of this vector by the scalar
+     * @see #div(Vector)
+     * @see #broadcast(short)
+     * @see #div(short,VectorMask)
+     * @see VectorOperators#DIV
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector div(short e) {
+        // Plain delegation to the scalar binary lanewise form.
+        return this.lanewise(VectorOperators.DIV, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @see #div(short,VectorMask)
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector div(Vector<Halffloat> v,
+                                          VectorMask<Halffloat> m) {
+        // Plain delegation to the masked binary lanewise form.
+        return this.lanewise(VectorOperators.DIV, v, m);
+    }
+
+    /**
+     * Divides this vector by the broadcast of an input scalar,
+     * selecting lane elements controlled by a mask.
+     *
+     * This is a masked lane-wise binary operation which applies
+     * the primitive division operation ({@code /}) to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
+     *    lanewise}{@code (}{@link VectorOperators#DIV
+     *    DIV}{@code , e, m)}.
+     *
+     * @apiNote Because the underlying scalar operator is an IEEE
+     * floating point number, division by zero in fact will
+     * not throw an exception, but will yield a signed
+     * infinity or NaN.
+     *
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the result of dividing each lane of this vector by the scalar
+     * @see #div(Vector,VectorMask)
+     * @see #broadcast(short)
+     * @see #div(short)
+     * @see VectorOperators#DIV
+     * @see #lanewise(VectorOperators.Binary,Vector)
+     * @see #lanewise(VectorOperators.Binary,short)
+     */
+    @ForceInline
+    public final HalffloatVector div(short e,
+                                          VectorMask<Halffloat> m) {
+        return lanewise(DIV, e, m);
+    }
+
+    /// END OF FULL-SERVICE BINARY METHODS
+
+    /// SECOND-TIER BINARY METHODS
+    //
+    // There are no masked versions.
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector min(Vector<Halffloat> v) {
+        // Thin wrapper: forwards to the binary lanewise overload with MIN.
+        return lanewise(MIN, v);
+    }
+
+    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
+    /**
+     * Computes the smaller of this vector and the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies the
+     * operation {@code Math.min()} to each pair of
+     * corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#MIN
+     *    MIN}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the lane-wise minimum of this vector and the given scalar
+     * @see #min(Vector)
+     * @see #broadcast(short)
+     * @see VectorOperators#MIN
+     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @ForceInline
+    public final HalffloatVector min(short e) {
+        // Thin wrapper: forwards to the scalar lanewise overload with MIN.
+        return lanewise(MIN, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @Override
+    @ForceInline
+    public final HalffloatVector max(Vector<Halffloat> v) {
+        // Thin wrapper: forwards to the binary lanewise overload with MAX.
+        return lanewise(MAX, v);
+    }
+
+    /**
+     * Computes the larger of this vector and the broadcast of an input scalar.
+     *
+     * This is a lane-wise binary operation which applies the
+     * operation {@code Math.max()} to each pair of
+     * corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#MAX
+     *    MAX}{@code , e)}.
+     *
+     * @param e the input scalar
+     * @return the lane-wise maximum of this vector and the given scalar
+     * @see #max(Vector)
+     * @see #broadcast(short)
+     * @see VectorOperators#MAX
+     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
+     * @apiNote
+     * For this method, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from, and less
+     * than the default value (positive zero).
+     */
+    @ForceInline
+    public final HalffloatVector max(short e) {
+        // Thin wrapper: forwards to the scalar lanewise overload with MAX.
+        return lanewise(MAX, e);
+    }
+
+
+    // common FP operator: pow
+    /**
+     * Raises this vector to the power of a second input vector.
+     *
+     * This is a lane-wise binary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#pow Math.pow(a,b)}
+     * to each pair of corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,Vector)
+     *    lanewise}{@code (}{@link VectorOperators#POW
+     *    POW}{@code , b)}.
+     *
+     * <p>
+     * This is not a full-service named operation like
+     * {@link #add(Vector) add}.  A masked version of
+     * this operation is not directly available
+     * but may be obtained via the masked version of
+     * {@code lanewise}.
+     *
+     * @param b a vector exponent by which to raise this vector
+     * @return the {@code b}-th power of this vector
+     * @see #pow(short)
+     * @see VectorOperators#POW
+     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector pow(Vector<Halffloat> b) {
+        // Thin wrapper: forwards to the binary lanewise overload with POW.
+        return lanewise(POW, b);
+    }
+
+    /**
+     * Raises this vector to a scalar power.
+     *
+     * This is a lane-wise binary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#pow Math.pow(a,b)}
+     * to each pair of corresponding lane values.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Binary,short)
+     *    lanewise}{@code (}{@link VectorOperators#POW
+     *    POW}{@code , b)}.
+     *
+     * @param b a scalar exponent by which to raise this vector
+     * @return the {@code b}-th power of this vector
+     * @see #pow(Vector)
+     * @see VectorOperators#POW
+     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector pow(short b) {
+        // Thin wrapper: forwards to the scalar lanewise overload with POW.
+        return lanewise(POW, b);
+    }
+
+    /// UNARY METHODS
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    HalffloatVector neg() {
+        // Thin wrapper: forwards to the unary lanewise overload with NEG.
+        return lanewise(NEG);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    HalffloatVector abs() {
+        // Thin wrapper: forwards to the unary lanewise overload with ABS.
+        return lanewise(ABS);
+    }
+
+
+    // sqrt
+    /**
+     * Computes the square root of this vector.
+     *
+     * This is a lane-wise unary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#sqrt Math.sqrt(a)}
+     * to each lane value.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Unary)
+     *    lanewise}{@code (}{@link VectorOperators#SQRT
+     *    SQRT}{@code )}.
+     *
+     * @return the square root of this vector
+     * @see VectorOperators#SQRT
+     * @see #lanewise(VectorOperators.Unary,VectorMask)
+     */
+    @ForceInline
+    public final HalffloatVector sqrt() {
+        // Thin wrapper: forwards to the unary lanewise overload with SQRT.
+        return lanewise(SQRT);
+    }
+
+    /// COMPARISONS
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    VectorMask<Halffloat> eq(Vector<Halffloat> v) {
+        // Thin wrapper: forwards to compare with the EQ comparison.
+        return compare(EQ, v);
+    }
+
+    /**
+     * Tests if this vector is equal to an input scalar.
+     *
+     * This is a lane-wise binary test operation which applies
+     * the primitive equals operation ({@code ==}) to each lane.
+     * The result is the same as {@code compare(VectorOperators.EQ, e)}.
+     *
+     * @param e the input scalar
+     * @return the result mask of testing if this vector
+     *         is equal to {@code e}
+     * @see #compare(VectorOperators.Comparison,short)
+     */
+    @ForceInline
+    public final
+    VectorMask<Halffloat> eq(short e) {
+        // Thin wrapper: forwards to the scalar compare overload with EQ.
+        return compare(EQ, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    VectorMask<Halffloat> lt(Vector<Halffloat> v) {
+        // Thin wrapper: forwards to compare with the LT comparison.
+        return compare(LT, v);
+    }
+
+    /**
+     * Tests if this vector is less than an input scalar.
+     *
+     * This is a lane-wise binary test operation which applies
+     * the primitive less than operation ({@code <}) to each lane.
+     * The result is the same as {@code compare(VectorOperators.LT, e)}.
+     *
+     * @param e the input scalar
+     * @return the mask result of testing if this vector
+     *         is less than the input scalar
+     * @see #compare(VectorOperators.Comparison,short)
+     */
+    @ForceInline
+    public final
+    VectorMask<Halffloat> lt(short e) {
+        // Thin wrapper: forwards to the scalar compare overload with LT.
+        return compare(LT, e);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Halffloat> test(VectorOperators.Test op);
+
+    /*package-private*/
+    // Shared implementation for the unmasked test(VectorOperators.Test).
+    //
+    // Ops of kind VO_SPECIAL are answered with integer comparisons on the
+    // ShortVector returned by viewAsIntegralLanes():
+    //   IS_DEFAULT   lane == 0
+    //   IS_NEGATIVE  lane < 0 as a signed short
+    //   IS_FINITE / IS_NAN / IS_INFINITE
+    //                lane with its top bit cleared is <, > or == the bits
+    //                of Halffloat.POSITIVE_INFINITY
+    // The resulting Short mask is cast to this vector's species and then to
+    // maskType.  Any op that is not handled ends in an AssertionError.
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    M testTemplate(Class<M> maskType, Test op) {
+        HalffloatSpecies vsp = vspecies();
+        if (opKind(op, VO_SPECIAL)) {
+            ShortVector bits = this.viewAsIntegralLanes();
+            VectorMask<Short> m;
+            if (op == IS_DEFAULT) {
+                m = bits.compare(EQ, (short) 0);
+            } else if (op == IS_NEGATIVE) {
+                m = bits.compare(LT, (short) 0);
+            }
+            else if (op == IS_FINITE ||
+                     op == IS_NAN ||
+                     op == IS_INFINITE) {
+                // first kill the sign:
+                bits = bits.and(Short.MAX_VALUE);
+                // next find the bit pattern for infinity:
+                short infbits = (short) toBits(Halffloat.POSITIVE_INFINITY);
+                // now compare:
+                if (op == IS_FINITE) {
+                    m = bits.compare(LT, infbits);
+                } else if (op == IS_NAN) {
+                    m = bits.compare(GT, infbits);
+                } else {
+                    m = bits.compare(EQ, infbits);
+                }
+            }
+            else {
+                throw new AssertionError(op);
+            }
+            // Reuse the species fetched on entry instead of looking it up again.
+            return maskType.cast(m.cast(vsp));
+        }
+        // The opcode itself is unused; the call is retained in case opCode
+        // validates op before the assertion fires -- TODO confirm.
+        int opc = opCode(op);
+        throw new AssertionError(op);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    VectorMask<Halffloat> test(VectorOperators.Test op,
+                                  VectorMask<Halffloat> m) {
+        // Masked form: run the unmasked test, then intersect the result with m.
+        return test(op).and(m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Halffloat> compare(VectorOperators.Comparison op, Vector<Halffloat> v);
+
+    /*package-private*/
+    // Shared implementation for the unmasked compare(Comparison, Vector).
+    // Casts v to HalffloatVector, runs that.check(this), and passes the
+    // opcode for op to VectorSupport.compare with a null mask.  The lambda
+    // evaluates the comparison lane by lane through bTest and
+    // compareWithOp; presumably it is the Java fallback used when the call
+    // is not intrinsified -- TODO confirm.
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    M compareTemplate(Class<M> maskType, Comparison op, Vector<Halffloat> v) {
+        HalffloatVector that = (HalffloatVector) v;
+        that.check(this);
+        int opc = opCode(op);
+        return VectorSupport.compare(
+            opc, getClass(), maskType, Halffloat.class, length(),
+            this, that, null,
+            (cond, v0, v1, m1) -> {
+                // The inner lambda ignores its own cond_ and uses the
+                // enclosing cond; m1 is unused in this unmasked variant.
+                AbstractMask<Halffloat> m
+                    = v0.bTest(cond, v1, (cond_, i, a, b)
+                               -> compareWithOp(cond, a, b));
+                @SuppressWarnings("unchecked")
+                M m2 = (M) m;
+                return m2;
+            });
+    }
+
+    /*package-private*/
+    // Shared implementation for the masked compare(Comparison, Vector, mask).
+    // Same shape as the unmasked template, with two differences: the mask
+    // is checked against maskType and this vector, and the lambda ANDs
+    // the lane-wise comparison result with the mask it receives.
+    // The lambda is presumably the Java fallback used when the call is
+    // not intrinsified -- TODO confirm.
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    M compareTemplate(Class<M> maskType, Comparison op, Vector<Halffloat> v, M m) {
+        HalffloatVector that = (HalffloatVector) v;
+        that.check(this);
+        m.check(maskType, this);
+        int opc = opCode(op);
+        return VectorSupport.compare(
+            opc, getClass(), maskType, Halffloat.class, length(),
+            this, that, m,
+            (cond, v0, v1, m1) -> {
+                // The inner lambda ignores its own cond_ and uses the
+                // enclosing cond.
+                AbstractMask<Halffloat> cmpM
+                    = v0.bTest(cond, v1, (cond_, i, a, b)
+                               -> compareWithOp(cond, a, b));
+                @SuppressWarnings("unchecked")
+                M m2 = (M) cmpM.and(m1);
+                return m2;
+            });
+    }
+
+    // Scalar comparison of two lanes.  Both operands are converted with
+    // Halffloat.valueOf(..).floatValue() and then compared as floats using
+    // the relation selected by cond (one of the BT_* codes).  An
+    // unrecognised cond raises AssertionError.
+    @ForceInline
+    private static boolean compareWithOp(int cond, short a, short b) {
+        float lhs = Halffloat.valueOf(a).floatValue();
+        float rhs = Halffloat.valueOf(b).floatValue();
+        switch (cond) {
+            case BT_eq: return lhs == rhs;
+            case BT_ne: return lhs != rhs;
+            case BT_lt: return lhs < rhs;
+            case BT_le: return lhs <= rhs;
+            case BT_gt: return lhs > rhs;
+            case BT_ge: return lhs >= rhs;
+            default: throw new AssertionError();
+        }
+    }
+
+    /**
+     * Tests this vector by comparing it with an input scalar,
+     * according to the given comparison operation.
+     *
+     * This is a lane-wise binary test operation which applies
+     * the comparison operation to each lane.
+     * <p>
+     * The result is the same as
+     * {@code compare(op, broadcast(species(), e))}.
+     * That is, the scalar may be regarded as broadcast to
+     * a vector of the same species, and then compared
+     * against the original vector, using the selected
+     * comparison operation.
+     *
+     * @param op the operation used to compare lane values
+     * @param e the input scalar
+     * @return the mask result of testing lane-wise if this vector
+     *         compares to the input, according to the selected
+     *         comparison operator
+     * @see HalffloatVector#compare(VectorOperators.Comparison,Vector)
+     * @see #eq(short)
+     * @see #lt(short)
+     */
+    public abstract
+    VectorMask<Halffloat> compare(Comparison op, short e);
+
+    /*package-private*/
+    // Scalar form: broadcasts e and defers to the vector compareTemplate.
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    M compareTemplate(Class<M> maskType, Comparison op, short e) {
+        return compareTemplate(maskType, op, broadcast(e));
+    }
+
+    /**
+     * Tests this vector by comparing it with an input scalar,
+     * according to the given comparison operation,
+     * in lanes selected by a mask.
+     *
+     * This is a masked lane-wise binary test operation which applies
+     * the comparison operation to each pair of corresponding lane values.
+     *
+     * The returned result is equal to the expression
+     * {@code compare(op,e).and(m)}.
+     *
+     * @param op the operation used to compare lane values
+     * @param e the input scalar
+     * @param m the mask controlling lane selection
+     * @return the mask result of testing lane-wise if this vector
+     *         compares to the input, according to the selected
+     *         comparison operator,
+     *         and only in the lanes selected by the mask
+     * @see HalffloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
+     */
+    @ForceInline
+    public final VectorMask<Halffloat> compare(VectorOperators.Comparison op,
+                                               short e,
+                                               VectorMask<Halffloat> m) {
+        // Broadcast the scalar and defer to the masked vector overload.
+        return compare(op, broadcast(e), m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    VectorMask<Halffloat> compare(Comparison op, long e);
+
+    /*package-private*/
+    // long form: broadcasts e through broadcast(long) and defers to the
+    // vector compareTemplate.
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    M compareTemplate(Class<M> maskType, Comparison op, long e) {
+        return compareTemplate(maskType, op, broadcast(e));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    VectorMask<Halffloat> compare(Comparison op, long e, VectorMask<Halffloat> m) {
+        // Broadcast the long scalar and defer to the masked vector overload.
+        return compare(op, broadcast(e), m);
+    }
+
+
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override public abstract
+    HalffloatVector blend(Vector<Halffloat> v, VectorMask<Halffloat> m);
+
+    /*package-private*/
+    // Shared implementation for blend(Vector, VectorMask).
+    // Runs v.check(this), then calls VectorSupport.blend.  The lambda
+    // applies a masked bOp whose lane function returns the second operand
+    // (b), i.e. the lane taken from v.
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    HalffloatVector
+    blendTemplate(Class<M> maskType, HalffloatVector v, M m) {
+        v.check(this);
+        return VectorSupport.blend(
+            getClass(), maskType, Halffloat.class, length(),
+            this, v, m,
+            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override public abstract HalffloatVector addIndex(int scale);
+
+    /*package-private*/
+    // Shared implementation for addIndex(int).
+    // Validates scale with the species' checkScale, then calls
+    // VectorSupport.indexVector.  The lambda adds the species' iota vector
+    // to v, multiplied by the scale unless the scale is 1.
+    @ForceInline
+    final HalffloatVector addIndexTemplate(int scale) {
+        HalffloatSpecies vsp = vspecies();
+        // make sure VLENGTH*scale doesn't overflow:
+        vsp.checkScale(scale);
+        return VectorSupport.indexVector(
+            getClass(), Halffloat.class, length(),
+            this, scale, vsp,
+            (v, scale_, s)
+            -> {
+                // If the platform doesn't support an INDEX
+                // instruction directly, load IOTA from memory
+                // and multiply.
+                HalffloatVector iota = s.iota();
+                // NOTE(review): sc is the int scale narrowed to short and
+                // then passed to mul(short).  If short scalars in this
+                // class carry Halffloat bit patterns rather than integer
+                // values, this multiplies by the wrong number -- confirm.
+                short sc = (short) scale_;
+                return v.add(sc == 1 ? iota : iota.mul(sc));
+            });
+    }
+
+    /**
+     * Replaces selected lanes of this vector with
+     * a scalar value
+     * under the control of a mask.
+     *
+     * This is a masked lane-wise binary operation which
+     * selects each lane value from one or the other input.
+     *
+     * The returned result is equal to the expression
+     * {@code blend(broadcast(e),m)}.
+     *
+     * @param e the input scalar, containing the replacement lane value
+     * @param m the mask controlling lane selection of the scalar
+     * @return the result of blending the lane elements of this vector with
+     *         the scalar value
+     */
+    @ForceInline
+    public final HalffloatVector blend(short e,
+                                            VectorMask<Halffloat> m) {
+        // Broadcast the scalar and defer to the vector blend.
+        return blend(broadcast(e), m);
+    }
+
+    /**
+     * Replaces selected lanes of this vector with
+     * a scalar value
+     * under the control of a mask.
+     *
+     * This is a masked lane-wise binary operation which
+     * selects each lane value from one or the other input.
+     *
+     * The returned result is equal to the expression
+     * {@code blend(broadcast(e),m)}.
+     *
+     * @param e the input scalar, containing the replacement lane value
+     * @param m the mask controlling lane selection of the scalar
+     * @return the result of blending the lane elements of this vector with
+     *         the scalar value
+     */
+    @ForceInline
+    public final HalffloatVector blend(long e,
+                                            VectorMask<Halffloat> m) {
+        // Broadcast the long scalar and defer to the vector blend.
+        return blend(broadcast(e), m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector slice(int origin, Vector<Halffloat> v1);
+
+    /*package-private*/
+    // Shared implementation for slice(int, Vector).
+    // Both inputs are rearranged with the shuffle iotaShuffle(origin, 1, true).
+    // The result starts from the rearranged v1 and takes the rearranged
+    // lanes of this vector wherever the identity-shuffle vector compares
+    // LT to the broadcast of (VLENGTH - origin).
+    // NOTE(review): the threshold is an int narrowed to short and handed
+    // to broadcast(short); confirm that this matches the lane encoding
+    // produced by VectorShuffle.toVector().
+    final
+    @ForceInline
+    HalffloatVector sliceTemplate(int origin, Vector<Halffloat> v1) {
+        HalffloatVector other = (HalffloatVector) v1;
+        other.check(this);
+        Objects.checkIndex(origin, length() + 1);
+        VectorShuffle<Halffloat> identity = iotaShuffle();
+        Vector<Halffloat> laneIndexes = identity.toVector();
+        HalffloatVector threshold = broadcast((short)(length() - origin));
+        VectorMask<Halffloat> takeFromThis =
+            laneIndexes.compare(VectorOperators.LT, threshold);
+        VectorShuffle<Halffloat> rotation = iotaShuffle(origin, 1, true);
+        HalffloatVector background = other.rearrange(rotation);
+        return background.blend(this.rearrange(rotation), takeFromThis);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    HalffloatVector slice(int origin,
+                               Vector<Halffloat> w,
+                               VectorMask<Halffloat> m) {
+        // Start from broadcast(0) and overlay the unmasked slice in the
+        // lanes selected by m.
+        return broadcast(0).blend(slice(origin, w), m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector slice(int origin);
+
+    /*package-private*/
+    // Shared implementation for slice(int).
+    // Same as the two-vector slice, except that the background is the
+    // species' zero vector: the rearranged lanes of this vector are kept
+    // wherever the identity-shuffle vector compares LT to the broadcast of
+    // (VLENGTH - origin).
+    final
+    @ForceInline
+    HalffloatVector sliceTemplate(int origin) {
+        Objects.checkIndex(origin, length() + 1);
+        VectorShuffle<Halffloat> identity = iotaShuffle();
+        Vector<Halffloat> laneIndexes = identity.toVector();
+        HalffloatVector threshold = broadcast((short)(length() - origin));
+        VectorMask<Halffloat> keep =
+            laneIndexes.compare(VectorOperators.LT, threshold);
+        VectorShuffle<Halffloat> rotation = iotaShuffle(origin, 1, true);
+        return vspecies().zero().blend(this.rearrange(rotation), keep);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector unslice(int origin, Vector<Halffloat> w, int part);
+
+    /*package-private*/
+    // Shared implementation for unslice(int, Vector, int).
+    // This vector is rearranged with iotaShuffle(-origin, 1, true) and
+    // blended into the background vector w.  For part 0 the lanes taken
+    // from this vector are those where the identity-shuffle vector
+    // compares GE to the broadcast of origin; for part 1, those that
+    // compare LT.
+    //
+    // Throws ArrayIndexOutOfBoundsException (via wrongPartForSlice) when
+    // part is neither 0 nor 1; previously any non-zero part was silently
+    // treated as part 1.
+    final
+    @ForceInline
+    HalffloatVector
+    unsliceTemplate(int origin, Vector<Halffloat> w, int part) {
+        HalffloatVector that = (HalffloatVector) w;
+        that.check(this);
+        Objects.checkIndex(origin, length() + 1);
+        if (part != 0 && part != 1) {
+            throw wrongPartForSlice(part);
+        }
+        VectorShuffle<Halffloat> iota = iotaShuffle();
+        VectorMask<Halffloat> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT,
+                                                                  (broadcast((short)(origin))));
+        iota = iotaShuffle(-origin, 1, true);
+        return that.blend(this.rearrange(iota), blendMask);
+    }
+
+    /*package-private*/
+    // Shared implementation for the masked unslice.
+    // Three steps: slice the background w against itself at origin, blend
+    // this vector into that slice under m, then unslice the blended
+    // vector back into w for the requested part.
+    final
+    @ForceInline
+    <M extends VectorMask<Halffloat>>
+    HalffloatVector
+    unsliceTemplate(Class<M> maskType, int origin, Vector<Halffloat> w, int part, M m) {
+        HalffloatVector background = (HalffloatVector) w;
+        background.check(this);
+        HalffloatVector window = background.sliceTemplate(origin, background);
+        HalffloatVector merged = window.blendTemplate(maskType, this, m);
+        return merged.unsliceTemplate(origin, w, part);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m);
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector unslice(int origin);
+
+    /*package-private*/
+    // Shared implementation for unslice(int).
+    // This vector is rearranged with iotaShuffle(-origin, 1, true) and
+    // blended over the species' zero vector in the lanes where the
+    // identity-shuffle vector compares GE to the broadcast of origin.
+    final
+    @ForceInline
+    HalffloatVector
+    unsliceTemplate(int origin) {
+        Objects.checkIndex(origin, length() + 1);
+        VectorShuffle<Halffloat> identity = iotaShuffle();
+        Vector<Halffloat> laneIndexes = identity.toVector();
+        HalffloatVector threshold = broadcast((short)(origin));
+        VectorMask<Halffloat> keep =
+            laneIndexes.compare(VectorOperators.GE, threshold);
+        VectorShuffle<Halffloat> rotation = iotaShuffle(-origin, 1, true);
+        return vspecies().zero().blend(this.rearrange(rotation), keep);
+    }
+
+    // Creates (without throwing) the exception that reports an invalid
+    // part number for a slice operation; the caller is expected to throw it.
+    private ArrayIndexOutOfBoundsException
+    wrongPartForSlice(int part) {
+        return new ArrayIndexOutOfBoundsException(
+            String.format("bad part number %d for slice operation", part));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector rearrange(VectorShuffle<Halffloat> m);
+
+    /*package-private*/
+    // Shared implementation for rearrange(VectorShuffle).
+    // Calls shuffle.checkIndexes() first, then VectorSupport.rearrangeOp
+    // with no mask.  The lambda builds each output lane i from the input
+    // lane named by the shuffle's laneSource(i).
+    @ForceInline
+    final
+    <S extends VectorShuffle<Halffloat>>
+    HalffloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
+        shuffle.checkIndexes();
+        return VectorSupport.rearrangeOp(
+            getClass(), shuffletype, null, Halffloat.class, length(),
+            this, shuffle, null,
+            (v1, s_, m_) -> v1.uOp((i, a) -> {
+                int ei = s_.laneSource(i);
+                return v1.lane(ei);
+            }));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector rearrange(VectorShuffle<Halffloat> s,
+                                   VectorMask<Halffloat> m);
+
+    /*package-private*/
+    // Shared implementation for rearrange(VectorShuffle, VectorMask).
+    // Only lanes selected by m need a valid shuffle index.  If a selected
+    // lane is invalid, checkIndexes() is called and is presumably expected
+    // to throw; the AssertionError covers the case where it returns.
+    // The lambda yields 0 for a lane whose source index is negative or
+    // whose mask bit is unset, and the selected input lane otherwise.
+    @ForceInline
+    final
+    <S extends VectorShuffle<Halffloat>, M extends VectorMask<Halffloat>>
+    HalffloatVector rearrangeTemplate(Class<S> shuffletype,
+                                           Class<M> masktype,
+                                           S shuffle,
+                                           M m) {
+
+        m.check(masktype, this);
+        VectorMask<Halffloat> valid = shuffle.laneIsValid();
+        if (m.andNot(valid).anyTrue()) {
+            shuffle.checkIndexes();
+            throw new AssertionError();
+        }
+        return VectorSupport.rearrangeOp(
+                   getClass(), shuffletype, masktype, Halffloat.class, length(),
+                   this, shuffle, m,
+                   (v1, s_, m_) -> v1.uOp((i, a) -> {
+                        int ei = s_.laneSource(i);
+                        return ei < 0  || !m_.laneIsSet(i) ? 0 : v1.lane(ei);
+                   }));
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector rearrange(VectorShuffle<Halffloat> s,
+                                   Vector<Halffloat> v);
+
+    /*package-private*/
+    // Shared implementation for rearrange(VectorShuffle, Vector).
+    // The shuffle's validity mask is captured before wrapIndexes() is
+    // applied.  This vector (r0) and v (r1) are both rearranged with the
+    // wrapped shuffle; the result is r1 with r0's lanes blended in where
+    // the original shuffle lane was valid.
+    @ForceInline
+    final
+    <S extends VectorShuffle<Halffloat>>
+    HalffloatVector rearrangeTemplate(Class<S> shuffletype,
+                                           S shuffle,
+                                           HalffloatVector v) {
+        VectorMask<Halffloat> valid = shuffle.laneIsValid();
+        @SuppressWarnings("unchecked")
+        S ws = (S) shuffle.wrapIndexes();
+        HalffloatVector r0 =
+            VectorSupport.rearrangeOp(
+                getClass(), shuffletype, null, Halffloat.class, length(),
+                this, ws, null,
+                (v0, s_, m_) -> v0.uOp((i, a) -> {
+                    int ei = s_.laneSource(i);
+                    return v0.lane(ei);
+                }));
+        HalffloatVector r1 =
+            VectorSupport.rearrangeOp(
+                getClass(), shuffletype, null, Halffloat.class, length(),
+                v, ws, null,
+                (v1, s_, m_) -> v1.uOp((i, a) -> {
+                    int ei = s_.laneSource(i);
+                    return v1.lane(ei);
+                }));
+        return r1.blend(r0, valid);
+    }
+
+    // Builds a shuffle for species dsp from this vector's lanes: each
+    // short from toArray() is widened to int and used as a source index.
+    // NOTE(review): the widening uses the short value as-is.  If toArray()
+    // returns Halffloat bit patterns rather than integer values, the
+    // indexes are the raw bits, not the numeric lane values -- confirm
+    // this is what toShuffleTemplate's cast is meant to match.
+    @ForceInline
+    private final
+    VectorShuffle<Halffloat> toShuffle0(HalffloatSpecies dsp) {
+        short[] lanes = toArray();
+        int[] sourceIndexes = new int[lanes.length];
+        int next = 0;
+        for (short lane : lanes) {
+            sourceIndexes[next++] = lane;
+        }
+        return VectorShuffle.fromArray(dsp, sourceIndexes, 0);
+    }
+
+    /*package-private*/
+    // Shared implementation for toShuffle().
+    // Issues a VECTOR_OP_CAST through VectorSupport.convert, describing
+    // the source lanes as short.class and the shuffle lanes as byte.class,
+    // both with this vector's length.  toShuffle0 is supplied as the
+    // Java-level implementation.
+    @ForceInline
+    final
+    VectorShuffle<Halffloat> toShuffleTemplate(Class<?> shuffleType) {
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                                     getClass(), short.class, length(),
+                                     shuffleType, byte.class, length(),
+                                     this, vsp,
+                                     HalffloatVector::toShuffle0);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector selectFrom(Vector<Halffloat> v);
+
+    /*package-private*/
+    // Shared implementation for selectFrom(Vector): this vector is turned
+    // into a shuffle, which is then used to rearrange v.
+    @ForceInline
+    final HalffloatVector selectFromTemplate(HalffloatVector v) {
+        return v.rearrange(this.toShuffle());
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    public abstract
+    HalffloatVector selectFrom(Vector<Halffloat> s, VectorMask<Halffloat> m);
+
+    /*package-private*/
+    // Shared implementation for selectFrom(Vector, VectorMask): this
+    // vector is turned into a shuffle, which is then used for a masked
+    // rearrange of v.
+    @ForceInline
+    final HalffloatVector selectFromTemplate(HalffloatVector v,
+                                                  AbstractMask<Halffloat> m) {
+        return v.rearrange(this.toShuffle(), m);
+    }
+
+    /// Ternary operations
+
+
+    /**
+     * Multiplies this vector by a second input vector, and sums
+     * the result with a third.
+     *
+     * Extended precision is used for the intermediate result,
+     * avoiding possible loss of precision from rounding once
+     * for each of the two operations.
+     * The result is numerically close to {@code this.mul(b).add(c)},
+     * and is typically closer to the true mathematical result.
+     *
+     * This is a lane-wise ternary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#fma(float,float,float) Math.fma(a,b,c)}
+     * to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
+     *    lanewise}{@code (}{@link VectorOperators#FMA
+     *    FMA}{@code , b, c)}.
+     *
+     * @param b the second input vector, supplying multiplier values
+     * @param c the third input vector, supplying addend values
+     * @return the product of this vector and the second input vector
+     *         summed with the third input vector, using extended precision
+     *         for the intermediate result
+     * @see #fma(short,short)
+     * @see VectorOperators#FMA
+     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector fma(Vector<Halffloat> b, Vector<Halffloat> c) {
+        // Thin wrapper: forwards to the ternary lanewise overload with FMA.
+        return lanewise(FMA, b, c);
+    }
+
+    /**
+     * Multiplies this vector by a scalar multiplier, and sums
+     * the result with a scalar addend.
+     *
+     * Extended precision is used for the intermediate result,
+     * avoiding possible loss of precision from rounding once
+     * for each of the two operations.
+     * The result is numerically close to {@code this.mul(b).add(c)},
+     * and is typically closer to the true mathematical result.
+     *
+     * This is a lane-wise ternary operation which applies an operation
+     * conforming to the specification of
+     * {@link Math#fma(float,float,float) Math.fma(a,b,c)}
+     * to each lane.
+     *
+     * This method is also equivalent to the expression
+     * {@link #lanewise(VectorOperators.Ternary,short,short)
+     *    lanewise}{@code (}{@link VectorOperators#FMA
+     *    FMA}{@code , b, c)}.
+     *
+     * @param b the scalar multiplier
+     * @param c the scalar addend
+     * @return the product of this vector and the scalar multiplier
+     *         summed with scalar addend, using extended precision
+     *         for the intermediate result
+     * @see #fma(Vector,Vector)
+     * @see VectorOperators#FMA
+     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
+     */
+    @ForceInline
+    public final
+    HalffloatVector fma(short b, short c) {
+        // Thin wrapper: forwards to the scalar ternary lanewise overload with FMA.
+        return lanewise(FMA, b, c);
+    }
+
+    // Don't bother with (Vector,short) and (short,Vector) overloadings.
+
+    // Type specific horizontal reductions
+
+    /**
+     * Returns a value accumulated from all the lanes of this vector.
+     *
+     * This is an associative cross-lane reduction operation which
+     * applies the specified operation to all the lane elements.
+     * <p>
+     * A few reduction operations do not support arbitrary reordering
+     * of their operands, yet are included here because of their
+     * usefulness.
+     * <ul>
+     * <li>
+     * In the case of {@code FIRST_NONZERO}, the reduction returns
+     * the value from the lowest-numbered non-zero lane.
+     * (As with {@code MAX} and {@code MIN}, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from
+     * the default value, positive zero. So a first-nonzero lane reduction
+     * might return {@code -0.0} even in the presence of non-zero
+     * lane values.)
+     * <li>
+     * In the case of {@code ADD} and {@code MUL}, the
+     * precise result will reflect the choice of an arbitrary order
+     * of operations, which may even vary over time.
+     * For further details see the section
+     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
+     * <li>
+     * All other reduction operations are fully commutative and
+     * associative.  The implementation can choose any order of
+     * processing, yet it will always produce the same result.
+     * </ul>
+     *
+     * @param op the operation used to combine lane values
+     * @return the accumulated result
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
+     * @see #add(Vector)
+     * @see #mul(Vector)
+     * @see #min(Vector)
+     * @see #max(Vector)
+     * @see VectorOperators#FIRST_NONZERO
+     */
+    // Abstract: each subclass supplies the implementation; the reduced
+    // value is handed back as a single short lane value.
+    public abstract short reduceLanes(VectorOperators.Associative op);
+
+    /**
+     * Returns a value accumulated from selected lanes of this vector,
+     * controlled by a mask.
+     *
+     * This is an associative cross-lane reduction operation which
+     * applies the specified operation to the selected lane elements.
+     * <p>
+     * If no elements are selected, an operation-specific identity
+     * value is returned.
+     * <ul>
+     * <li>
+     * If the operation is
+     *  {@code ADD}
+     * or {@code FIRST_NONZERO},
+     * then the identity value is positive zero, the default {@code short} value.
+     * <li>
+     * If the operation is {@code MUL},
+     * then the identity value is one.
+     * <li>
+     * If the operation is {@code MAX},
+     * then the identity value is {@code Halffloat.NEGATIVE_INFINITY}.
+     * <li>
+     * If the operation is {@code MIN},
+     * then the identity value is {@code Halffloat.POSITIVE_INFINITY}.
+     * </ul>
+     * <p>
+     * A few reduction operations do not support arbitrary reordering
+     * of their operands, yet are included here because of their
+     * usefulness.
+     * <ul>
+     * <li>
+     * In the case of {@code FIRST_NONZERO}, the reduction returns
+     * the value from the lowest-numbered non-zero lane.
+     * (As with {@code MAX} and {@code MIN}, floating point negative
+     * zero {@code -0.0} is treated as a value distinct from
+     * the default value, positive zero. So a first-nonzero lane reduction
+     * might return {@code -0.0} even in the presence of non-zero
+     * lane values.)
+     * <li>
+     * In the case of {@code ADD} and {@code MUL}, the
+     * precise result will reflect the choice of an arbitrary order
+     * of operations, which may even vary over time.
+     * For further details see the section
+     * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>.
+     * <li>
+     * All other reduction operations are fully commutative and
+     * associative.  The implementation can choose any order of
+     * processing, yet it will always produce the same result.
+     * </ul>
+     *
+     * @param op the operation used to combine lane values
+     * @param m the mask controlling lane selection
+     * @return the reduced result accumulated from the selected lane values
+     * @throws UnsupportedOperationException if this vector does
+     *         not support the requested operation
+     * @see #reduceLanes(VectorOperators.Associative)
+     */
+    // Abstract masked variant: each subclass supplies the implementation;
+    // the reduced value is handed back as a single short lane value.
+    public abstract short reduceLanes(VectorOperators.Associative op,
+                                       VectorMask<Halffloat> m);
+
+    /*package-private*/
+    // Masked reduction template.  The mask is first checked against
+    // maskClass and this vector; FIRST_NONZERO is then handled in Java,
+    // while every other operator goes through VectorSupport.reductionCoerced
+    // with the fallback looked up in REDUCE_IMPL.
+    @ForceInline
+    final
+    short reduceLanesTemplate(VectorOperators.Associative op,
+                               Class<? extends VectorMask<Halffloat>> maskClass,
+                               VectorMask<Halffloat> m) {
+        m.check(maskClass, this);
+        if (op == FIRST_NONZERO) {
+            // reduceIdentityVector() has no entry for FIRST_NONZERO, so the
+            // documented identity (positive zero) is broadcast directly.
+            // Unselected lanes therefore hold zero bits before the
+            // unmasked reduction scans for the first non-zero lane.
+            HalffloatVector v = broadcast((short) 0).blend(this, m);
+            return v.reduceLanesTemplate(op);
+        }
+        int opc = opCode(op);
+        return fromBits(VectorSupport.reductionCoerced(
+            opc, getClass(), maskClass, Halffloat.class, length(),
+            this, m,
+            REDUCE_IMPL.find(op, opc, HalffloatVector::reductionOperations)));
+    }
+
+    /*package-private*/
+    // Unmasked reduction template.  FIRST_NONZERO is handled in Java by
+    // comparing the lane bits against zero; every other operator goes
+    // through VectorSupport.reductionCoerced with the fallback looked up
+    // in REDUCE_IMPL.
+    @ForceInline
+    final
+    short reduceLanesTemplate(VectorOperators.Associative op) {
+        if (op == FIRST_NONZERO) {
+            // FIXME:  The JIT should handle this, and other scan ops also.
+            VectorMask<Short> thisNZ
+                = this.viewAsIntegralLanes().compare(NE, (short) 0);
+            // firstTrue() yields the vector length when no lane is set;
+            // in that case there is no non-zero lane, so return the
+            // identity (positive zero) instead of indexing out of range.
+            int ft = thisNZ.firstTrue();
+            return ft < length() ? this.lane(ft) : (short) 0;
+        }
+        int opc = opCode(op);
+        return fromBits(VectorSupport.reductionCoerced(
+            opc, getClass(), null, Halffloat.class, length(),
+            this, null,
+            REDUCE_IMPL.find(op, opc, HalffloatVector::reductionOperations)));
+    }
+
+    // Per-operator lookup table for the reduction lambdas produced by
+    // reductionOperations(); queried through find() in the
+    // reduceLanesTemplate methods.
+    private static final
+    ImplCache<Associative, ReductionOperation<HalffloatVector, VectorMask<Halffloat>>>
+        REDUCE_IMPL = new ImplCache<>(Associative.class, HalffloatVector.class);
+
+    // Maps a reduction opcode to its Java fallback, or returns null when
+    // the opcode has no fallback here.  Each fallback folds the lanes with
+    // rOp, starting from the operation's identity: lane bits are decoded
+    // to float, combined, and the result is re-encoded via Halffloat.valueOf.
+    private static ReductionOperation<HalffloatVector, VectorMask<Halffloat>> reductionOperations(int opc_) {
+        switch (opc_) {
+            case VECTOR_OP_ADD: return (v, m) ->
+                    toBits(v.rOp((short)0, m, (i, a, b) ->
+                        Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue()))));
+            case VECTOR_OP_MUL: return (v, m) ->
+                    // The seed is decoded as half-float bits by the lambda,
+                    // so it must be the encoding of 1.0, not the integer 1
+                    // (bit pattern 0x0001 is a tiny subnormal).
+                    toBits(v.rOp(Halffloat.valueOf(1.0f), m, (i, a, b) ->
+                        Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue()))));
+            case VECTOR_OP_MIN: return (v, m) ->
+                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) ->
+                        Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue()))));
+            case VECTOR_OP_MAX: return (v, m) ->
+                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) ->
+                        Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue()))));
+            default: return null;
+        }
+    }
+
+    private
+    @ForceInline
+    HalffloatVector reduceIdentityVector(VectorOperators.Associative op) {
+        int opc = opCode(op);
+        UnaryOperator<HalffloatVector> fn
+            = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
+                switch (opc_) {
+                case VECTOR_OP_ADD:
+                    return v -> v.broadcast(0);
+                case VECTOR_OP_MUL:
+                    return v -> v.broadcast(1);
+                case VECTOR_OP_MIN:
+                    return v -> v.broadcast(MAX_OR_INF);
+                case VECTOR_OP_MAX:
+                    return v -> v.broadcast(MIN_OR_INF);
+                default: return null;
+                }
+            });
+        return fn.apply(this);
+    }
+    // Per-operator lookup table for the identity-broadcast functions used
+    // by reduceIdentityVector(); queried there through find().
+    private static final
+    ImplCache<Associative,UnaryOperator<HalffloatVector>> REDUCE_ID_IMPL
+        = new ImplCache<>(Associative.class, HalffloatVector.class);
+
+    // Identity lane values for the extremum reductions: MAX starts from
+    // MIN_OR_INF (negative infinity) and MIN starts from MAX_OR_INF
+    // (positive infinity).
+    private static final short MIN_OR_INF = Halffloat.NEGATIVE_INFINITY;
+    private static final short MAX_OR_INF = Halffloat.POSITIVE_INFINITY;
+
+    // Re-declared as abstract overrides (unmasked and masked forms);
+    // subclasses supply the implementations that return a long result.
+    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
+    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
+                                                     VectorMask<Halffloat> m);
+
+    // Type specific accessors
+
+    /**
+     * Gets the lane element at lane index {@code i}.
+     *
+     * @param i the lane index
+     * @return the lane element at lane index {@code i}
+     * @throws IllegalArgumentException if the index is out of range
+     * ({@code < 0 || >= length()})
+     */
+    public abstract short lane(int i);
+
+    /**
+     * Replaces the lane element of this vector at lane index {@code i} with
+     * value {@code e}.
+     *
+     * This is a cross-lane operation and behaves as if it returns the result
+     * of blending this vector with an input vector that is the result of
+     * broadcasting {@code e} and a mask that has only one lane set at lane
+     * index {@code i}.
+     *
+     * @param i the lane index of the lane element to be replaced
+     * @param e the value to be placed
+     * @return the result of replacing the lane element of this vector at lane
+     * index {@code i} with value {@code e}.
+     * @throws IllegalArgumentException if the index is out of range
+     * ({@code < 0 || >= length()})
+     */
+    public abstract HalffloatVector withLane(int i, short e);
+
+    // Memory load operations
+
+    /**
+     * Returns an array of type {@code short[]}
+     * containing all the lane values.
+     * The array length is the same as the vector length.
+     * The array elements are stored in lane order.
+     * <p>
+     * This method behaves as if it stores
+     * this vector into an allocated array
+     * (using {@link #intoArray(short[], int) intoArray})
+     * and returns the array as follows:
+     * <pre>{@code
+     *   short[] a = new short[this.length()];
+     *   this.intoArray(a, 0);
+     *   return a;
+     * }</pre>
+     *
+     * @return an array containing the lane values of this vector
+     */
+    @ForceInline
+    @Override
+    public final short[] toArray() {
+        // One slot per lane, filled by storing this vector at index zero.
+        final int lanes = vspecies().laneCount();
+        final short[] out = new short[lanes];
+        intoArray(out, 0);
+        return out;
+    }
+
+    /** {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final int[] toIntArray() {
+        // Snapshot the lanes, then convert each one through the checked
+        // integral conversion (second argument true) and narrow to int.
+        final short[] lanes = toArray();
+        final int count = lanes.length;
+        final int[] out = new int[count];
+        for (int k = 0; k < count; k++) {
+            out[k] = (int) HalffloatSpecies.toIntegralChecked(lanes[k], true);
+        }
+        return out;
+    }
+
+    /** {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final long[] toLongArray() {
+        // Snapshot the lanes, then convert each one through the checked
+        // integral conversion (second argument false).
+        final short[] lanes = toArray();
+        final int count = lanes.length;
+        final long[] out = new long[count];
+        for (int k = 0; k < count; k++) {
+            out[k] = HalffloatSpecies.toIntegralChecked(lanes[k], false);
+        }
+        return out;
+    }
+
+    /** {@inheritDoc} <!--workaround-->
+     * @implNote
+     * When this method is used on vectors
+     * of type {@code HalffloatVector},
+     * there will be no loss of precision.
+     */
+    @ForceInline
+    @Override
+    public final double[] toDoubleArray() {
+        short[] a = toArray();
+        double[] res = new double[a.length];
+        for (int i = 0; i < a.length; i++) {
+            // Lanes hold half-float bit patterns: decode them to their
+            // numeric value rather than widening the raw 16-bit integer.
+            res[i] = (double) Halffloat.valueOf(a[i]).floatValue();
+        }
+        return res;
+    }
+
+    /**
+     * Loads a vector from a byte array starting at an offset.
+     * Bytes are composed into primitive lane elements according
+     * to the specified byte order.
+     * The vector is arranged into lanes according to
+     * <a href="Vector.html#lane-order">memory ordering</a>.
+     * <p>
+     * This method behaves as if it returns the result of calling
+     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
+     * fromByteBuffer()} as follows:
+     * <pre>{@code
+     * var bb = ByteBuffer.wrap(a);
+     * var m = species.maskAll(true);
+     * return fromByteBuffer(species, bb, offset, bo, m);
+     * }</pre>
+     *
+     * @param species species of desired vector
+     * @param a the byte array
+     * @param offset the offset into the array
+     * @param bo the intended byte order
+     * @return a vector loaded from a byte array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N*ESIZE < 0}
+     *         or {@code offset+(N+1)*ESIZE > a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromByteArray(VectorSpecies<Halffloat> species,
+                                       byte[] a, int offset,
+                                       ByteOrder bo) {
+        offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length);
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo);
+    }
+
+    /**
+     * Loads a vector from a byte array starting at an offset
+     * and using a mask.
+     * Lanes where the mask is unset are filled with the default
+     * value of {@code short} (positive zero).
+     * Bytes are composed into primitive lane elements according
+     * to the specified byte order.
+     * The vector is arranged into lanes according to
+     * <a href="Vector.html#lane-order">memory ordering</a>.
+     * <p>
+     * This method behaves as if it returns the result of calling
+     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
+     * fromByteBuffer()} as follows:
+     * <pre>{@code
+     * var bb = ByteBuffer.wrap(a);
+     * return fromByteBuffer(species, bb, offset, bo, m);
+     * }</pre>
+     *
+     * @param species species of desired vector
+     * @param a the byte array
+     * @param offset the offset into the array
+     * @param bo the intended byte order
+     * @param m the mask controlling lane selection
+     * @return a vector loaded from a byte array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N*ESIZE < 0}
+     *         or {@code offset+(N+1)*ESIZE > a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromByteArray(VectorSpecies<Halffloat> species,
+                                       byte[] a, int offset,
+                                       ByteOrder bo,
+                                       VectorMask<Halffloat> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) {
+            return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo);
+        }
+
+        // FIXME: optimize
+        checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
+        ByteBuffer wb = wrapper(a, bo);
+        return vsp.ldOp(wb, offset, (AbstractMask<Halffloat>)m,
+                   (wb_, o, i)  -> wb_.getShort(o + i * 2));
+    }
+
+    /**
+     * Loads a vector from an array of type {@code short[]}
+     * starting at an offset.
+     * For each vector lane, where {@code N} is the vector lane index, the
+     * array element at index {@code offset + N} is placed into the
+     * resulting vector at lane index {@code N}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array
+     * @return the vector loaded from an array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Halffloat> species,
+                                   short[] a, int offset) {
+        offset = checkFromIndexSize(offset, species.length(), a.length);
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.dummyVector().fromArray0(a, offset);
+    }
+
+    /**
+     * Loads a vector from an array of type {@code short[]}
+     * starting at an offset and using a mask.
+     * Lanes where the mask is unset are filled with the default
+     * value of {@code short} (positive zero).
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the mask lane at index {@code N} is set then the array element at
+     * index {@code offset + N} is placed into the resulting vector at lane index
+     * {@code N}, otherwise the default element value is placed into the
+     * resulting vector at lane index {@code N}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array
+     * @param m the mask controlling lane selection
+     * @return the vector loaded from an array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Halffloat> species,
+                                   short[] a, int offset,
+                                   VectorMask<Halffloat> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        if (offset >= 0 && offset <= (a.length - species.length())) {
+            return vsp.dummyVector().fromArray0(a, offset, m);
+        }
+
+        // FIXME: optimize
+        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
+        return vsp.vOp(m, i -> a[offset + i]);
+    }
+
+    /**
+     * Gathers a new vector composed of elements from an array of type
+     * {@code short[]},
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane is loaded from the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array, may be negative if relative
+     * indexes in the index map compensate to produce a value within the
+     * array bounds
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @return the vector loaded from the indexed elements of the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Halffloat> species,
+                                   short[] a, int offset,
+                                   int[] indexMap, int mapOffset) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
+    }
+
+    /**
+     * Gathers a new vector composed of elements from an array of type
+     * {@code short[]},
+     * under the control of a mask, and
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the lane is set in the mask,
+     * the lane is loaded from the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     * Unset lanes in the resulting vector are set to zero.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array, may be negative if relative
+     * indexes in the index map compensate to produce a value within the
+     * array bounds
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @param m the mask controlling lane selection
+     * @return the vector loaded from the indexed elements of the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromArray(VectorSpecies<Halffloat> species,
+                                   short[] a, int offset,
+                                   int[] indexMap, int mapOffset,
+                                   VectorMask<Halffloat> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
+    }
+
+    /**
+     * Loads a vector from an array of type {@code char[]}
+     * starting at an offset.
+     * For each vector lane, where {@code N} is the vector lane index, the
+     * array element at index {@code offset + N}
+     * is first cast to a {@code short} value and then
+     * placed into the resulting vector at lane index {@code N}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array
+     * @return the vector loaded from an array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromCharArray(VectorSpecies<Halffloat> species,
+                                       char[] a, int offset) {
+        offset = checkFromIndexSize(offset, species.length(), a.length);
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.dummyVector().fromCharArray0(a, offset);
+    }
+
+    /**
+     * Loads a vector from an array of type {@code char[]}
+     * starting at an offset and using a mask.
+     * Lanes where the mask is unset are filled with the default
+     * value of {@code short} (positive zero).
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the mask lane at index {@code N} is set then the array element at
+     * index {@code offset + N}
+     * is first cast to a {@code short} value and then
+     * placed into the resulting vector at lane index
+     * {@code N}, otherwise the default element value is placed into the
+     * resulting vector at lane index {@code N}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array
+     * @param m the mask controlling lane selection
+     * @return the vector loaded from an array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromCharArray(VectorSpecies<Halffloat> species,
+                                       char[] a, int offset,
+                                       VectorMask<Halffloat> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        if (offset >= 0 && offset <= (a.length - species.length())) {
+            return vsp.dummyVector().fromCharArray0(a, offset, m);
+        }
+
+        // FIXME: optimize
+        checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
+        return vsp.vOp(m, i -> (short) a[offset + i]);
+    }
+
+    /**
+     * Gathers a new vector composed of elements from an array of type
+     * {@code char[]},
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane is loaded from the expression
+     * {@code (short) a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array, may be negative if relative
+     * indexes in the index map compensate to produce a value within the
+     * array bounds
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @return the vector loaded from the indexed elements of the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromCharArray(VectorSpecies<Halffloat> species,
+                                       char[] a, int offset,
+                                       int[] indexMap, int mapOffset) {
+        // FIXME: optimize
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.vOp(n -> (short) a[offset + indexMap[mapOffset + n]]);
+    }
+
+    /**
+     * Gathers a new vector composed of elements from an array of type
+     * {@code char[]},
+     * under the control of a mask, and
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the lane is set in the mask,
+     * the lane is loaded from the expression
+     * {@code (short) a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     * Unset lanes in the resulting vector are set to zero.
+     *
+     * @param species species of desired vector
+     * @param a the array
+     * @param offset the offset into the array, may be negative if relative
+     * indexes in the index map compensate to produce a value within the
+     * array bounds
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @param m the mask controlling lane selection
+     * @return the vector loaded from the indexed elements of the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromCharArray(VectorSpecies<Halffloat> species,
+                                       char[] a, int offset,
+                                       int[] indexMap, int mapOffset,
+                                       VectorMask<Halffloat> m) {
+        // FIXME: optimize
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
+    }
+
+
+    /**
+     * Loads a vector from a {@linkplain ByteBuffer byte buffer}
+     * starting at an offset into the byte buffer.
+     * Bytes are composed into primitive lane elements according
+     * to the specified byte order.
+     * The vector is arranged into lanes according to
+     * <a href="Vector.html#lane-order">memory ordering</a>.
+     * <p>
+     * This method behaves as if it returns the result of calling
+     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
+     * fromByteBuffer()} as follows:
+     * <pre>{@code
+     * var m = species.maskAll(true);
+     * return fromByteBuffer(species, bb, offset, bo, m);
+     * }</pre>
+     *
+     * @param species species of desired vector
+     * @param bb the byte buffer
+     * @param offset the offset into the byte buffer
+     * @param bo the intended byte order
+     * @return a vector loaded from a byte buffer
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N*2 < 0}
+     *         or {@code offset+N*2 >= bb.limit()}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromByteBuffer(VectorSpecies<Halffloat> species,
+                                        ByteBuffer bb, int offset,
+                                        ByteOrder bo) {
+        offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit());
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo);
+    }
+
+    /**
+     * Loads a vector from a {@linkplain ByteBuffer byte buffer}
+     * starting at an offset into the byte buffer
+     * and using a mask.
+     * Lanes where the mask is unset are filled with the default
+     * value of {@code short} (positive zero).
+     * Bytes are composed into primitive lane elements according
+     * to the specified byte order.
+     * The vector is arranged into lanes according to
+     * <a href="Vector.html#lane-order">memory ordering</a>.
+     * <p>
+     * The following pseudocode illustrates the behavior:
+     * <pre>{@code
+     * ShortBuffer eb = bb.duplicate()
+     *     .position(offset)
+     *     .order(bo).asShortBuffer();
+     * short[] ar = new short[species.length()];
+     * for (int n = 0; n < ar.length; n++) {
+     *     if (m.laneIsSet(n)) {
+     *         ar[n] = eb.get(n);
+     *     }
+     * }
+     * HalffloatVector r = HalffloatVector.fromArray(species, ar, 0);
+     * }</pre>
+     * @implNote
+     * This operation is likely to be more efficient if
+     * the specified byte order is the same as
+     * {@linkplain ByteOrder#nativeOrder()
+     * the platform native order},
+     * since this method will not need to reorder
+     * the bytes of lane values.
+     *
+     * @param species species of desired vector
+     * @param bb the byte buffer
+     * @param offset the offset into the byte buffer
+     * @param bo the intended byte order
+     * @param m the mask controlling lane selection
+     * @return a vector loaded from a byte buffer
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N*2 < 0}
+     *         or {@code offset+N*2 >= bb.limit()}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public static
+    HalffloatVector fromByteBuffer(VectorSpecies<Halffloat> species,
+                                        ByteBuffer bb, int offset,
+                                        ByteOrder bo,
+                                        VectorMask<Halffloat> m) {
+        HalffloatSpecies vsp = (HalffloatSpecies) species;
+        if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) {
+            return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo);
+        }
+
+        // FIXME: optimize
+        checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
+        ByteBuffer wb = wrapper(bb, bo);
+        return vsp.ldOp(wb, offset, (AbstractMask<Halffloat>)m,
+                   (wb_, o, i)  -> wb_.getShort(o + i * 2));
+    }
+
+    // Memory store operations
+
+    /**
+     * Stores this vector into an array of type {@code short[]}
+     * starting at an offset.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[offset+N]}.
+     *
+     * @param a the array, of type {@code short[]}
+     * @param offset the offset into the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public final
+    void intoArray(short[] a, int offset) {
+        offset = checkFromIndexSize(offset, length(), a.length);  // all length() lanes must fit in a
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            this,
+            a, offset,
+            (arr, off, v)  // NOTE(review): presumably the non-intrinsic default path -- confirm
+            -> v.stOp(arr, off,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = e));  // scalar store of lane i
+    }
+
+    /**
+     * Stores this vector into an array of type {@code short[]}
+     * starting at offset and using a mask.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[offset+N]}.
+     * If the mask lane at {@code N} is unset then the corresponding
+     * array element {@code a[offset+N]} is left unchanged.
+     * <p>
+     * Array range checking is done for lanes where the mask is set.
+     * Lanes where the mask is unset are not stored and do not need
+     * to correspond to legitimate elements of {@code a}.
+     * That is, unset lanes may correspond to array indexes less than
+     * zero or beyond the end of the array.
+     *
+     * @param a the array, of type {@code short[]}
+     * @param offset the offset into the array
+     * @param m the mask controlling lane storage
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public final
+    void intoArray(short[] a, int offset,
+                   VectorMask<Halffloat> m) {
+        if (!m.allTrue()) {
+            // Partial mask: only lanes that are set get range-checked.
+            checkMaskFromIndexSize(offset, vspecies(), m, 1, a.length);
+            intoArray0(a, offset, m);
+        } else {
+            intoArray(a, offset);
+        }
+    }
+
+    /**
+     * Scatters this vector into an array of type {@code short[]}
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param a the array
+     * @param offset an offset to combine with the index map offsets
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public final
+    void intoArray(short[] a, int offset,
+                   int[] indexMap, int mapOffset) {
+        // Scatter: lane N is written to
+        // a[offset + indexMap[mapOffset + N]].
+        stOp(a, offset,
+             (dst, base, lane, bits) ->
+                 dst[base + indexMap[mapOffset + lane]] = bits);
+    }
+
+    /**
+     * Scatters this vector into an array of type {@code short[]},
+     * under the control of a mask, and
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the mask lane at index {@code N} is set then
+     * the lane element at index {@code N} is stored into the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param a the array
+     * @param offset an offset to combine with the index map offsets
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @param m the mask
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public final
+    void intoArray(short[] a, int offset,
+                   int[] indexMap, int mapOffset,
+                   VectorMask<Halffloat> m) {
+        // Scatter: lane N is written to a[offset + indexMap[mapOffset + N]];
+        // the mask m is handed to stOp together with the lane writer.
+        stOp(a, offset, m,
+             (dst, base, lane, bits) ->
+                 dst[base + indexMap[mapOffset + lane]] = bits);
+    }
+
+    /**
+     * Stores this vector into an array of type {@code char[]}
+     * starting at an offset.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N}
+     * is first cast to a {@code char} value and then
+     * stored into the array element {@code a[offset+N]}.
+     *
+     * @param a the array, of type {@code char[]}
+     * @param offset the offset into the array
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     */
+    @ForceInline
+    public final
+    void intoCharArray(char[] a, int offset) {
+        offset = checkFromIndexSize(offset, length(), a.length);  // all length() lanes must fit in a
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, charArrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            this,
+            a, offset,
+            (arr, off, v)  // NOTE(review): presumably the non-intrinsic default path -- confirm
+            -> v.stOp(arr, off,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));  // cast keeps the 16 lane bits
+    }
+
+    /**
+     * Stores this vector into an array of type {@code char[]}
+     * starting at offset and using a mask.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N}
+     * is first cast to a {@code char} value and then
+     * stored into the array element {@code a[offset+N]}.
+     * If the mask lane at {@code N} is unset then the corresponding
+     * array element {@code a[offset+N]} is left unchanged.
+     * <p>
+     * Array range checking is done for lanes where the mask is set.
+     * Lanes where the mask is unset are not stored and do not need
+     * to correspond to legitimate elements of {@code a}.
+     * That is, unset lanes may correspond to array indexes less than
+     * zero or beyond the end of the array.
+     *
+     * @param a the array, of type {@code char[]}
+     * @param offset the offset into the array
+     * @param m the mask controlling lane storage
+     * @throws IndexOutOfBoundsException
+     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     */
+    @ForceInline
+    public final
+    void intoCharArray(char[] a, int offset,
+                       VectorMask<Halffloat> m) {
+        if (!m.allTrue()) {
+            // Partial mask: only lanes that are set get range-checked.
+            checkMaskFromIndexSize(offset, vspecies(), m, 1, a.length);
+            intoCharArray0(a, offset, m);
+        } else {
+            intoCharArray(a, offset);
+        }
+    }
+
+    /**
+     * Scatters this vector into an array of type {@code char[]}
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * the lane element at index {@code N}
+     * is first cast to a {@code char} value and then
+     * stored into the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param a the array
+     * @param offset an offset to combine with the index map offsets
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public final
+    void intoCharArray(char[] a, int offset,
+                       int[] indexMap, int mapOffset) {
+        // FIXME: optimize
+        // Scatter: lane N is cast to char and written to
+        // a[offset + indexMap[mapOffset + N]].
+        stOp(a, offset,
+             (dst, base, lane, bits) ->
+                 dst[base + indexMap[mapOffset + lane]] = (char) bits);
+    }
+
+    /**
+     * Scatters this vector into an array of type {@code char[]},
+     * under the control of a mask, and
+     * using indexes obtained by adding a fixed {@code offset} to a
+     * series of secondary offsets from an <em>index map</em>.
+     * The index map is a contiguous sequence of {@code VLENGTH}
+     * elements in a second array of {@code int}s, starting at a given
+     * {@code mapOffset}.
+     * <p>
+     * For each vector lane, where {@code N} is the vector lane index,
+     * if the mask lane at index {@code N} is set then
+     * the lane element at index {@code N}
+     * is first cast to a {@code char} value and then
+     * stored into the array
+     * element {@code a[f(N)]}, where {@code f(N)} is the
+     * index mapping expression
+     * {@code offset + indexMap[mapOffset + N]}.
+     *
+     * @param a the array
+     * @param offset an offset to combine with the index map offsets
+     * @param indexMap the index map
+     * @param mapOffset the offset into the index map
+     * @param m the mask
+     * @throws IndexOutOfBoundsException
+     *         if {@code mapOffset+N < 0}
+     *         or if {@code mapOffset+N >= indexMap.length},
+     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
+     *         is an invalid index into {@code a},
+     *         for any lane {@code N} in the vector
+     *         where the mask is set
+     * @see HalffloatVector#toIntArray()
+     */
+    @ForceInline
+    public final
+    void intoCharArray(char[] a, int offset,
+                       int[] indexMap, int mapOffset,
+                       VectorMask<Halffloat> m) {
+        // FIXME: optimize
+        // Scatter: lane N is cast to char and written to
+        // a[offset + indexMap[mapOffset + N]], with the mask m passed to stOp.
+        stOp(a, offset, m,
+             (dst, base, lane, bits) ->
+                 dst[base + indexMap[mapOffset + lane]] = (char) bits);
+    }
+
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    void intoByteArray(byte[] a, int offset, ByteOrder bo) {
+        // Validate the full byteSize() span first, then store in the requested order.
+        int start = checkFromIndexSize(offset, byteSize(), a.length);
+        maybeSwap(bo).intoByteArray0(a, start);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    void intoByteArray(byte[] a, int offset,
+                       ByteOrder bo,
+                       VectorMask<Halffloat> m) {
+        if (m.allTrue()) {
+            intoByteArray(a, offset, bo);
+            return;
+        }
+        // Partial mask: the lane check runs with a scale of 2 bytes per lane.
+        checkMaskFromIndexSize(offset, vspecies(), m, 2, a.length);
+        maybeSwap(bo).intoByteArray0(a, offset, m);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo) {
+        // Refuse read-only buffers before any index validation happens.
+        if (ScopedMemoryAccess.isReadOnly(bb)) {
+            throw new ReadOnlyBufferException();
+        }
+        int start = checkFromIndexSize(offset, byteSize(), bb.limit());
+        maybeSwap(bo).intoByteBuffer0(bb, start);
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    void intoByteBuffer(ByteBuffer bb, int offset,
+                        ByteOrder bo,
+                        VectorMask<Halffloat> m) {
+        if (m.allTrue()) {
+            intoByteBuffer(bb, offset, bo);
+            return;
+        }
+        // NOTE(review): the unmasked overload asks ScopedMemoryAccess.isReadOnly(bb) instead.
+        if (bb.isReadOnly()) {
+            throw new ReadOnlyBufferException();
+        }
+        checkMaskFromIndexSize(offset, vspecies(), m, 2, bb.limit());
+        maybeSwap(bo).intoByteBuffer0(bb, offset, m);
+    }
+
+    // ================================================
+
+    // Low-level memory operations.
+    //
+    // Note that all of these operations *must* inline into a context
+    // where the exact species of the involved vector is a
+    // compile-time constant.  Otherwise, the intrinsic generation
+    // will fail and performance will suffer.
+    //
+    // In many cases this is achieved by re-deriving a version of the
+    // method in each concrete subclass (per species).  The re-derived
+    // method simply calls one of these generic methods, with exact
+    // parameters for the controlling metadata, which is either a
+    // typed vector or constant species instance.
+
+    // Unchecked loading operations in native byte order.
+    // Caller is responsible for applying index checks, masking, and
+    // byte swapping.
+
+    /*package-private*/  // short[] load with no index check here; the caller validates
+    abstract
+    HalffloatVector fromArray0(short[] a, int offset);
+    @ForceInline
+    final
+    HalffloatVector fromArray0Template(short[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.load(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            a, offset, vsp,
+            (arr, off, s) -> s.ldOp(arr, off,  // NOTE(review): presumably the non-intrinsic path -- confirm
+                                    (arr_, off_, i) -> arr_[off_ + i]));  // scalar load of lane i
+    }
+
+    /*package-private*/  // masked short[] load with no index check here; the caller validates
+    abstract
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    HalffloatVector fromArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
+        m.check(species());  // NOTE(review): presumably rejects a mask of another species -- confirm
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.loadMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset), m,  // array, Unsafe-style offset of a[offset], and the mask
+            a, offset, vsp,
+            (arr, off, s, vm) -> s.ldOp(arr, off, vm,  // NOTE(review): presumably the non-intrinsic path -- confirm
+                                        (arr_, off_, i) -> arr_[off_ + i]));  // scalar load of lane i
+    }
+
+
+    /*package-private*/  // char[] load with no index check here; the caller validates
+    abstract
+    HalffloatVector fromCharArray0(char[] a, int offset);
+    @ForceInline
+    final
+    HalffloatVector fromCharArray0Template(char[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.load(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, charArrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            a, offset, vsp,
+            (arr, off, s) -> s.ldOp(arr, off,  // NOTE(review): presumably the non-intrinsic path -- confirm
+                                    (arr_, off_, i) -> (short) arr_[off_ + i]));  // cast keeps the 16 char bits
+    }
+
+    /*package-private*/  // masked char[] load with no index check here; the caller validates
+    abstract
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    HalffloatVector fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
+        m.check(species());  // NOTE(review): presumably rejects a mask of another species -- confirm
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.loadMasked(
+                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                a, charArrayAddress(a, offset), m,  // array, Unsafe-style offset of a[offset], and the mask
+                a, offset, vsp,
+                (arr, off, s, vm) -> s.ldOp(arr, off, vm,  // NOTE(review): presumably the non-intrinsic path -- confirm
+                                            (arr_, off_, i) -> (short) arr_[off_ + i]));  // cast keeps the 16 char bits
+    }
+
+
+    @Override
+    abstract  // byte[] load with no index check or byte swap here; the caller handles both
+    HalffloatVector fromByteArray0(byte[] a, int offset);
+    @ForceInline
+    final
+    HalffloatVector fromByteArray0Template(byte[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        return VectorSupport.load(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, byteArrayAddress(a, offset),  // array plus byte base offset + offset (unscaled)
+            a, offset, vsp,
+            (arr, off, s) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);  // presumably a native-order view of arr -- confirm
+                return s.ldOp(wb, off,
+                        (wb_, o, i) -> wb_.getShort(o + i * 2));  // lane i is the short at byte off + 2*i
+            });
+    }
+
+    abstract  // masked byte[] load with no index check or byte swap here; the caller handles both
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    HalffloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);  // NOTE(review): presumably rejects a mask of another species -- confirm
+        return VectorSupport.loadMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, byteArrayAddress(a, offset), m,  // array, byte base offset + offset (unscaled), and the mask
+            a, offset, vsp,
+            (arr, off, s, vm) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);  // presumably a native-order view of arr -- confirm
+                return s.ldOp(wb, off, vm,
+                        (wb_, o, i) -> wb_.getShort(o + i * 2));  // lane i is the short at byte off + 2*i
+            });
+    }
+
+    abstract  // ByteBuffer load with no index check or byte swap here; the caller handles both
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset);
+    @ForceInline
+    final
+    HalffloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        return ScopedMemoryAccess.loadFromByteBuffer(
+                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+                bb, offset, vsp,
+                (buf, off, s) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);  // presumably a native-order view of buf -- confirm
+                    return s.ldOp(wb, off,
+                            (wb_, o, i) -> wb_.getShort(o + i * 2));  // lane i is the short at byte off + 2*i
+                });
+    }
+
+    abstract  // masked ByteBuffer load with no index check or byte swap here; the caller handles both
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    HalffloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);  // NOTE(review): presumably rejects a mask of another species -- confirm
+        return ScopedMemoryAccess.loadFromByteBufferMasked(
+                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                bb, offset, m, vsp,
+                (buf, off, s, vm) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);  // presumably a native-order view of buf -- confirm
+                    return s.ldOp(wb, off, vm,
+                            (wb_, o, i) -> wb_.getShort(o + i * 2));  // lane i is the short at byte off + 2*i
+                });
+    }
+
+    // Unchecked storing operations in native byte order.
+    // Caller is responsible for applying index checks, masking, and
+    // byte swapping.
+
+    abstract  // short[] store with no index check here; the caller validates
+    void intoArray0(short[] a, int offset);
+    @ForceInline
+    final
+    void intoArray0Template(short[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            this, a, offset,
+            (arr, off, v)  // NOTE(review): presumably the non-intrinsic path -- confirm
+            -> v.stOp(arr, off,
+                      (arr_, off_, i, e) -> arr_[off_+i] = e));  // scalar store of lane i
+    }
+
+    abstract  // masked short[] store with no index check here; the caller validates
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    void intoArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
+        m.check(species());  // NOTE(review): presumably rejects a mask of another species -- confirm
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.storeMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, arrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            this, m, a, offset,
+            (arr, off, v, vm)  // NOTE(review): presumably the non-intrinsic path -- confirm
+            -> v.stOp(arr, off, vm,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = e));  // scalar store of lane i
+    }
+
+
+
+    abstract  // byte[] store with no index check or byte swap here; the caller handles both
+    void intoByteArray0(byte[] a, int offset);
+    @ForceInline
+    final
+    void intoByteArray0Template(byte[] a, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.store(
+            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+            a, byteArrayAddress(a, offset),  // array plus byte base offset + offset (unscaled)
+            this, a, offset,
+            (arr, off, v) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);  // presumably a native-order view of arr -- confirm
+                v.stOp(wb, off,
+                        (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));  // lane i goes to byte off + 2*i
+            });
+    }
+
+    abstract  // masked byte[] store with no index check or byte swap here; the caller handles both
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);  // NOTE(review): presumably rejects a mask of another species -- confirm
+        VectorSupport.storeMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, byteArrayAddress(a, offset),  // array plus byte base offset + offset (unscaled)
+            this, m, a, offset,
+            (arr, off, v, vm) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);  // presumably a native-order view of arr -- confirm
+                v.stOp(wb, off, vm,
+                        (tb_, o, i, e) -> tb_.putShort(o + i * 2, e));  // lane i goes to byte off + 2*i
+            });
+    }
+
+    @ForceInline
+    final  // NOTE(review): unlike its sibling stores, this one has no abstract/Template split
+    void intoByteBuffer0(ByteBuffer bb, int offset) {
+        HalffloatSpecies vsp = vspecies();
+        ScopedMemoryAccess.storeIntoByteBuffer(
+                vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
+                this, bb, offset,
+                (buf, off, v) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);  // presumably a native-order view of buf -- confirm
+                    v.stOp(wb, off,
+                            (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));  // lane i goes to byte off + 2*i
+                });
+    }
+
+    abstract  // masked ByteBuffer store with no index check or byte swap here; the caller handles both
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) {
+        HalffloatSpecies vsp = vspecies();
+        m.check(vsp);  // NOTE(review): presumably rejects a mask of another species -- confirm
+        ScopedMemoryAccess.storeIntoByteBufferMasked(
+                vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+                this, m, bb, offset,
+                (buf, off, v, vm) -> {  // NOTE(review): presumably the non-intrinsic path -- confirm
+                    ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);  // presumably a native-order view of buf -- confirm
+                    v.stOp(wb, off, vm,
+                            (wb_, o, i, e) -> wb_.putShort(o + i * 2, e));  // lane i goes to byte off + 2*i
+                });
+    }
+
+    /*package-private*/  // masked char[] store with no index check here; the caller validates
+    abstract
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m);
+    @ForceInline
+    final
+    <M extends VectorMask<Halffloat>>
+    void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
+        m.check(species());  // NOTE(review): presumably rejects a mask of another species -- confirm
+        HalffloatSpecies vsp = vspecies();
+        VectorSupport.storeMasked(
+            vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
+            a, charArrayAddress(a, offset),  // array plus the Unsafe-style offset of a[offset]
+            this, m, a, offset,
+            (arr, off, v, vm)  // NOTE(review): presumably the non-intrinsic path -- confirm
+            -> v.stOp(arr, off, vm,
+                      (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));  // cast keeps the 16 lane bits
+    }
+
+    // End of low-level memory operations.
+
+    private static
+    void checkMaskFromIndexSize(int offset,
+                                HalffloatSpecies vsp,
+                                VectorMask<Halffloat> m,
+                                int scale,
+                                int limit) {
+        AbstractMask<Halffloat> mask = (AbstractMask<Halffloat>) m;
+        mask.checkIndexByLane(offset, limit, vsp.iota(), scale);
+    }
+
+    @ForceInline
+    private void conditionalStoreNYI(int offset,
+                                     HalffloatSpecies vsp,
+                                     VectorMask<Halffloat> m,
+                                     int scale,
+                                     int limit) {
+        if (offset >= 0 && offset + vsp.laneCount() * scale <= limit) {
+            return;
+        }
+        throw new AssertionError(
+            String.format("unimplemented: store @%d in [0..%d), %s in %s",
+                          offset, limit, m, vsp));
+    }
+
+    /*package-private*/
+    @Override
+    @ForceInline
+    final
+    HalffloatVector maybeSwap(ByteOrder bo) {
+        if (bo == NATIVE_ENDIAN) {
+            return this;  // requested order is the native one: nothing to swap
+        }
+        return reinterpretAsBytes()
+            .rearrange(swapBytesShuffle())
+            .reinterpretAsHalffloats();
+    }
+
+    // floor(log2) of the short[] index scale, so an index can be scaled by shifting.
+    static final int ARRAY_SHIFT =
+        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
+    static final long ARRAY_BASE = Unsafe.ARRAY_SHORT_BASE_OFFSET;
+
+    @ForceInline
+    static long arrayAddress(short[] a, int index) {
+        return (((long) index) << ARRAY_SHIFT) + ARRAY_BASE;
+    }
+
+    // floor(log2) of the char[] index scale, so an index can be scaled by shifting.
+    static final int ARRAY_CHAR_SHIFT =
+            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
+    static final long ARRAY_CHAR_BASE = Unsafe.ARRAY_CHAR_BASE_OFFSET;
+
+    @ForceInline
+    static long charArrayAddress(char[] a, int index) {
+        return (((long) index) << ARRAY_CHAR_SHIFT) + ARRAY_CHAR_BASE;
+    }
+
+
+    @ForceInline
+    static long byteArrayAddress(byte[] a, int index) {
+        return index + Unsafe.ARRAY_BYTE_BASE_OFFSET;  // the index is added unscaled
+    }
+
+    // ================================================
+
+    /// Reinterpreting view methods:
+    //   lanewise reinterpret: viewAsXVector()
+    //   keep shape, redraw lanes: reinterpretAsEs()
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final ByteVector reinterpretAsBytes() {
+         // Going to ByteVector, pay close attention to byte order.
+         assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
+         return asByteVectorRaw();
+         //return asByteVectorRaw().rearrange(swapBytesShuffle());
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @ForceInline
+    @Override
+    public final ShortVector viewAsIntegralLanes() {
+        // Raw view of this vector through the integral lane type derived from SHORT.
+        return (ShortVector) asVectorRaw(LaneType.SHORT.asIntegral());
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     *
+     * @implNote This method returns this vector unchanged.  It does
+     * not convert, reinterpret, or copy any lane values, so the
+     * species of the result is the species of this vector.
+     * Unlike the corresponding method of the integral vector types,
+     * this method never throws
+     * {@code UnsupportedOperationException}.
+     */
+    @ForceInline
+    @Override
+    public final
+    HalffloatVector
+    viewAsFloatingLanes() {
+        return this;
+    }
+
+    // ================================================
+
+    /// Object methods: toString, equals, hashCode
+    //
+    // Object methods are defined as if via Arrays.toString, etc.,
+    // is applied to the array of elements.  Two equal vectors
+    // are required to have equal species and equal lane values.
+
+    /**
+     * Returns a string representation of this vector, of the form
+     * {@code "[0,1,2...]"}, reporting the lane values of this vector,
+     * in lane order.
+     *
+     * The string is produced as if by a call to {@link
+     * java.util.Arrays#toString(short[]) Arrays.toString()},
+     * as appropriate to the {@code short} array returned by
+     * {@link #toArray this.toArray()}.
+     *
+     * @return a string of the form {@code "[0,1,2...]"}
+     * reporting the lane values of this vector
+     */
+    @Override
+    @ForceInline
+    public final
+    String toString() {
+        // now that toArray is strongly typed, we can define this
+        return Arrays.toString(toArray());
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    boolean equals(Object obj) {
+        if (!(obj instanceof Vector)) {
+            return false;
+        }
+        Vector<?> other = (Vector<?>) obj;
+        // Lanes are only compared once both species are known to be equal.
+        return this.species().equals(other.species())
+            && this.eq(other.check(this.species())).allTrue();
+    }
+
+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public final
+    int hashCode() {
+        // now that toArray is strongly typed, we can define this
+        return Objects.hash(species(), Arrays.hashCode(toArray()));
+    }
+
+    // ================================================
+
+    // Species
+
+    /**
+     * Class representing {@link HalffloatVector}s of the same {@link VectorShape VectorShape}.
+     */
+    /*package-private*/
+    static final class HalffloatSpecies extends AbstractSpecies<Halffloat> {
+        private HalffloatSpecies(VectorShape shape,
+                Class<? extends HalffloatVector> vectorType,
+                Class<? extends AbstractMask<Halffloat>> maskType,
+                Function<Object, HalffloatVector> vectorFactory) {
+            super(shape, LaneType.of(short.class),
+                  vectorType, maskType,
+                  vectorFactory);
+            assert(this.elementSize() == Halffloat.SIZE);
+        }
+
+        // Specializing overrides:
+
+        @Override
+        @ForceInline
+        public final Class<Halffloat> elementType() {
+            return Halffloat.class;
+        }
+
+        @Override
+        @ForceInline
+        final Class<Halffloat> genericElementType() {
+            return Halffloat.class;
+        }
+
+        @SuppressWarnings("unchecked")
+        @Override
+        @ForceInline
+        public final Class<? extends HalffloatVector> vectorType() {
+            return (Class<? extends HalffloatVector>) vectorType;
+        }
+
+        @Override
+        @ForceInline
+        public final long checkValue(long e) {
+            longToElementBits(e);  // only for exception
+            return e;
+        }
+
+        /*package-private*/
+        @Override
+        @ForceInline
+        final HalffloatVector broadcastBits(long bits) {
+            return (HalffloatVector)
+                VectorSupport.broadcastCoerced(
+                    vectorType, Halffloat.class, laneCount,
+                    bits, this,
+                    (bits_, s_) -> s_.rvOp(i -> bits_));
+        }
+
+        /*package-private*/
+        @ForceInline
+        final HalffloatVector broadcast(short e) {
+            return broadcastBits(toBits(e));
+        }
+
+        @Override
+        @ForceInline
+        public final HalffloatVector broadcast(long e) {
+            return broadcastBits(longToElementBits(e));
+        }
+
+        /*package-private*/
+        final @Override
+        @ForceInline
+        long longToElementBits(long value) {
+            // Do the conversion, and then test it for failure.
+            short e = (short) value;
+            if ((long) e != value) {
+                throw badElementBits(value, e);
+            }
+            return toBits(e);
+        }
+
+        /*package-private*/
+        @ForceInline
+        static long toIntegralChecked(short e, boolean convertToInt) {
+            long value = convertToInt ? (int) e : (long) e;
+            if ((short) value != e) {
+                throw badArrayBits(e, convertToInt, value);
+            }
+            return value;
+        }
+
+        /* this non-public one is for internal conversions */
+        @Override
+        @ForceInline
+        final HalffloatVector fromIntValues(int[] values) {
+            VectorIntrinsics.requireLength(values.length, laneCount);
+            short[] va = new short[laneCount()];
+            for (int i = 0; i < va.length; i++) {
+                int lv = values[i];
+                short v = (short) lv;
+                va[i] = v;
+                if ((int)v != lv) {
+                    throw badElementBits(lv, v);
+                }
+            }
+            return dummyVector().fromArray0(va, 0);
+        }
+
+        // Virtual constructors
+
+        @ForceInline
+        @Override final
+        public HalffloatVector fromArray(Object a, int offset) {
+            // User entry point:  Be careful with inputs.
+            return HalffloatVector
+                .fromArray(this, (short[]) a, offset);
+        }
+
+        @ForceInline
+        @Override final
+        HalffloatVector dummyVector() {
+            return (HalffloatVector) super.dummyVector();
+        }
+
+        /*package-private*/
+        final @Override
+        @ForceInline
+        HalffloatVector rvOp(RVOp f) {
+            short[] res = new short[laneCount()];
+            for (int i = 0; i < res.length; i++) {
+                short bits = (short) f.apply(i);
+                res[i] = fromBits(bits);
+            }
+            return dummyVector().vectorFactory(res);
+        }
+
+        HalffloatVector vOp(FVOp f) {
+            short[] res = new short[laneCount()];
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i);
+            }
+            return dummyVector().vectorFactory(res);
+        }
+
+        HalffloatVector vOp(VectorMask<Halffloat> m, FVOp f) {
+            short[] res = new short[laneCount()];
+            boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                if (mbits[i]) {
+                    res[i] = f.apply(i);
+                }
+            }
+            return dummyVector().vectorFactory(res);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> HalffloatVector ldOp(M memory, int offset,
+                                      FLdOp<M> f) {
+            return dummyVector().ldOp(memory, offset, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> HalffloatVector ldOp(M memory, int offset,
+                                      VectorMask<Halffloat> m,
+                                      FLdOp<M> f) {
+            return dummyVector().ldOp(memory, offset, m, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> void stOp(M memory, int offset, FStOp<M> f) {
+            dummyVector().stOp(memory, offset, f);
+        }
+
+        /*package-private*/
+        @ForceInline
+        <M> void stOp(M memory, int offset,
+                      AbstractMask<Halffloat> m,
+                      FStOp<M> f) {
+            dummyVector().stOp(memory, offset, m, f);
+        }
+
+        // N.B. Make sure these constant vectors and
+        // masks load up correctly into registers.
+        //
+        // Also, see if we can avoid all that switching.
+        // Could we cache both vectors and both masks in
+        // this species object?
+
+        // Zero and iota vector access
+        @Override
+        @ForceInline
+        public final HalffloatVector zero() {
+            if ((Class<?>) vectorType() == HalffloatMaxVector.class)
+                return HalffloatMaxVector.ZERO;
+            switch (vectorBitSize()) {
+                case 64: return Halffloat64Vector.ZERO;
+                case 128: return Halffloat128Vector.ZERO;
+                case 256: return Halffloat256Vector.ZERO;
+                case 512: return Halffloat512Vector.ZERO;
+            }
+            throw new AssertionError();
+        }
+
+        @Override
+        @ForceInline
+        public final HalffloatVector iota() {
+            if ((Class<?>) vectorType() == HalffloatMaxVector.class)
+                return HalffloatMaxVector.IOTA;
+            switch (vectorBitSize()) {
+                case 64: return Halffloat64Vector.IOTA;
+                case 128: return Halffloat128Vector.IOTA;
+                case 256: return Halffloat256Vector.IOTA;
+                case 512: return Halffloat512Vector.IOTA;
+            }
+            throw new AssertionError();
+        }
+
+        // Mask access
+        @Override
+        @ForceInline
+        public final VectorMask<Halffloat> maskAll(boolean bit) {
+            if ((Class<?>) vectorType() == HalffloatMaxVector.class)
+                return HalffloatMaxVector.HalffloatMaxMask.maskAll(bit);
+            switch (vectorBitSize()) {
+                case 64: return Halffloat64Vector.Halffloat64Mask.maskAll(bit);
+                case 128: return Halffloat128Vector.Halffloat128Mask.maskAll(bit);
+                case 256: return Halffloat256Vector.Halffloat256Mask.maskAll(bit);
+                case 512: return Halffloat512Vector.Halffloat512Mask.maskAll(bit);
+            }
+            throw new AssertionError();
+        }
+    }
+
+    /**
+     * Finds a species for an element type of {@code Halffloat} and shape.
+     *
+     * @param s the shape
+     * @return a species for an element type of {@code Halffloat} and shape
+     * @throws IllegalArgumentException if no such species exists for the shape
+     */
+    static HalffloatSpecies species(VectorShape s) {
+        Objects.requireNonNull(s);
+        switch (s) {
+            case S_64_BIT: return (HalffloatSpecies) SPECIES_64;
+            case S_128_BIT: return (HalffloatSpecies) SPECIES_128;
+            case S_256_BIT: return (HalffloatSpecies) SPECIES_256;
+            case S_512_BIT: return (HalffloatSpecies) SPECIES_512;
+            case S_Max_BIT: return (HalffloatSpecies) SPECIES_MAX;
+            default: throw new IllegalArgumentException("Bad shape: " + s);
+        }
+    }
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
+    public static final VectorSpecies<Halffloat> SPECIES_64
+        = new HalffloatSpecies(VectorShape.S_64_BIT,
+                            Halffloat64Vector.class,
+                            Halffloat64Vector.Halffloat64Mask.class,
+                            Halffloat64Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
+    public static final VectorSpecies<Halffloat> SPECIES_128
+        = new HalffloatSpecies(VectorShape.S_128_BIT,
+                            Halffloat128Vector.class,
+                            Halffloat128Vector.Halffloat128Mask.class,
+                            Halffloat128Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
+    public static final VectorSpecies<Halffloat> SPECIES_256
+        = new HalffloatSpecies(VectorShape.S_256_BIT,
+                            Halffloat256Vector.class,
+                            Halffloat256Vector.Halffloat256Mask.class,
+                            Halffloat256Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
+    public static final VectorSpecies<Halffloat> SPECIES_512
+        = new HalffloatSpecies(VectorShape.S_512_BIT,
+                            Halffloat512Vector.class,
+                            Halffloat512Vector.Halffloat512Mask.class,
+                            Halffloat512Vector::new);
+
+    /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
+    public static final VectorSpecies<Halffloat> SPECIES_MAX
+        = new HalffloatSpecies(VectorShape.S_Max_BIT,
+                            HalffloatMaxVector.class,
+                            HalffloatMaxVector.HalffloatMaxMask.class,
+                            HalffloatMaxVector::new);
+
+    /**
+     * Preferred species for {@link HalffloatVector}s.
+     * A preferred species is a species of maximal bit-size for the platform.
+     */
+    public static final VectorSpecies<Halffloat> SPECIES_PREFERRED
+        = (HalffloatSpecies) VectorSpecies.ofPreferred(Halffloat.class);
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
index d402938f5b6..893adf62b01 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
@@ -40,7 +40,8 @@ enum LaneType {
     BYTE(byte.class, Byte.class, byte[].class, 'I', -1, Byte.SIZE, T_BYTE),
     SHORT(short.class, Short.class, short[].class, 'I', -1, Short.SIZE, T_SHORT),
     INT(int.class, Integer.class, int[].class, 'I', -1, Integer.SIZE, T_INT),
-    LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG);
+    LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG),
+    HALFFLOAT(Halffloat.class, Short.class, short[].class, 'F', 11, Halffloat.SIZE, T_HALFFLOAT);
 
     LaneType(Class<?> elementType,
              Class<?> genericElementType,
@@ -66,13 +67,13 @@ enum LaneType {
         // printName.  If we do unsigned or vector or bit lane types,
         // report that condition also.
         this.typeChar = printName.toUpperCase().charAt(0);
-        assert("FDBSIL".indexOf(typeChar) == ordinal()) : this;
+        assert("FDBSILH".indexOf(typeChar) == ordinal()) : this;
         // Same as in JVMS, org.objectweb.asm.Opcodes, etc.:
         this.basicType = basicType;
         assert(basicType ==
                ( (elementSizeLog2 - /*lg(Byte.SIZE)*/ 3)
                  | (elementKind == 'F' ? 4 : 8))) : this;
-        assert("....zcFDBSILoav..".charAt(basicType) == typeChar);
+        assert("....zHFDBSILoav..".charAt(basicType) == typeChar);
     }
 
     final Class<?> elementType;
@@ -176,13 +177,14 @@ RuntimeException badElementType(Class<?> elementType, Object expected) {
     // don't optimize properly; see JDK-8161245
 
     static final int
-        SK_FLOAT    = 1,
-        SK_DOUBLE   = 2,
-        SK_BYTE     = 3,
-        SK_SHORT    = 4,
-        SK_INT      = 5,
-        SK_LONG     = 6,
-        SK_LIMIT    = 7;
+        SK_FLOAT     = 1,
+        SK_DOUBLE    = 2,
+        SK_BYTE      = 3,
+        SK_SHORT     = 4,
+        SK_INT       = 5,
+        SK_LONG      = 6,
+        SK_HALFFLOAT = 7,
+        SK_LIMIT     = 8;
 
     /*package-private*/
     @ForceInline
@@ -244,7 +246,8 @@ static LaneType ofBasicType(int bt) {
             // set up asFloating
             if (value.elementKind == 'F') {
                 value.asFloating = value;
-            } else {
+            } else if (value.basicType != T_SHORT) {
+                // FIXME: Support asFloating for short to be Halffloat
                 for (LaneType v : values) {
                     if (v.elementKind == 'F' &&
                         v.elementSize == value.elementSize) {
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
index e1cada48f17..c63ac78fed2 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
@@ -4028,11 +4028,10 @@ public final ShortVector viewAsIntegralLanes() {
     @ForceInline
     @Override
     public final
-    Vector<?>
+    HalffloatVector
     viewAsFloatingLanes() {
         LaneType flt = LaneType.SHORT.asFloating();
-        // asFloating() will throw UnsupportedOperationException for the unsupported type short
-        throw new AssertionError("Cannot reach here");
+        return (HalffloatVector) asVectorRaw(flt);
     }
 
     // ================================================
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
index d37066e6ff7..c1b90b7acd6 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
@@ -2981,6 +2981,19 @@ public abstract VectorMask<E> compare(VectorOperators.Comparison op,
      */
     public abstract DoubleVector reinterpretAsDoubles();
 
+    /**
+     * Reinterprets this vector as a vector of the same shape
+     * and contents but a lane type of {@code Halffloat},
+     * where the lanes are assembled from successive bytes
+     * according to little-endian order.
+     * It is a convenience method for the expression
+     * {@code reinterpretShape(species().withLanes(Halffloat.class))}.
+     * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
+     *
+     * @return a {@code HalffloatVector} with the same shape and information content
+     */
+    public abstract HalffloatVector reinterpretAsHalffloats();
+
     /**
      * Views this vector as a vector of the same shape, length, and
      * contents, but a lane type that is not a floating-point type.
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java
index bada3487379..84ff916f08a 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java
@@ -25,12 +25,15 @@
 package jdk.incubator.vector;
 
 import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.misc.Unsafe;
 
 import java.util.Objects;
 
 /*non-public*/ class VectorIntrinsics {
     static final int VECTOR_ACCESS_OOB_CHECK = Integer.getInteger("jdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK", 2);
 
+    static final Unsafe U = Unsafe.getUnsafe();
+
     @ForceInline
     static void requireLength(int haveLength, int length) {
         if (haveLength != length) {
@@ -111,4 +114,8 @@ private static int wrapToRangeNPOT(int index, int size) {
             return Math.floorMod(index, Math.abs(size));
         }
     }
+    static <V> V maybeRebox(V v) {
+        U.loadFence();
+        return v;
+    }
 }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java
index 1ffbcef821a..7166f998c9b 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java
@@ -262,6 +262,9 @@ static int getMaxVectorBitSize(Class<?> etype) {
         // VectorSupport.getMaxLaneCount may return -1 if C2 is not enabled,
         // or a value smaller than the S_64_BIT.vectorBitSize / elementSizeInBits if MaxVectorSize < 16
         // If so default to S_64_BIT
+        if (etype == Halffloat.class) {
+            etype = short.class;
+        }
         int maxLaneCount = VectorSupport.getMaxLaneCount(etype);
         int elementSizeInBits = LaneType.of(etype).elementSize;
         return Math.max(maxLaneCount * elementSizeInBits, S_64_BIT.vectorBitSize);
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
index 45c2cf9267c..7da1b456855 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
@@ -470,9 +470,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     public static $abstractvectortype$ zero(VectorSpecies<$Boxtype$> species) {
         $Type$Species vsp = ($Type$Species) species;
 #if[FP]
+#if[short]
+        return VectorSupport.broadcastCoerced(vsp.vectorType(), Halffloat.class, species.length(),
+                        toBits((short)0), vsp,
+                        ((bits_, s_) -> s_.rvOp(i -> bits_)));
+#else[short]
         return VectorSupport.broadcastCoerced(vsp.vectorType(), $type$.class, species.length(),
                         toBits(0.0f), vsp,
                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
+#end[short]
 #else[FP]
         return VectorSupport.broadcastCoerced(vsp.vectorType(), $type$.class, species.length(),
                                 0, vsp,
@@ -604,7 +610,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
         int opc = opCode(op);
         return VectorSupport.unaryOp(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, null,
             UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
     }
@@ -636,7 +642,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
         int opc = opCode(op);
         return VectorSupport.unaryOp(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, m,
             UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations));
     }
@@ -652,6 +658,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             case VECTOR_OP_ABS: return (v0, m) ->
                     v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
 #if[FP]
+#if[!short]
             case VECTOR_OP_SIN: return (v0, m) ->
                     v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
             case VECTOR_OP_COS: return (v0, m) ->
@@ -684,6 +691,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                     v0.uOp(m, (i, a) -> ($type$) Math.expm1(a));
             case VECTOR_OP_LOG1P: return (v0, m) ->
                     v0.uOp(m, (i, a) -> ($type$) Math.log1p(a));
+#end[!short]
 #end[FP]
             default: return null;
         }
@@ -744,7 +752,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
         int opc = opCode(op);
         return VectorSupport.binaryOp(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, that, null,
             BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
     }
@@ -804,7 +812,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
         int opc = opCode(op);
         return VectorSupport.binaryOp(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, that, m,
             BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations));
     }
@@ -815,6 +823,19 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) {
         switch (opc_) {
+#if[FP]
+#if[short]
+            case VECTOR_OP_ADD: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_SUB: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() - Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_MUL: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_MAX: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue())));
+            case VECTOR_OP_MIN: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue())));
+#else[short]
             case VECTOR_OP_ADD: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b));
             case VECTOR_OP_SUB: return (v0, v1, vm) ->
@@ -827,6 +848,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                     v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b));
             case VECTOR_OP_MIN: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b));
+#end[short]
+#else[FP]
+            case VECTOR_OP_ADD: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b));
+            case VECTOR_OP_SUB: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b));
+            case VECTOR_OP_MUL: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b));
+            case VECTOR_OP_DIV: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b));
+            case VECTOR_OP_MAX: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b));
+            case VECTOR_OP_MIN: return (v0, v1, vm) ->
+                    v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b));
+#end[FP]
 #if[BITWISE]
             case VECTOR_OP_AND: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b));
@@ -846,6 +882,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                     v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
 #end[BITWISE]
 #if[FP]
+#if[!short]
             case VECTOR_OP_OR: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
             case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
@@ -854,6 +891,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                     v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b));
             case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
                     v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b));
+#end[!short]
 #end[FP]
             default: return null;
         }
@@ -1026,7 +1064,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         e &= SHIFT_MASK;
         int opc = opCode(op);
         return VectorSupport.broadcastInt(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, e, m,
             BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations));
     }
@@ -1110,7 +1148,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[BITWISE]
         int opc = opCode(op);
         return VectorSupport.ternaryOp(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, that, tother, null,
             TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations));
     }
@@ -1152,7 +1190,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 #end[BITWISE]
         int opc = opCode(op);
         return VectorSupport.ternaryOp(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, that, tother, m,
             TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations));
     }
@@ -1164,8 +1202,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     private static TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> ternaryOperations(int opc_) {
         switch (opc_) {
 #if[FP]
+#if[short]
+            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) ->
+                    Halffloat.valueOf(Math.fma(Halffloat.valueOf(a).floatValue(),
+                    Halffloat.valueOf(b).floatValue(), Halffloat.valueOf(c).floatValue())));
+#else[short]
             case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
                     v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
+#end[short]
 #end[FP]
             default: return null;
         }
@@ -2189,7 +2233,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         that.check(this);
         int opc = opCode(op);
         return VectorSupport.compare(
-            opc, getClass(), maskType, $type$.class, length(),
+            opc, getClass(), maskType, $elemtype$.class, length(),
             this, that, null,
             (cond, v0, v1, m1) -> {
                 AbstractMask<$Boxtype$> m
@@ -2211,7 +2255,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         m.check(maskType, this);
         int opc = opCode(op);
         return VectorSupport.compare(
-            opc, getClass(), maskType, $type$.class, length(),
+            opc, getClass(), maskType, $elemtype$.class, length(),
             this, that, m,
             (cond, v0, v1, m1) -> {
                 AbstractMask<$Boxtype$> cmpM
@@ -2226,12 +2270,31 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     private static boolean compareWithOp(int cond, $type$ a, $type$ b) {
         return switch (cond) {
+#if[FP]
+#if[!short]
             case BT_eq -> a == b;
             case BT_ne -> a != b;
             case BT_lt -> a < b;
             case BT_le -> a <= b;
             case BT_gt -> a > b;
             case BT_ge -> a >= b;
+#end[!short]
+#if[short]
+            case BT_eq -> Halffloat.valueOf(a).floatValue() == Halffloat.valueOf(b).floatValue();
+            case BT_ne -> Halffloat.valueOf(a).floatValue() != Halffloat.valueOf(b).floatValue();
+            case BT_lt -> Halffloat.valueOf(a).floatValue() < Halffloat.valueOf(b).floatValue();
+            case BT_le -> Halffloat.valueOf(a).floatValue() <= Halffloat.valueOf(b).floatValue();
+            case BT_gt -> Halffloat.valueOf(a).floatValue() > Halffloat.valueOf(b).floatValue();
+            case BT_ge -> Halffloat.valueOf(a).floatValue() >= Halffloat.valueOf(b).floatValue();
+#end[short]
+#else[FP]
+            case BT_eq -> a == b;
+            case BT_ne -> a != b;
+            case BT_lt -> a < b;
+            case BT_le -> a <= b;
+            case BT_gt -> a > b;
+            case BT_ge -> a >= b;
+#end[FP]
 #if[!FP]
             case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0;
             case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0;
@@ -2346,7 +2409,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     blendTemplate(Class<M> maskType, $abstractvectortype$ v, M m) {
         v.check(this);
         return VectorSupport.blend(
-            getClass(), maskType, $type$.class, length(),
+            getClass(), maskType, $elemtype$.class, length(),
             this, v, m,
             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
     }
@@ -2363,7 +2426,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         // make sure VLENGTH*scale doesn't overflow:
         vsp.checkScale(scale);
         return VectorSupport.indexVector(
-            getClass(), $type$.class, length(),
+            getClass(), $elemtype$.class, length(),
             this, scale, vsp,
             (v, scale_, s)
             -> {
@@ -2555,7 +2618,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype, S shuffle) {
         shuffle.checkIndexes();
         return VectorSupport.rearrangeOp(
-            getClass(), shuffletype, null, $type$.class, length(),
+            getClass(), shuffletype, null, $elemtype$.class, length(),
             this, shuffle, null,
             (v1, s_, m_) -> v1.uOp((i, a) -> {
                 int ei = s_.laneSource(i);
@@ -2587,7 +2650,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             throw new AssertionError();
         }
         return VectorSupport.rearrangeOp(
-                   getClass(), shuffletype, masktype, $type$.class, length(),
+                   getClass(), shuffletype, masktype, $elemtype$.class, length(),
                    this, shuffle, m,
                    (v1, s_, m_) -> v1.uOp((i, a) -> {
                         int ei = s_.laneSource(i);
@@ -2615,7 +2678,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         S ws = (S) shuffle.wrapIndexes();
         $abstractvectortype$ r0 =
             VectorSupport.rearrangeOp(
-                getClass(), shuffletype, null, $type$.class, length(),
+                getClass(), shuffletype, null, $elemtype$.class, length(),
                 this, ws, null,
                 (v0, s_, m_) -> v0.uOp((i, a) -> {
                     int ei = s_.laneSource(i);
@@ -2623,7 +2686,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 }));
         $abstractvectortype$ r1 =
             VectorSupport.rearrangeOp(
-                getClass(), shuffletype, null, $type$.class, length(),
+                getClass(), shuffletype, null, $elemtype$.class, length(),
                 v, ws, null,
                 (v1, s_, m_) -> v1.uOp((i, a) -> {
                     int ei = s_.laneSource(i);
@@ -3026,7 +3089,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
         int opc = opCode(op);
         return fromBits(VectorSupport.reductionCoerced(
-            opc, getClass(), maskClass, $type$.class, length(),
+            opc, getClass(), maskClass, $elemtype$.class, length(),
             this, m,
             REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations)));
     }
@@ -3043,7 +3106,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         }
         int opc = opCode(op);
         return fromBits(VectorSupport.reductionCoerced(
-            opc, getClass(), null, $type$.class, length(),
+            opc, getClass(), null, $elemtype$.class, length(),
             this, null,
             REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations)));
     }
@@ -3054,6 +3117,27 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
 
     private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) {
         switch (opc_) {
+#if[FP]
+#if[short]
+            case VECTOR_OP_ADD: return (v, m) -> // lanes hold half-float bits in a short; (short)0 is +0.0, the additive identity
+                    toBits(v.rOp(($type$)0, m, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue()))));
+            case VECTOR_OP_MUL: return (v, m) -> // identity must be the encoded 1.0; (short)1 is bit pattern 0x0001 (a tiny subnormal), not 1.0
+                    toBits(v.rOp(Halffloat.valueOf(1.0f), m, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue()))));
+            case VECTOR_OP_MIN: return (v, m) ->
+                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue()))));
+            case VECTOR_OP_MAX: return (v, m) ->
+                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue()))));
+#else[short]
+            case VECTOR_OP_ADD: return (v, m) ->
+                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b)));
+            case VECTOR_OP_MUL: return (v, m) ->
+                    toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b)));
+            case VECTOR_OP_MIN: return (v, m) ->
+                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b)));
+            case VECTOR_OP_MAX: return (v, m) ->
+                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b)));
+#end[short]
+#else[FP]
             case VECTOR_OP_ADD: return (v, m) ->
                     toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b)));
             case VECTOR_OP_MUL: return (v, m) ->
@@ -3062,6 +3146,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                     toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b)));
             case VECTOR_OP_MAX: return (v, m) ->
                     toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b)));
+#end[FP]
 #if[BITWISE]
             case VECTOR_OP_AND: return (v, m) ->
                     toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b)));
@@ -3373,7 +3458,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length);
         ByteBuffer wb = wrapper(a, bo);
         return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m,
-                   (wb_, o, i)  -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
+                   (wb_, o, i)  -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$));
     }
 
     /**
@@ -4007,7 +4092,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit());
         ByteBuffer wb = wrapper(bb, bo);
         return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m,
-                   (wb_, o, i)  -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
+                   (wb_, o, i)  -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$));
     }
 
     // Memory store operations
@@ -4810,7 +4895,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             (arr, off, s) -> {
                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                 return s.ldOp(wb, off,
-                        (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
+                        (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$));
             });
     }
 
@@ -4829,7 +4914,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             (arr, off, s, vm) -> {
                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                 return s.ldOp(wb, off, vm,
-                        (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
+                        (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$));
             });
     }
 
@@ -4845,7 +4930,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 (buf, off, s) -> {
                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                     return s.ldOp(wb, off,
-                            (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
+                            (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$));
                 });
     }
 
@@ -4863,7 +4948,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 (buf, off, s, vm) -> {
                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                     return s.ldOp(wb, off, vm,
-                            (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$));
+                            (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$));
                 });
     }
 
@@ -4998,7 +5083,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             (arr, off, v) -> {
                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                 v.stOp(wb, off,
-                        (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
+                        (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e));
             });
     }
 
@@ -5017,7 +5102,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
             (arr, off, v, vm) -> {
                 ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN);
                 v.stOp(wb, off, vm,
-                        (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
+                        (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e));
             });
     }
 
@@ -5031,7 +5116,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 (buf, off, v) -> {
                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                     v.stOp(wb, off,
-                            (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
+                            (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e));
                 });
     }
 
@@ -5049,7 +5134,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
                 (buf, off, v, vm) -> {
                     ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN);
                     v.stOp(wb, off, vm,
-                            (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e));
+                            (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e));
                 });
     }
 
@@ -5204,18 +5289,26 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
     @ForceInline
     @Override
     public final
-    {#if[byteOrShort]?Vector<?>:$Fptype$Vector}
+#if[FP]
+#if[short]
+    $Type$Vector
+#else[short]
+    $Fptype$Vector
+#end[short]
+#else[FP]
+    {#if[byte]?Vector<?>:$Fptype$Vector}
+#end[FP]
     viewAsFloatingLanes() {
 #if[FP]
         return this;
 #else[FP]
         LaneType flt = LaneType.$TYPE$.asFloating();
-#if[!byteOrShort]
+#if[!byte]
         return ($Fptype$Vector) asVectorRaw(flt);
-#else[!byteOrShort]
+#else[!byte]
         // asFloating() will throw UnsupportedOperationException for the unsupported type $type$
         throw new AssertionError("Cannot reach here");
-#end[!byteOrShort]
+#end[!byte]
 #end[FP]
     }
 
@@ -5299,7 +5392,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         @Override
         @ForceInline
         public final Class<$Boxtype$> elementType() {
-            return $type$.class;
+            return $elemtype$.class;
         }
 
         @Override
@@ -5328,7 +5421,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         final $abstractvectortype$ broadcastBits(long bits) {
             return ($abstractvectortype$)
                 VectorSupport.broadcastCoerced(
-                    vectorType, $type$.class, laneCount,
+                    vectorType, $elemtype$.class, laneCount,
                     bits, this,
                     (bits_, s_) -> s_.rvOp(i -> bits_));
         }
@@ -5578,5 +5671,5 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
      * A preferred species is a species of maximal bit-size for the platform.
      */
     public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
-        = ($Type$Species) VectorSpecies.ofPreferred($type$.class);
+        = ($Type$Species) VectorSpecies.ofPreferred($elemtype$.class);
 }
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
index df15c85fccc..381bd03b5eb 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
@@ -52,7 +52,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM
 
-    static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+    static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
 
     $vectortype$($type$[] v) {
         super(v);
@@ -88,7 +88,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     @ForceInline
     @Override
-    public final Class<$Boxtype$> elementType() { return $type$.class; }
+    public final Class<$Boxtype$> elementType() { return $elemtype$.class; }
 
     @ForceInline
     @Override
@@ -801,7 +801,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     static final class $masktype$ extends AbstractMask<$Boxtype$> {
         static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
-        static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+        static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
 
         $masktype$(boolean[] bits) {
             this(bits, 0);
@@ -1028,7 +1028,7 @@ final class $vectortype$ extends $abstractvectortype$ {
 
     static final class $shuffletype$ extends AbstractShuffle<$Boxtype$> {
         static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
-        static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+        static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
 
         $shuffletype$(byte[] reorder) {
             super(VLENGTH, reorder);
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
index 6841a47c757..5936d6e5780 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
@@ -53,10 +53,19 @@ typeprefix=
 globalArgs=""
 #globalArgs="$globalArgs -KextraOverrides"
 
-for type in byte short int long float double
+for type in byte short int long float double halffloat
 do
+
   Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
   TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})"
+
+  case $type in
+    halffloat)
+       type=short
+       TYPE=SHORT
+       ;;
+  esac
+
   args=$globalArgs
   args="$args -K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE"
 
@@ -72,19 +81,24 @@ do
   fptype=$type
   Fptype=$Type
   Boxfptype=$Boxtype
+  elemtype=$type
+  Elemtype=$Type
 
-  case $type in
-    byte)
+  case $Type in
+    Byte)
       Wideboxtype=Integer
       sizeInBytes=1
       args="$args -KbyteOrShort"
       ;;
-    short)
+    Short)
+      fptype=halffloat
+      Fptype=Halffloat
+      Boxfptype=Halffloat
       Wideboxtype=Integer
       sizeInBytes=2
       args="$args -KbyteOrShort"
       ;;
-    int)
+    Int)
       Boxtype=Integer
       Wideboxtype=Integer
       Boxbitstype=Integer
@@ -94,14 +108,14 @@ do
       sizeInBytes=4
       args="$args -KintOrLong -KintOrFP -KintOrFloat"
       ;;
-    long)
+    Long)
       fptype=double
       Fptype=Double
       Boxfptype=Double
       sizeInBytes=8
       args="$args -KintOrLong -KlongOrDouble"
       ;;
-    float)
+    Float)
       kind=FP
       bitstype=int
       Bitstype=Int
@@ -109,7 +123,7 @@ do
       sizeInBytes=4
       args="$args -KintOrFP -KintOrFloat"
       ;;
-    double)
+    Double)
       kind=FP
       bitstype=long
       Bitstype=Long
@@ -117,12 +131,23 @@ do
       sizeInBytes=8
       args="$args -KintOrFP -KlongOrDouble"
       ;;
+    Halffloat)
+      kind=FP
+      bitstype=short
+      Bitstype=Short
+      Boxbitstype=Short
+      sizeInBytes=2
+      elemtype=Halffloat
+      Elemtype=Short
+      args="$args -KbyteOrShort -KshortOrFP -KshortOrHalffloat"
+      ;;
   esac
 
   args="$args -K$kind -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"
   args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
   args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
   args="$args -DsizeInBytes=$sizeInBytes"
+  args="$args -Delemtype=$elemtype -DElemtype=$Elemtype"
 
   abstractvectortype=${typeprefix}${Type}Vector
   abstractbitsvectortype=${typeprefix}${Bitstype}Vector
diff --git a/test/jdk/jdk/incubator/vector/AddTest.java b/test/jdk/jdk/incubator/vector/AddTest.java
index bd11f0092be..68ffc6f1a27 100644
--- a/test/jdk/jdk/incubator/vector/AddTest.java
+++ b/test/jdk/jdk/incubator/vector/AddTest.java
@@ -27,7 +27,8 @@
  * @requires vm.compiler2.enabled
  */
 
-import jdk.incubator.vector.FloatVector;
+import jdk.incubator.vector.Halffloat;
+import jdk.incubator.vector.HalffloatVector;
 import jdk.incubator.vector.VectorShape;
 import jdk.incubator.vector.VectorSpecies;
 import jdk.incubator.vector.Vector;
@@ -36,30 +37,30 @@
 import java.util.stream.IntStream;
 
 public class AddTest {
-    static final VectorSpecies<Float> SPECIES =
-            FloatVector.SPECIES_256;
+    static final VectorSpecies<Halffloat> SPECIES =
+            HalffloatVector.SPECIES_128;
 
     static final int SIZE = 1024;
-    static float[] a = new float[SIZE];
-    static float[] b = new float[SIZE];
-    static float[] c = new float[SIZE];
+    static short[] a = new short[SIZE];
+    static short[] b = new short[SIZE];
+    static short[] c = new short[SIZE];
 
     static {
         for (int i = 0; i < SIZE; i++) {
-            a[i] = 1f;
-            b[i] = 2f;
+            a[i] = Halffloat.valueOf((float)i);
+            b[i] = Halffloat.valueOf((float)i);
         }
     }
 
     static void workload() {
         for (int i = 0; i < a.length; i += SPECIES.length()) {
-            FloatVector av = FloatVector.fromArray(SPECIES, a, i);
-            FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
+            HalffloatVector av = HalffloatVector.fromArray(SPECIES, a, i);
+            HalffloatVector bv = HalffloatVector.fromArray(SPECIES, b, i);
             av.add(bv).intoArray(c, i);
         }
     }
 
-    static final int[] IDENTITY_INDEX_MAPPING = IntStream.range(0, SPECIES.length()).toArray();
+    /*static final int[] IDENTITY_INDEX_MAPPING = IntStream.range(0, SPECIES.length()).toArray();
 
     static void workloadIndexMapped() {
         for (int i = 0; i < a.length; i += SPECIES.length()) {
@@ -67,18 +68,24 @@ static void workloadIndexMapped() {
             FloatVector bv = FloatVector.fromArray(SPECIES, b, i, IDENTITY_INDEX_MAPPING, 0);
             av.add(bv).intoArray(c, i, IDENTITY_INDEX_MAPPING, 0);
         }
-    }
+    }*/
 
     public static void main(String args[]) {
         for (int i = 0; i < 30_0000; i++) {
             workload();
         }
         for (int i = 0; i < a.length; i++) {
-            if (c[i] != a[i] + b[i])
+            Halffloat hfa = new Halffloat(a[i]);
+            Halffloat hfb = new Halffloat(b[i]);
+            Halffloat hfc = new Halffloat(c[i]);
+
+            if (hfc.floatValue() != (hfa.floatValue() + hfb.floatValue())) {
-                throw new AssertionError();
+                throw new AssertionError("Mismatch at index " + i + ": RES: " + hfc.floatValue()
+                        + " EXPECTED: " + (hfa.floatValue() + hfb.floatValue())); // carry the details in the failure itself
+            }
         }
 
-        Arrays.fill(c, 0.0f);
+        /*Arrays.fill(c, 0.0f);
 
         for (int i = 0; i < 30_0000; i++) {
             workloadIndexMapped();
@@ -86,6 +93,6 @@ public static void main(String args[]) {
         for (int i = 0; i < a.length; i++) {
             if (c[i] != a[i] + b[i])
                 throw new AssertionError();
-        }
+        }*/
     }
 }