diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java index 63e00765792..1ec5a2b1bcd 100644 --- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java +++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java @@ -116,12 +116,13 @@ public class VectorSupport { // BasicType codes, for primitives only: public static final int - T_FLOAT = 6, - T_DOUBLE = 7, - T_BYTE = 8, - T_SHORT = 9, - T_INT = 10, - T_LONG = 11; + T_HALFFLOAT = 5, + T_FLOAT = 6, + T_DOUBLE = 7, + T_BYTE = 8, + T_SHORT = 9, + T_INT = 10, + T_LONG = 11; /* ============================================================================ */ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java index 4987546dc36..f7b86536f19 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java @@ -298,7 +298,15 @@ AbstractVector<E> dummyVector() { return makeDummyVector(); } private AbstractVector<E> makeDummyVector() { - Object za = Array.newInstance(elementType(), laneCount); + Object za; + // FIXME: Remove the following special handling for + // Halffloat till Valhalla integration when Halffloat + // will become a primitive class. + if (elementType() == Halffloat.class) { + za = Array.newInstance(short.class, laneCount); + } else { + za = Array.newInstance(elementType(), laneCount); + } return dummyVector = vectorFactory.apply(za); // This is the only use of vectorFactory. 
// All other factory requests are routed @@ -610,6 +618,8 @@ AbstractSpecies<?> computeSpecies(LaneType laneType, s = IntVector.species(shape); break; case LaneType.SK_LONG: s = LongVector.species(shape); break; + case LaneType.SK_HALFFLOAT: + s = HalffloatVector.species(shape); break; } if (s == null) { // NOTE: The result of this method is guaranteed to be diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java index 1ffdb058a27..60f620ebc5f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java @@ -250,6 +250,15 @@ public DoubleVector reinterpretAsDoubles() { return (DoubleVector) asVectorRaw(LaneType.DOUBLE); } + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public HalffloatVector reinterpretAsHalffloats() { + return (HalffloatVector) asVectorRaw(LaneType.HALFFLOAT); + } + /** * {@inheritDoc} <!--workaround--> */ @@ -521,6 +530,7 @@ AbstractVector<F> defaultReinterpret(AbstractSpecies<F> rsp) { return FloatVector.fromByteBuffer(rsp.check(float.class), bb, 0, bo, m.check(float.class)).check0(rsp); case LaneType.SK_DOUBLE: return DoubleVector.fromByteBuffer(rsp.check(double.class), bb, 0, bo, m.check(double.class)).check0(rsp); + // FIXME: Add lanetype for Halffloat default: throw new AssertionError(rsp.toString()); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat.java new file mode 100644 index 00000000000..78eb3d687e6 --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat.java @@ -0,0 +1,174 @@ +/* + * Copyright (c) 1994, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
/**
 * A boxed IEEE 754 binary16 ("half precision", FP16) floating-point value.
 *
 * <p>The value is stored as its raw 16-bit pattern in a {@code short}:
 * one sign bit, five exponent bits (bias 15) and ten significand bits.
 * The {@code short}-typed constants and the {@code short}-returning helpers
 * of this class all traffic in that raw bit pattern, not in numeric values.
 *
 * <p>Beware of the overloads of {@code valueOf}: {@link #valueOf(short)}
 * boxes a raw bit pattern, whereas {@link #valueOf(float)} converts a
 * numeric {@code float} and returns the resulting raw bit pattern.
 * An {@code int} argument selects the {@code float} overload.
 */
@SuppressWarnings("serial")
public final class Halffloat extends Number implements Comparable<Halffloat> {
    /** Bit pattern of the largest finite binary16 value, 65504. */
    public static final short MAX_VALUE = 0x7bff;
    /**
     * Bit pattern of the smallest positive <em>normal</em> binary16 value,
     * 2<sup>-14</sup>. Note that, unlike {@link Float#MIN_VALUE}, this is not
     * the smallest subnormal.
     */
    public static final short MIN_VALUE = 0x400;
    /** Bit pattern of binary16 positive infinity. */
    public static final short POSITIVE_INFINITY = 0x7c00;
    /** Bit pattern of binary16 negative infinity. */
    public static final short NEGATIVE_INFINITY = (short)0xfc00;
    /** Bit pattern of the NaN produced by this class when converting a NaN. */
    public static final short NaN = (short)0xffff;
    /** The number of bits used to represent a binary16 value. */
    public static final int SIZE = 16;
    /** The number of bytes used to represent a binary16 value. */
    public static final int BYTES = SIZE / Byte.SIZE;

    /**
     * Smallest float magnitude that rounds to binary16 infinity:
     * {@code MAX_VALUE} (65504) plus half a binary16 ulp (16).
     */
    private static final float OVERFLOW_THRESHOLD = 0x1.ffep+15f;
    /** Smallest positive normal binary16 value, 2^-14, as a float. */
    private static final float MIN_NORMAL_AS_FLOAT = 0x1.0p-14f;

    private static final int SIGN_MASK = 0x8000;
    private static final int EXP_MASK = 0x7c00;
    private static final int SIGNIF_MASK = 0x03ff;
    /** All-ones binary16 exponent field: infinity or NaN. */
    private static final int EXP_SPECIAL = 0x1f;
    /** Difference between the float bias (127) and the binary16 bias (15). */
    private static final int EXP_REBIAS = 112;

    /** The raw binary16 bit pattern. */
    private final short value;

    /**
     * Boxes a raw binary16 bit pattern.
     *
     * @param f the raw binary16 bits (not a numeric value)
     * @return a {@code Halffloat} holding exactly those bits
     */
    public static Halffloat valueOf(short f) {
        return new Halffloat(f);
    }

    /**
     * Constructs a {@code Halffloat} from a raw binary16 bit pattern.
     *
     * @param value the raw binary16 bits
     */
    public Halffloat(short value) {
        this.value = value;
    }

    /**
     * Constructs a {@code Halffloat} holding the binary16 value nearest to
     * the given {@code float}, as computed by {@link #valueOf(float)}.
     *
     * @param f the numeric value to convert
     */
    public Halffloat(float f) {
        this.value = valueOf(f);
    }

    /**
     * Widens this binary16 value to {@code float}. The conversion is exact:
     * signed zeros, subnormals, normals and infinities map to the float of
     * the same numeric value, and every NaN bit pattern maps to
     * {@link Float#NaN}.
     *
     * @return the float with the same numeric value as this binary16 value
     */
    @Override
    public float floatValue() {
        int bits = value & 0xffff;
        int sign = (bits & SIGN_MASK) << 16;
        int exp = (bits & EXP_MASK) >>> 10;
        int signif = bits & SIGNIF_MASK;

        if (exp == EXP_SPECIAL) {
            if (signif != 0) {
                return Float.NaN;
            }
            return Float.intBitsToFloat(sign | 0x7f800000);
        }
        if (exp == 0) {
            // Zero or subnormal: magnitude is signif * 2^-24, which is
            // exactly representable as a float. OR-ing the sign back in
            // (rather than negating) preserves negative zero.
            float magnitude = signif * 0x1.0p-24f;
            return Float.intBitsToFloat(sign | Float.floatToRawIntBits(magnitude));
        }
        // Normal: re-bias the exponent and left-align the significand.
        return Float.intBitsToFloat(sign | ((exp + EXP_REBIAS) << 23) | (signif << 13));
    }

    /**
     * Converts a {@code float} to the nearest binary16 value, rounding to
     * nearest with ties to even, and returns its raw bit pattern.
     *
     * <ul>
     * <li>NaN converts to {@link #NaN}.</li>
     * <li>Magnitudes of 65520 or more (including infinities) convert to the
     *     infinity of the same sign.</li>
     * <li>Magnitudes below 2<sup>-14</sup> convert to a binary16 subnormal or
     *     to a zero of the same sign.</li>
     * </ul>
     *
     * @param f float value to be converted
     * @return the raw binary16 bits of the converted value
     */
    public static short valueOf(float f) {
        if (Float.isNaN(f)) {
            return Halffloat.NaN;
        }
        int fbits = Float.floatToRawIntBits(f);
        int sign = (fbits >>> 16) & SIGN_MASK;
        float magnitude = Math.abs(f);

        if (magnitude >= OVERFLOW_THRESHOLD) {
            return (short) (sign | EXP_MASK);
        }
        if (magnitude < MIN_NORMAL_AS_FLOAT) {
            // Subnormal or zero result. Binary16 subnormals are integer
            // multiples of 2^-24, so scale (exactly) and round half-even.
            // A result of 0x400 is the smallest normal, which is the correct
            // carry out of the subnormal range.
            int signif = (int) Math.rint(magnitude * 0x1.0p24f);
            return (short) (sign | signif);
        }
        // Normal result. Re-bias the exponent in place, then round the 13
        // discarded significand bits to nearest-even; a carry out of the
        // significand correctly increments the exponent field.
        int rebased = (fbits & 0x7fffffff) - (EXP_REBIAS << 23);
        int rounded = rebased + 0x0fff + ((rebased >>> 13) & 1);
        return (short) (sign | (rounded >>> 13));
    }

    /**
     * Widens this binary16 value to {@code double}.
     *
     * @return the double with the same numeric value as this binary16 value
     */
    @Override
    public double doubleValue() {
        return (double) floatValue();
    }

    /**
     * Returns the raw binary16 bits, sign-extended to {@code long}.
     *
     * <p>NOTE(review): this is the bit pattern, not the numeric value that
     * {@link Number#longValue()} normally denotes. It is kept as is because
     * callers may depend on it; confirm the intended contract.
     *
     * @return the raw bits of this value as a {@code long}
     */
    @Override
    public long longValue() {
        return (long) value;
    }

    /**
     * Returns the raw binary16 bits, sign-extended to {@code int}.
     *
     * <p>NOTE(review): this is the bit pattern, not the numeric value that
     * {@link Number#intValue()} normally denotes. The inherited
     * {@code shortValue()} is derived from this method and is currently the
     * only accessor for the raw bits; confirm the intended contract.
     *
     * @return the raw bits of this value as an {@code int}
     */
    @Override
    public int intValue() {
        return (int) value;
    }

    /**
     * Identity conversion from raw bits to a binary16 value carried in a
     * {@code short}; provided for symmetry with
     * {@link Float#intBitsToFloat(int)}.
     *
     * @param bits raw binary16 bits
     * @return {@code bits}, unchanged
     */
    public static short shortBitsToHalffloat(short bits) {
        return bits;
    }

    /**
     * Identity conversion from a binary16 value carried in a {@code short}
     * to its raw bits; provided for symmetry with
     * {@link Float#floatToRawIntBits(float)}.
     *
     * @param bits binary16 value carried in a {@code short}
     * @return {@code bits}, unchanged
     */
    public static short shortToRawShortBits(short bits) {
        return bits;
    }

    /**
     * Identity conversion from a binary16 value carried in a {@code short}
     * to its bits; provided for symmetry with
     * {@link Float#floatToIntBits(float)}. NaN bit patterns are not
     * canonicalized.
     *
     * @param bits binary16 value carried in a {@code short}
     * @return {@code bits}, unchanged
     */
    public static short shortToShortBits(short bits) {
        return bits;
    }

    /**
     * Compares two half-precision values numerically, using the total order
     * of {@link Float#compare(float, float)} on their widened values
     * (so {@code -0.0 < 0.0} and NaN is greater than every other value).
     *
     * @param hf value to be compared
     * @return a negative value, zero, or a positive value as this value is
     *         less than, equal to, or greater than {@code hf}
     */
    @Override
    public int compareTo(Halffloat hf) {
        float f1 = floatValue();
        float f2 = hf.floatValue();
        return Float.compare(f1, f2);
    }

    /**
     * Tests for equality consistently with {@link #compareTo(Halffloat)}:
     * two instances are equal when their widened values compare as equal,
     * so all NaN encodings are equal to each other and {@code 0.0} differs
     * from {@code -0.0}.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code Halffloat} of equal value
     */
    @Override
    public boolean equals(Object obj) {
        return (obj instanceof Halffloat)
            && Float.compare(floatValue(), ((Halffloat) obj).floatValue()) == 0;
    }

    /**
     * Returns a hash code consistent with {@link #equals(Object)}.
     *
     * @return the hash code of the widened float value
     */
    @Override
    public int hashCode() {
        return Float.hashCode(floatValue());
    }

    /**
     * Returns the decimal string of the widened float value.
     *
     * @return a string representation of this value
     */
    @Override
    public String toString() {
        return Float.toString(floatValue());
    }
}
// -- This file was mechanically generated: Do not edit! -- //
// NOTE(review): lasting changes presumably belong in the generator template
// rather than in this file -- confirm against the build.

/**
 * Concrete {@code HalffloatVector} for the species {@code HalffloatVector.SPECIES_128}.
 *
 * Lanes are held as raw {@code short} bit patterns: the payload is a
 * {@code short[]} (see {@link #vec()}), and scalar lane accessors take and
 * return {@code short}. Most methods simply forward to a {@code *Template}
 * method in the superclass and narrow the result type to this class.
 * The nested {@code Halffloat128Mask} and {@code Halffloat128Shuffle} classes
 * are the mask and shuffle types paired with this vector.
 */
@SuppressWarnings("cast")  // warning: redundant cast
final class Halffloat128Vector extends HalffloatVector {
    static final HalffloatSpecies VSPECIES =
        (HalffloatSpecies) HalffloatVector.SPECIES_128;

    static final VectorShape VSHAPE =
        VSPECIES.vectorShape();

    static final Class<Halffloat128Vector> VCLASS = Halffloat128Vector.class;

    static final int VSIZE = VSPECIES.vectorBitSize();

    // Presumably 8 (lane()/withLane() below enumerate indexes 0..7) -- TODO confirm.
    static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM

    static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM

    Halffloat128Vector(short[] v) {
        super(v);
    }

    // For compatibility as Halffloat128Vector::new,
    // stored into species.vectorFactory.
    Halffloat128Vector(Object v) {
        this((short[]) v);
    }

    static final Halffloat128Vector ZERO = new Halffloat128Vector(new short[VLENGTH]);
    static final Halffloat128Vector IOTA = new Halffloat128Vector(VSPECIES.iotaArray());

    static {
        // Warm up a few species caches.
        // If we do this too much we will
        // get NPEs from bootstrap circularity.
        VSPECIES.dummyVector();
        VSPECIES.withLanes(LaneType.BYTE);
    }

    // Specialized extractors

    @ForceInline
    final @Override
    public HalffloatSpecies vspecies() {
        // ISSUE:  This should probably be a @Stable
        // field inside AbstractVector, rather than
        // a megamorphic method.
        return VSPECIES;
    }

    @ForceInline
    @Override
    public final Class<Halffloat> elementType() { return Halffloat.class; }

    @ForceInline
    @Override
    public final int elementSize() { return Halffloat.SIZE; }

    @ForceInline
    @Override
    public final VectorShape shape() { return VSHAPE; }

    @ForceInline
    @Override
    public final int length() { return VLENGTH; }

    @ForceInline
    @Override
    public final int bitSize() { return VSIZE; }

    @ForceInline
    @Override
    public final int byteSize() { return VSIZE / Byte.SIZE; }

    // Returns the payload viewed as the backing short[] of lane bit patterns.
    /*package-private*/
    @ForceInline
    final @Override
    short[] vec() {
        return (short[])getPayload();
    }

    // Virtualized constructors

    @Override
    @ForceInline
    public final Halffloat128Vector broadcast(short e) {
        return (Halffloat128Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    public final Halffloat128Vector broadcast(long e) {
        return (Halffloat128Vector) super.broadcastTemplate(e);  // specialize
    }

    @Override
    @ForceInline
    Halffloat128Mask maskFromArray(boolean[] bits) {
        return new Halffloat128Mask(bits);
    }

    @Override
    @ForceInline
    Halffloat128Shuffle iotaShuffle() { return Halffloat128Shuffle.IOTA; }

    // Builds the shuffle whose index i is (i*step + start). When wrap is set
    // the index is passed through VectorIntrinsics.wrapToRange; the two
    // branches differ only in that and in the 1/0 flag handed to shuffleIota.
    @ForceInline
    Halffloat128Shuffle iotaShuffle(int start, int step, boolean wrap) {
      if (wrap) {
        return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
      } else {
        return (Halffloat128Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat128Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
                (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
      }
    }

    @Override
    @ForceInline
    Halffloat128Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat128Shuffle(reorder); }

    @Override
    @ForceInline
    Halffloat128Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat128Shuffle(indexes, i); }

    @Override
    @ForceInline
    Halffloat128Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat128Shuffle(fn); }

    // Make a vector of the same species but the given elements:
    @ForceInline
    final @Override
    Halffloat128Vector vectorFactory(short[] vec) {
        return new Halffloat128Vector(vec);
    }

    @ForceInline
    final @Override
    Byte128Vector asByteVectorRaw() {
        return (Byte128Vector) super.asByteVectorRawTemplate();  // specialize
    }

    @ForceInline
    final @Override
    AbstractVector<?> asVectorRaw(LaneType laneType) {
        return super.asVectorRawTemplate(laneType);  // specialize
    }

    // Unary operator

    @ForceInline
    final @Override
    Halffloat128Vector uOp(FUnOp f) {
        return (Halffloat128Vector) super.uOpTemplate(f);  // specialize
    }

    @ForceInline
    final @Override
    Halffloat128Vector uOp(VectorMask<Halffloat> m, FUnOp f) {
        return (Halffloat128Vector)
            super.uOpTemplate((Halffloat128Mask)m, f);  // specialize
    }

    // Binary operator

    @ForceInline
    final @Override
    Halffloat128Vector bOp(Vector<Halffloat> v, FBinOp f) {
        return (Halffloat128Vector) super.bOpTemplate((Halffloat128Vector)v, f);  // specialize
    }

    @ForceInline
    final @Override
    Halffloat128Vector bOp(Vector<Halffloat> v,
                     VectorMask<Halffloat> m, FBinOp f) {
        return (Halffloat128Vector)
            super.bOpTemplate((Halffloat128Vector)v, (Halffloat128Mask)m,
                              f);  // specialize
    }

    // Ternary operator

    @ForceInline
    final @Override
    Halffloat128Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
        return (Halffloat128Vector)
            super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2,
                              f);  // specialize
    }

    @ForceInline
    final @Override
    Halffloat128Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
                     VectorMask<Halffloat> m, FTriOp f) {
        return (Halffloat128Vector)
            super.tOpTemplate((Halffloat128Vector)v1, (Halffloat128Vector)v2,
                              (Halffloat128Mask)m, f);  // specialize
    }

    @ForceInline
    final @Override
    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
        return super.rOpTemplate(v, m, f);  // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
                           VectorSpecies<F> rsp, int part) {
        return super.convertShapeTemplate(conv, rsp, part);  // specialize
    }

    @Override
    @ForceInline
    public final <F>
    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
    }

    // Specialized algebraic operations:

    // The following definition forces a specialized version of this
    // crucial method into the v-table of this class.  A call to add()
    // will inline to a call to lanewise(ADD,), at which point the JIT
    // intrinsic will have the opcode of ADD, plus all the metadata
    // for this particular class, enabling it to generate precise
    // code.
    //
    // There is probably no benefit to the JIT to specialize the
    // masked or broadcast versions of the lanewise method.

    @Override
    @ForceInline
    public Halffloat128Vector lanewise(Unary op) {
        return (Halffloat128Vector) super.lanewiseTemplate(op);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector lanewise(Unary op, VectorMask<Halffloat> m) {
        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector lanewise(Binary op, Vector<Halffloat> v) {
        return (Halffloat128Vector) super.lanewiseTemplate(op, v);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v, (Halffloat128Mask) m);  // specialize
    }


    /*package-private*/
    @Override
    @ForceInline
    public final
    Halffloat128Vector
    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {
        return (Halffloat128Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
    }

    @Override
    @ForceInline
    public final
    Halffloat128Vector
    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {
        return (Halffloat128Vector) super.lanewiseTemplate(op, Halffloat128Mask.class, v1, v2, (Halffloat128Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public final
    Halffloat128Vector addIndex(int scale) {
        return (Halffloat128Vector) super.addIndexTemplate(scale);  // specialize
    }

    // Type specific horizontal reductions

    @Override
    @ForceInline
    public final short reduceLanes(VectorOperators.Associative op) {
        return super.reduceLanesTemplate(op);  // specialized
    }

    @Override
    @ForceInline
    public final short reduceLanes(VectorOperators.Associative op,
                                    VectorMask<Halffloat> m) {
        return super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialized
    }

    // The ToLong variants widen the short reduction result with a plain
    // (long) cast, i.e. they sign-extend the raw lane bits.
    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op) {
        return (long) super.reduceLanesTemplate(op);  // specialized
    }

    @Override
    @ForceInline
    public final long reduceLanesToLong(VectorOperators.Associative op,
                                        VectorMask<Halffloat> m) {
        return (long) super.reduceLanesTemplate(op, Halffloat128Mask.class, (Halffloat128Mask) m);  // specialized
    }

    @ForceInline
    public VectorShuffle<Halffloat> toShuffle() {
        return super.toShuffleTemplate(Halffloat128Shuffle.class); // specialize
    }

    // Specialized unary testing

    @Override
    @ForceInline
    public final Halffloat128Mask test(Test op) {
        return super.testTemplate(Halffloat128Mask.class, op);  // specialize
    }

    // Specialized comparisons

    @Override
    @ForceInline
    public final Halffloat128Mask compare(Comparison op, Vector<Halffloat> v) {
        return super.compareTemplate(Halffloat128Mask.class, op, v);  // specialize
    }

    @Override
    @ForceInline
    public final Halffloat128Mask compare(Comparison op, short s) {
        return super.compareTemplate(Halffloat128Mask.class, op, s);  // specialize
    }

    @Override
    @ForceInline
    public final Halffloat128Mask compare(Comparison op, long s) {
        return super.compareTemplate(Halffloat128Mask.class, op, s);  // specialize
    }

    @Override
    @ForceInline
    public final Halffloat128Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
        return super.compareTemplate(Halffloat128Mask.class, op, v, (Halffloat128Mask) m);
    }


    @Override
    @ForceInline
    public Halffloat128Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {
        return (Halffloat128Vector)
            super.blendTemplate(Halffloat128Mask.class,
                                (Halffloat128Vector) v,
                                (Halffloat128Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector slice(int origin, Vector<Halffloat> v) {
        return (Halffloat128Vector) super.sliceTemplate(origin, v);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector slice(int origin) {
        return (Halffloat128Vector) super.sliceTemplate(origin);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector unslice(int origin, Vector<Halffloat> w, int part) {
        return (Halffloat128Vector) super.unsliceTemplate(origin, w, part);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {
        return (Halffloat128Vector)
            super.unsliceTemplate(Halffloat128Mask.class,
                                  origin, w, part,
                                  (Halffloat128Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector unslice(int origin) {
        return (Halffloat128Vector) super.unsliceTemplate(origin);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector rearrange(VectorShuffle<Halffloat> s) {
        return (Halffloat128Vector)
            super.rearrangeTemplate(Halffloat128Shuffle.class,
                                    (Halffloat128Shuffle) s);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector rearrange(VectorShuffle<Halffloat> shuffle,
                                  VectorMask<Halffloat> m) {
        return (Halffloat128Vector)
            super.rearrangeTemplate(Halffloat128Shuffle.class,
                                    Halffloat128Mask.class,
                                    (Halffloat128Shuffle) shuffle,
                                    (Halffloat128Mask) m);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector rearrange(VectorShuffle<Halffloat> s,
                                  Vector<Halffloat> v) {
        return (Halffloat128Vector)
            super.rearrangeTemplate(Halffloat128Shuffle.class,
                                    (Halffloat128Shuffle) s,
                                    (Halffloat128Vector) v);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector selectFrom(Vector<Halffloat> v) {
        return (Halffloat128Vector)
            super.selectFromTemplate((Halffloat128Vector) v);  // specialize
    }

    @Override
    @ForceInline
    public Halffloat128Vector selectFrom(Vector<Halffloat> v,
                                   VectorMask<Halffloat> m) {
        return (Halffloat128Vector)
            super.selectFromTemplate((Halffloat128Vector) v,
                                     (Halffloat128Mask) m);  // specialize
    }


    // Returns the raw bits of lane i. Each valid index is dispatched to
    // laneHelper with a literal argument -- presumably so the extract
    // intrinsic sees a constant lane index (TODO confirm). Any other index
    // throws IllegalArgumentException.
    @ForceInline
    @Override
    public short lane(int i) {
        short bits;
        switch(i) {
            case 0: bits = laneHelper(0); break;
            case 1: bits = laneHelper(1); break;
            case 2: bits = laneHelper(2); break;
            case 3: bits = laneHelper(3); break;
            case 4: bits = laneHelper(4); break;
            case 5: bits = laneHelper(5); break;
            case 6: bits = laneHelper(6); break;
            case 7: bits = laneHelper(7); break;
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
        return Halffloat.shortBitsToHalffloat(bits);
    }

    // Extracts lane i through VectorSupport.extract; the lambda is the Java
    // fallback, which reads the lane from the backing array and widens it to
    // long. The result is narrowed back to short here.
    public short laneHelper(int i) {
        return (short) VectorSupport.extract(
                     VCLASS, ETYPE, VLENGTH,
                     this, i,
                     (vec, ix) -> {
                     short[] vecarr = vec.vec();
                     return (long)Halffloat.shortToShortBits(vecarr[ix]);
                     });
    }

    // Returns a vector equal to this one except that lane i holds e.
    // Uses the same literal-index dispatch as lane(int).
    @ForceInline
    @Override
    public Halffloat128Vector withLane(int i, short e) {
        switch(i) {
            case 0: return withLaneHelper(0, e);
            case 1: return withLaneHelper(1, e);
            case 2: return withLaneHelper(2, e);
            case 3: return withLaneHelper(3, e);
            case 4: return withLaneHelper(4, e);
            case 5: return withLaneHelper(5, e);
            case 6: return withLaneHelper(6, e);
            case 7: return withLaneHelper(7, e);
            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
        }
    }

    // Inserts e at lane i through VectorSupport.insert; the lambda is the
    // Java fallback, which clones the backing array so this vector is not
    // modified.
    public Halffloat128Vector withLaneHelper(int i, short e) {
        return VectorSupport.insert(
                                VCLASS, ETYPE, VLENGTH,
                                this, i, (long)Halffloat.shortToShortBits(e),
                                (v, ix, bits) -> {
                                    short[] res = v.vec().clone();
                                    res[ix] = Halffloat.shortBitsToHalffloat((short)bits);
                                    return v.vectorFactory(res);
                                });
    }

    // Mask

    // Mask type paired with Halffloat128Vector; the payload is a boolean[]
    // with one entry per lane (see getBits()).
    static final class Halffloat128Mask extends AbstractMask<Halffloat> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM

        Halffloat128Mask(boolean[] bits) {
            this(bits, 0);
        }

        Halffloat128Mask(boolean[] bits, int offset) {
            super(prepare(bits, offset));
        }

        Halffloat128Mask(boolean val) {
            super(prepare(val));
        }

        // Copies laneCount() entries of bits, starting at offset, into a
        // fresh array so the mask does not alias the caller's array.
        private static boolean[] prepare(boolean[] bits, int offset) {
            boolean[] newBits = new boolean[VSPECIES.laneCount()];
            for (int i = 0; i < newBits.length; i++) {
                newBits[i] = bits[offset + i];
            }
            return newBits;
        }

        // Builds a laneCount()-sized array with every entry set to val.
        private static boolean[] prepare(boolean val) {
            boolean[] bits = new boolean[VSPECIES.laneCount()];
            Arrays.fill(bits, val);
            return bits;
        }

        @ForceInline
        final @Override
        public HalffloatSpecies vspecies() {
            // ISSUE:  This should probably be a @Stable
            // field inside AbstractMask, rather than
            // a megamorphic method.
            return VSPECIES;
        }

        @ForceInline
        boolean[] getBits() {
            return (boolean[])getPayload();
        }

        // Applies f lane by lane and returns the results as a new mask.
        @Override
        Halffloat128Mask uOp(MUnOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i]);
            }
            return new Halffloat128Mask(res);
        }

        // Applies f to corresponding lanes of this mask and m; m must be a
        // Halffloat128Mask or the cast below fails.
        @Override
        Halffloat128Mask bOp(VectorMask<Halffloat> m, MBinOp f) {
            boolean[] res = new boolean[vspecies().laneCount()];
            boolean[] bits = getBits();
            boolean[] mbits = ((Halffloat128Mask)m).getBits();
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i, bits[i], mbits[i]);
            }
            return new Halffloat128Mask(res);
        }

        @ForceInline
        @Override
        public final
        Halffloat128Vector toVector() {
            return (Halffloat128Vector) super.toVectorTemplate();  // specialize
        }

        /**
         * Helper function for lane-wise mask conversions.
         * This function kicks in after intrinsic failure.
         */
        // NOTE(review): nothing visible in this class calls this method;
        // cast() below inlines an equivalent lambda instead.
        @ForceInline
        private final <E>
        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
            if (length() != dsp.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");
            boolean[] maskArray = toArray();
            return dsp.maskFactory(maskArray).check(dsp);
        }

        // Converts this mask to the mask type of dsp, which must have the
        // same lane count; otherwise IllegalArgumentException is thrown.
        @Override
        @ForceInline
        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorMask length and species length differ");

            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
                this.getClass(), ETYPE, VLENGTH,
                species.maskType(), species.elementType(), VLENGTH,
                this, species,
                (m, s) -> s.maskFactory(m.toArray()).check(s));
        }

        // Lane-wise equality, expressed as this XOR (NOT mask).
        @Override
        @ForceInline
        public Halffloat128Mask eq(VectorMask<Halffloat> mask) {
            Objects.requireNonNull(mask);
            Halffloat128Mask m = (Halffloat128Mask)mask;
            return xor(m.not());
        }

        // Unary operations

        // Lane-wise negation, expressed as XOR with the all-true mask.
        @Override
        @ForceInline
        public Halffloat128Mask not() {
            return xor(maskAll(true));
        }

        // Binary operations

        // NOTE(review): the mask intrinsics below pass short.class as the
        // element type, whereas cast() above passes ETYPE (Halffloat.class);
        // confirm this difference is intended.

        @Override
        @ForceInline
        public Halffloat128Mask and(VectorMask<Halffloat> mask) {
            Objects.requireNonNull(mask);
            Halffloat128Mask m = (Halffloat128Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat128Mask.class, null, short.class, VLENGTH,
                                          this, m, null,
                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
        }

        @Override
        @ForceInline
        public Halffloat128Mask or(VectorMask<Halffloat> mask) {
            Objects.requireNonNull(mask);
            Halffloat128Mask m = (Halffloat128Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat128Mask.class, null, short.class, VLENGTH,
                                          this, m, null,
                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
        }

        @ForceInline
        /* package-private */
        Halffloat128Mask xor(VectorMask<Halffloat> mask) {
            Objects.requireNonNull(mask);
            Halffloat128Mask m = (Halffloat128Mask)mask;
            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat128Mask.class, null, short.class, VLENGTH,
                                          this, m, null,
                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
        }

        // Mask Query operations

        @Override
        @ForceInline
        public int trueCount() {
            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat128Mask.class, short.class, VLENGTH, this,
                                                      (m) -> trueCountHelper(m.getBits()));
        }

        @Override
        @ForceInline
        public int firstTrue() {
            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this,
                                                      (m) -> firstTrueHelper(m.getBits()));
        }

        @Override
        @ForceInline
        public int lastTrue() {
            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat128Mask.class, short.class, VLENGTH, this,
                                                      (m) -> lastTrueHelper(m.getBits()));
        }

        // Rejects masks with more lanes than a long has bits before
        // delegating to the reduction.
        @Override
        @ForceInline
        public long toLong() {
            if (length() > Long.SIZE) {
                throw new UnsupportedOperationException("too many lanes for one long");
            }
            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat128Mask.class, short.class, VLENGTH, this,
                                                      (m) -> toLongHelper(m.getBits()));
        }

        // Reductions

        @Override
        @ForceInline
        public boolean anyTrue() {
            return VectorSupport.test(BT_ne, Halffloat128Mask.class, short.class, VLENGTH,
                                         this, vspecies().maskAll(true),
                                         (m, __) -> anyTrueHelper(((Halffloat128Mask)m).getBits()));
        }

        @Override
        @ForceInline
        public boolean allTrue() {
            return VectorSupport.test(BT_overflow, Halffloat128Mask.class, short.class, VLENGTH,
                                         this, vspecies().maskAll(true),
                                         (m, __) -> allTrueHelper(((Halffloat128Mask)m).getBits()));
        }

        // Returns the all-true or all-false mask; the Java fallback hands
        // back the shared TRUE_MASK / FALSE_MASK instances declared below.
        @ForceInline
        /*package-private*/
        static Halffloat128Mask maskAll(boolean bit) {
            return VectorSupport.broadcastCoerced(Halffloat128Mask.class, short.class, VLENGTH,
                                                  (bit ? -1 : 0), null,
                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
        }
        private static final Halffloat128Mask  TRUE_MASK = new Halffloat128Mask(true);
        private static final Halffloat128Mask FALSE_MASK = new Halffloat128Mask(false);

    }

    // Shuffle

    // Shuffle type paired with Halffloat128Vector; every constructor passes
    // VLENGTH to the AbstractShuffle constructor.
    static final class Halffloat128Shuffle extends AbstractShuffle<Halffloat> {
        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM

        Halffloat128Shuffle(byte[] reorder) {
            super(VLENGTH, reorder);
        }

        public Halffloat128Shuffle(int[] reorder) {
            super(VLENGTH, reorder);
        }

        public Halffloat128Shuffle(int[] reorder, int i) {
            super(VLENGTH, reorder, i);
        }

        public Halffloat128Shuffle(IntUnaryOperator fn) {
            super(VLENGTH, fn);
        }

        @Override
        public HalffloatSpecies vspecies() {
            return VSPECIES;
        }

        static {
            // There must be enough bits in the shuffle lanes to encode
            // VLENGTH valid indexes and VLENGTH exceptional ones.
            assert(VLENGTH < Byte.MAX_VALUE);
            assert(Byte.MIN_VALUE <= -VLENGTH);
        }
        static final Halffloat128Shuffle IOTA = new Halffloat128Shuffle(IDENTITY);

        @Override
        @ForceInline
        public Halffloat128Vector toVector() {
            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat128Shuffle.class, this, VLENGTH,
                                                    (s) -> ((Halffloat128Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));
        }

        // Converts this shuffle to the shuffle type of species s, which must
        // have the same lane count; otherwise IllegalArgumentException.
        @Override
        @ForceInline
        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
            if (length() != species.laneCount())
                throw new IllegalArgumentException("VectorShuffle length and species length differ");
            int[] shuffleArray = toArray();
            return s.shuffleFromArray(shuffleArray, 0).check(s);
        }

        // Composes two shuffles: result[i] = this[shuffle[i]].
        @ForceInline
        @Override
        public Halffloat128Shuffle rearrange(VectorShuffle<Halffloat> shuffle) {
            Halffloat128Shuffle s = (Halffloat128Shuffle) shuffle;
            byte[] reorder1 = reorder();
            byte[] reorder2 = s.reorder();
            byte[] r = new byte[reorder1.length];
            for (int i = 0; i < reorder1.length; i++) {
                int ssi = reorder2[i];
                r[i] = reorder1[ssi];  // throws on exceptional index
            }
            return new Halffloat128Shuffle(r);
        }
    }

    // ================================================

    // Specialized low-level memory operations.

    @ForceInline
    @Override
    final
    HalffloatVector fromArray0(short[] a, int offset) {
        return super.fromArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {
        return super.fromArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
    }


    @ForceInline
    @Override
    final
    HalffloatVector fromCharArray0(char[] a, int offset) {
        return super.fromCharArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
        return super.fromCharArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
    }


    @ForceInline
    @Override
    final
    HalffloatVector fromByteArray0(byte[] a, int offset) {
        return super.fromByteArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
        return super.fromByteArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
    }

    @ForceInline
    @Override
    final
    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
        return super.fromByteBuffer0Template(bb, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
        return super.fromByteBuffer0Template(Halffloat128Mask.class, bb, offset, (Halffloat128Mask) m);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(short[] a, int offset) {
        super.intoArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {
        super.intoArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);
    }



    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset) {
        super.intoByteArray0Template(a, offset);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
        super.intoByteArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);  // specialize
    }

    @ForceInline
    @Override
    final
    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
        super.intoByteBuffer0Template(Halffloat128Mask.class, bb, offset, (Halffloat128Mask) m);
    }

    @ForceInline
    @Override
    final
    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
        super.intoCharArray0Template(Halffloat128Mask.class, a, offset, (Halffloat128Mask) m);
    }

    // End of specialized low-level memory operations.

    // ================================================

}
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class Halffloat256Vector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_256; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class<Halffloat256Vector> VCLASS = Halffloat256Vector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat256Vector(short[] v) { + super(v); + } + + // For compatibility as Halffloat256Vector::new, + // stored into species.vectorFactory. 
+    Halffloat256Vector(Object v) {
+        this((short[]) v); // an argument that is not a short[] fails this cast
+    }
+
+    // ZERO wraps a freshly allocated short[VLENGTH]; IOTA wraps whatever
+    // VSPECIES.iotaArray() returns.
+    static final Halffloat256Vector ZERO = new Halffloat256Vector(new short[VLENGTH]);
+    static final Halffloat256Vector IOTA = new Halffloat256Vector(VSPECIES.iotaArray());
+
+    static {
+        // Warm up a few species caches.
+        // If we do this too much we will
+        // get NPEs from bootstrap circularity.
+        VSPECIES.dummyVector();
+        VSPECIES.withLanes(LaneType.BYTE);
+    }
+
+    // Specialized extractors
+
+    @ForceInline
+    final @Override
+    public HalffloatSpecies vspecies() {
+        // ISSUE: This should probably be a @Stable
+        // field inside AbstractVector, rather than
+        // a megamorphic method.
+        return VSPECIES;
+    }
+
+    // The accessors below return per-class constants: Halffloat.class,
+    // Halffloat.SIZE, VSHAPE, VLENGTH and VSIZE.
+    @ForceInline
+    @Override
+    public final Class<Halffloat> elementType() { return Halffloat.class; }
+
+    @ForceInline
+    @Override
+    public final int elementSize() { return Halffloat.SIZE; }
+
+    @ForceInline
+    @Override
+    public final VectorShape shape() { return VSHAPE; }
+
+    @ForceInline
+    @Override
+    public final int length() { return VLENGTH; }
+
+    @ForceInline
+    @Override
+    public final int bitSize() { return VSIZE; }
+
+    // VSIZE is taken from vectorBitSize(), hence the division by Byte.SIZE.
+    @ForceInline
+    @Override
+    public final int byteSize() { return VSIZE / Byte.SIZE; }
+
+    // The lane payload is held as a short[]; this returns it via a cast of getPayload().
+    /*package-private*/
+    @ForceInline
+    final @Override
+    short[] vec() {
+        return (short[])getPayload();
+    }
+
+    // Virtualized constructors
+
+    // Both broadcast overloads narrow the result of broadcastTemplate to this class.
+    @Override
+    @ForceInline
+    public final Halffloat256Vector broadcast(short e) {
+        return (Halffloat256Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Vector broadcast(long e) {
+        return (Halffloat256Vector) super.broadcastTemplate(e);  // specialize
+    }
+
+    // Builds this class's mask type from a boolean array.
+    @Override
+    @ForceInline
+    Halffloat256Mask maskFromArray(boolean[] bits) {
+        return new Halffloat256Mask(bits);
+    }
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle iotaShuffle() { return Halffloat256Shuffle.IOTA; }
+
+    // Requests a shuffle from VectorSupport.shuffleIota. The literal 1 or 0
+    // passed after 'step' mirrors the 'wrap' flag. The fallback lambda builds
+    // the shuffle from i*lstep + lstart; with wrap set, that value is first
+    // passed through VectorIntrinsics.wrapToRange(.., l).
+    @ForceInline
+    Halffloat256Shuffle iotaShuffle(int start, int step, boolean wrap) {
+        if (wrap) {
+            return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 1,
+                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l))));
+        } else {
+            return (Halffloat256Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat256Shuffle.class, VSPECIES, VLENGTH, start, step, 0,
+                    (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart)));
+        }
+    }
+
+    // The three factories below each forward to the Halffloat256Shuffle
+    // constructor with the same parameter list.
+    @Override
+    @ForceInline
+    Halffloat256Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat256Shuffle(reorder); }
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat256Shuffle(indexes, i); }
+
+    @Override
+    @ForceInline
+    Halffloat256Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat256Shuffle(fn); }
+
+    // Make a vector of the same species but with the given elements:
+    @ForceInline
+    final @Override
+    Halffloat256Vector vectorFactory(short[] vec) {
+        return new Halffloat256Vector(vec);
+    }
+
+    // Narrows the result of asByteVectorRawTemplate to Byte256Vector.
+    @ForceInline
+    final @Override
+    Byte256Vector asByteVectorRaw() {
+        return (Byte256Vector) super.asByteVectorRawTemplate();  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    AbstractVector<?> asVectorRaw(LaneType laneType) {
+        return super.asVectorRawTemplate(laneType);  // specialize
+    }
+
+    // The uOp/bOp/tOp overrides below cast their vector and mask arguments to
+    // this class's concrete types (Halffloat256Vector / Halffloat256Mask) and
+    // defer to the matching *Template method of the superclass.
+
+    // Unary operator
+
+    @ForceInline
+    final @Override
+    Halffloat256Vector uOp(FUnOp f) {
+        return (Halffloat256Vector) super.uOpTemplate(f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat256Vector uOp(VectorMask<Halffloat> m, FUnOp f) {
+        return (Halffloat256Vector)
+            super.uOpTemplate((Halffloat256Mask)m, f);  // specialize
+    }
+
+    // Binary operator
+
+    @ForceInline
+    final @Override
+    Halffloat256Vector bOp(Vector<Halffloat> v, FBinOp f) {
+        return (Halffloat256Vector) super.bOpTemplate((Halffloat256Vector)v, f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat256Vector bOp(Vector<Halffloat> v,
+                     VectorMask<Halffloat> m, FBinOp f) {
+        return (Halffloat256Vector)
+            super.bOpTemplate((Halffloat256Vector)v, (Halffloat256Mask)m,
+                              f);  // specialize
+    }
+
+    // Ternary operator
+
+    @ForceInline
+    final @Override
+    Halffloat256Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
+        return (Halffloat256Vector)
+            super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2,
+                              f);  // specialize
+    }
+
+    @ForceInline
+    final @Override
+    Halffloat256Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
+                     VectorMask<Halffloat> m, FTriOp f) {
+        return (Halffloat256Vector)
+            super.tOpTemplate((Halffloat256Vector)v1, (Halffloat256Vector)v2,
+                              (Halffloat256Mask)m, f);  // specialize
+    }
+
+    // Forwards unchanged to rOpTemplate; the mask is not cast here.
+    @ForceInline
+    final @Override
+    short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
+        return super.rOpTemplate(v, m, f);  // specialize
+    }
+
+    // convertShape and reinterpretShape forward all arguments to their
+    // *Template counterparts without any casts.
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
+                           VectorSpecies<F> rsp, int part) {
+        return super.convertShapeTemplate(conv, rsp, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final <F>
+    Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
+        return super.reinterpretShapeTemplate(toSpecies, part);  // specialize
+    }
+
+    // Specialized algebraic operations:
+
+    // The following definition forces a specialized version of this
+    // crucial method into the v-table of this class. A call to add()
+    // will inline to a call to lanewise(ADD,), at which point the JIT
+    // intrinsic will have the opcode of ADD, plus all the metadata
+    // for this particular class, enabling it to generate precise
+    // code.
+    //
+    // There is probably no benefit to the JIT to specialize the
+    // masked or broadcast versions of the lanewise method.
+
+    // Every lanewise overload narrows the result of the matching
+    // lanewiseTemplate call to Halffloat256Vector. The masked overloads also
+    // pass Halffloat256Mask.class and cast the mask to Halffloat256Mask.
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Unary op) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Unary op, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Binary op, Vector<Halffloat> v) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    // NOTE(review): the marker below says package-private, but the method is
+    // declared public - confirm which one the generator template intends.
+    /*package-private*/
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, v1, v2);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector
+    lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector) super.lanewiseTemplate(op, Halffloat256Mask.class, v1, v2, (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final
+    Halffloat256Vector addIndex(int scale) {
+        return (Halffloat256Vector) super.addIndexTemplate(scale);  // specialize
+    }
+
+    // Type specific horizontal reductions
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op) {
+        return super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final short reduceLanes(VectorOperators.Associative op,
+                                    VectorMask<Halffloat> m) {
+        return super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialized
+    }
+
+    // The ToLong variants call the same reduceLanesTemplate overloads as
+    // reduceLanes and widen the result with a (long) cast.
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op) {
+        return (long) super.reduceLanesTemplate(op);  // specialized
+    }
+
+    @Override
+    @ForceInline
+    public final long reduceLanesToLong(VectorOperators.Associative op,
+                                        VectorMask<Halffloat> m) {
+        return (long) super.reduceLanesTemplate(op, Halffloat256Mask.class, (Halffloat256Mask) m);  // specialized
+    }
+
+    // NOTE(review): unlike its neighbours this method carries no @Override -
+    // confirm whether that is intentional in the generator template.
+    @ForceInline
+    public VectorShuffle<Halffloat> toShuffle() {
+        return super.toShuffleTemplate(Halffloat256Shuffle.class); // specialize
+    }
+
+    // Specialized unary testing
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask test(Test op) {
+        return super.testTemplate(Halffloat256Mask.class, op);  // specialize
+    }
+
+    // Specialized comparisons
+
+    // Each compare overload passes Halffloat256Mask.class to compareTemplate
+    // as the mask type of the result.
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Vector<Halffloat> v) {
+        return super.compareTemplate(Halffloat256Mask.class, op, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, short s) {
+        return super.compareTemplate(Halffloat256Mask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, long s) {
+        return super.compareTemplate(Halffloat256Mask.class, op, s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public final Halffloat256Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return super.compareTemplate(Halffloat256Mask.class, op, v, (Halffloat256Mask) m);
+    }
+
+
+    // blend, slice, unslice, rearrange and selectFrom follow the same pattern:
+    // cast the arguments to this class's vector/mask/shuffle types, call the
+    // matching *Template method and narrow its result.
+    @Override
+    @ForceInline
+    public Halffloat256Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.blendTemplate(Halffloat256Mask.class,
+                                (Halffloat256Vector) v,
+                                (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector slice(int origin, Vector<Halffloat> v) {
+        return (Halffloat256Vector) super.sliceTemplate(origin, v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector slice(int origin) {
+        return (Halffloat256Vector) super.sliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector unslice(int origin, Vector<Halffloat> w, int part) {
+        return (Halffloat256Vector) super.unsliceTemplate(origin, w, part);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.unsliceTemplate(Halffloat256Mask.class,
+                                  origin, w, part,
+                                  (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector unslice(int origin) {
+        return (Halffloat256Vector) super.unsliceTemplate(origin);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Halffloat> s) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    (Halffloat256Shuffle) s);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Halffloat> shuffle,
+                                  VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    Halffloat256Mask.class,
+                                    (Halffloat256Shuffle) shuffle,
+                                    (Halffloat256Mask) m);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector rearrange(VectorShuffle<Halffloat> s,
+                                  Vector<Halffloat> v) {
+        return (Halffloat256Vector)
+            super.rearrangeTemplate(Halffloat256Shuffle.class,
+                                    (Halffloat256Shuffle) s,
+                                    (Halffloat256Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector selectFrom(Vector<Halffloat> v) {
+        return (Halffloat256Vector)
+            super.selectFromTemplate((Halffloat256Vector) v);  // specialize
+    }
+
+    @Override
+    @ForceInline
+    public Halffloat256Vector selectFrom(Vector<Halffloat> v,
+                                   VectorMask<Halffloat> m) {
+        return (Halffloat256Vector)
+            super.selectFromTemplate((Halffloat256Vector) v,
+                                     (Halffloat256Mask) m);  // specialize
+    }
+
+
+    // Reads lane i. The switch calls laneHelper with a literal index for each
+    // of the lanes 0..15 (presumably so every call site sees a constant index -
+    // TODO confirm); any other index throws IllegalArgumentException. The value
+    // from laneHelper is passed through Halffloat.shortBitsToHalffloat.
+    @ForceInline
+    @Override
+    public short lane(int i) {
+        short bits;
+        switch(i) {
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            case 4: bits = laneHelper(4); break;
+            case 5: bits = laneHelper(5); break;
+            case 6: bits = laneHelper(6); break;
+            case 7: bits = laneHelper(7); break;
+            case 8: bits = laneHelper(8); break;
+            case 9: bits = laneHelper(9); break;
+            case 10: bits = laneHelper(10); break;
+            case 11: bits = laneHelper(11); break;
+            case 12: bits = laneHelper(12); break;
+            case 13: bits = laneHelper(13); break;
+            case 14: bits = laneHelper(14); break;
+            case 15: bits = laneHelper(15); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Halffloat.shortBitsToHalffloat(bits);
+    }
+
+    // Calls VectorSupport.extract and narrows its result to short. The
+    // fallback lambda reads element ix of the payload array, passes it through
+    // Halffloat.shortToShortBits and widens it to long.
+    // NOTE(review): no @ForceInline here, unlike lane() - confirm against the template.
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                     VCLASS, ETYPE, VLENGTH,
+                     this, i,
+                     (vec, ix) -> {
+                     short[] vecarr = vec.vec();
+                     return (long)Halffloat.shortToShortBits(vecarr[ix]);
+                     });
+    }
+
+    // Returns a vector with lane i replaced by e. Same dispatch scheme as
+    // lane(): one withLaneHelper call with a literal index per lane 0..15,
+    // IllegalArgumentException for any other index.
+    @ForceInline
+    @Override
+    public Halffloat256Vector withLane(int i, short e) {
+        switch(i) {
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            case 8: return withLaneHelper(8, e);
+            case 9: return withLaneHelper(9, e);
+            case 10: return withLaneHelper(10, e);
+            case 11: return withLaneHelper(11, e);
+            case 12: return withLaneHelper(12, e);
+            case 13: return withLaneHelper(13, e);
+            case 14: return withLaneHelper(14, e);
+            case 15: return withLaneHelper(15, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    // Calls VectorSupport.insert with e converted by Halffloat.shortToShortBits
+    // and widened to long. The fallback lambda clones the payload array before
+    // writing element ix, so the receiver's own array is left untouched, and
+    // wraps the clone with vectorFactory.
+    public Halffloat256Vector withLaneHelper(int i, short e) {
+        return VectorSupport.insert(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i, (long)Halffloat.shortToShortBits(e),
+                                (v, ix, bits) -> {
+                                    short[] res = v.vec().clone();
+                                    res[ix] = Halffloat.shortBitsToHalffloat((short)bits);
+                                    return v.vectorFactory(res);
+                                });
+    }
+
+    // Mask
+
+    static final class Halffloat256Mask extends AbstractMask<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        // Same as the (bits, offset) constructor with offset 0.
+        Halffloat256Mask(boolean[] bits) {
+            this(bits, 0);
+        }
+
+        // Stores a private copy of laneCount() entries starting at bits[offset].
+        Halffloat256Mask(boolean[] bits, int offset) {
+            super(prepare(bits, offset));
+        }
+
+        // Stores an array in which every lane is set to val.
+        Halffloat256Mask(boolean val) {
+            super(prepare(val));
+        }
+
+        // Copies VSPECIES.laneCount() entries from bits, starting at offset, into
+        // a new array. A source that is too short fails at bits[offset + i].
+        private static boolean[] prepare(boolean[] bits, int offset) {
+            boolean[] newBits = new boolean[VSPECIES.laneCount()];
+            for (int i = 0; i < newBits.length; i++) {
+                newBits[i] = bits[offset + i];
+            }
+            return newBits;
+        }
+
+        // Returns a new array of VSPECIES.laneCount() entries, all equal to val.
+        private static boolean[] prepare(boolean val) {
+            boolean[] bits = new boolean[VSPECIES.laneCount()];
+            Arrays.fill(bits, val);
+            return bits;
+        }
+
+        @ForceInline
+        final @Override
+        public HalffloatSpecies vspecies() {
+            // ISSUE: This should probably be a @Stable
+            // field inside AbstractMask, rather than
+            // a megamorphic method.
+            return VSPECIES;
+        }
+
+        // Returns the payload cast to boolean[] (no copy is made here).
+        @ForceInline
+        boolean[] getBits() {
+            return (boolean[])getPayload();
+        }
+
+        // Scalar path: applies f to (lane index, bit) for every lane and wraps
+        // the results in a new mask.
+        @Override
+        Halffloat256Mask uOp(MUnOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i]);
+            }
+            return new Halffloat256Mask(res);
+        }
+
+        // Scalar path: applies f to (lane index, this bit, m's bit) for every
+        // lane; m must be a Halffloat256Mask or the cast fails.
+        @Override
+        Halffloat256Mask bOp(VectorMask<Halffloat> m, MBinOp f) {
+            boolean[] res = new boolean[vspecies().laneCount()];
+            boolean[] bits = getBits();
+            boolean[] mbits = ((Halffloat256Mask)m).getBits();
+            for (int i = 0; i < res.length; i++) {
+                res[i] = f.apply(i, bits[i], mbits[i]);
+            }
+            return new Halffloat256Mask(res);
+        }
+
+        @ForceInline
+        @Override
+        public final
+        Halffloat256Vector toVector() {
+            return (Halffloat256Vector) super.toVectorTemplate(); // specialize
+        }
+
+        /**
+         * Helper function for lane-wise mask conversions.
+         * This function kicks in after intrinsic failure.
+         *
+         * NOTE(review): nothing in this class calls this method; cast() below
+         * supplies its own fallback lambda. Confirm whether it is still needed.
+         */
+        @ForceInline
+        private final <E>
+        VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) {
+            if (length() != dsp.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+            boolean[] maskArray = toArray();
+            return dsp.maskFactory(maskArray).check(dsp);
+        }
+
+        // Rejects a target species whose lane count differs from length(), then
+        // calls VectorSupport.convert with VECTOR_OP_CAST. The fallback lambda
+        // rebuilds the mask from toArray() through the target species' maskFactory.
+        @Override
+        @ForceInline
+        public <E> VectorMask<E> cast(VectorSpecies<E> dsp) {
+            AbstractSpecies<E> species = (AbstractSpecies<E>) dsp;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorMask length and species length differ");
+
+            return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST,
+                this.getClass(), ETYPE, VLENGTH,
+                species.maskType(), species.elementType(), VLENGTH,
+                this, species,
+                (m, s) -> s.maskFactory(m.toArray()).check(s));
+        }
+
+        // Computed as this XOR (NOT mask).
+        @Override
+        @ForceInline
+        public Halffloat256Mask eq(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return xor(m.not());
+        }
+
+        // Unary operations
+
+        // Computed as this XOR the all-true mask.
+        @Override
+        @ForceInline
+        public Halffloat256Mask not() {
+            return xor(maskAll(true));
+        }
+
+        // Binary operations
+
+        // and/or/xor each null-check and cast the argument, then call
+        // VectorSupport.binaryOp with the matching VECTOR_OP_* code; the
+        // fallback lambda combines the bits lane by lane through bOp.
+        @Override
+        @ForceInline
+        public Halffloat256Mask and(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b));
+        }
+
+        @Override
+        @ForceInline
+        public Halffloat256Mask or(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b));
+        }
+
+        @ForceInline
+        /* package-private */
+        Halffloat256Mask xor(VectorMask<Halffloat> mask) {
+            Objects.requireNonNull(mask);
+            Halffloat256Mask m = (Halffloat256Mask)mask;
+            return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat256Mask.class, null, short.class, VLENGTH,
+                                          this, m, null,
+                                          (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b));
+        }
+
+        // Mask Query operations
+
+        // The three int-valued queries narrow the result of
+        // VectorSupport.maskReductionCoerced; each fallback lambda passes the
+        // raw bits to the matching *Helper method.
+        @Override
+        @ForceInline
+        public int trueCount() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> trueCountHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int firstTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> firstTrueHelper(m.getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public int lastTrue() {
+            return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> lastTrueHelper(m.getBits()));
+        }
+
+        // Throws UnsupportedOperationException when length() exceeds Long.SIZE.
+        @Override
+        @ForceInline
+        public long toLong() {
+            if (length() > Long.SIZE) {
+                throw new UnsupportedOperationException("too many lanes for one long");
+            }
+            return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat256Mask.class, short.class, VLENGTH, this,
+                                                      (m) -> toLongHelper(m.getBits()));
+        }
+
+        // Reductions
+
+        // anyTrue/allTrue call VectorSupport.test with this mask and an all-true
+        // mask from the species; the fallback lambdas use only the first
+        // argument and pass its bits to anyTrueHelper / allTrueHelper.
+        @Override
+        @ForceInline
+        public boolean anyTrue() {
+            return VectorSupport.test(BT_ne, Halffloat256Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> anyTrueHelper(((Halffloat256Mask)m).getBits()));
+        }
+
+        @Override
+        @ForceInline
+        public boolean allTrue() {
+            return VectorSupport.test(BT_overflow, Halffloat256Mask.class, short.class, VLENGTH,
+                                         this, vspecies().maskAll(true),
+                                         (m, __) -> allTrueHelper(((Halffloat256Mask)m).getBits()));
+        }
+
+        // Passes -1 for true and 0 for false to VectorSupport.broadcastCoerced;
+        // the fallback lambda maps any non-zero value to TRUE_MASK and zero to
+        // FALSE_MASK.
+        @ForceInline
+        /*package-private*/
+        static Halffloat256Mask maskAll(boolean bit) {
+            return VectorSupport.broadcastCoerced(Halffloat256Mask.class, short.class, VLENGTH,
+                                                  (bit ? -1 : 0), null,
+                                                  (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK));
+        }
+        private static final Halffloat256Mask  TRUE_MASK = new Halffloat256Mask(true);
+        private static final Halffloat256Mask FALSE_MASK = new Halffloat256Mask(false);
+
+    }
+
+    // Shuffle
+
+    static final class Halffloat256Shuffle extends AbstractShuffle<Halffloat> {
+        static final int VLENGTH = VSPECIES.laneCount();    // used by the JVM
+        static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM
+
+        // All four constructors forward to the superclass with VLENGTH as the
+        // first argument.
+        Halffloat256Shuffle(byte[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat256Shuffle(int[] reorder) {
+            super(VLENGTH, reorder);
+        }
+
+        public Halffloat256Shuffle(int[] reorder, int i) {
+            super(VLENGTH, reorder, i);
+        }
+
+        public Halffloat256Shuffle(IntUnaryOperator fn) {
+            super(VLENGTH, fn);
+        }
+
+        @Override
+        public HalffloatSpecies vspecies() {
+            return VSPECIES;
+        }
+
+        static {
+            // There must be enough bits in the shuffle lanes to encode
+            // VLENGTH valid indexes and VLENGTH exceptional ones.
+            assert(VLENGTH < Byte.MAX_VALUE);
+            assert(Byte.MIN_VALUE <= -VLENGTH);
+        }
+        static final Halffloat256Shuffle IOTA = new Halffloat256Shuffle(IDENTITY);
+
+        // Calls VectorSupport.shuffleToVector; the fallback lambda narrows the
+        // result of toVectorTemplate() to Halffloat256Vector.
+        @Override
+        @ForceInline
+        public Halffloat256Vector toVector() {
+            return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat256Shuffle.class, this, VLENGTH,
+                                                    (s) -> ((Halffloat256Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate())));
+        }
+
+        // Rejects a species with a different lane count, then rebuilds the
+        // shuffle for that species from this shuffle's toArray() contents.
+        @Override
+        @ForceInline
+        public <F> VectorShuffle<F> cast(VectorSpecies<F> s) {
+            AbstractSpecies<F> species = (AbstractSpecies<F>) s;
+            if (length() != species.laneCount())
+                throw new IllegalArgumentException("VectorShuffle length and species length differ");
+            int[] shuffleArray = toArray();
+            return s.shuffleFromArray(shuffleArray, 0).check(s);
+        }
+
+        // Composes two shuffles: result[i] = this.reorder()[shuffle.reorder()[i]].
+        @ForceInline
+        @Override
+        public Halffloat256Shuffle rearrange(VectorShuffle<Halffloat> shuffle) {
+            Halffloat256Shuffle s = (Halffloat256Shuffle) shuffle;
+            byte[] reorder1 = reorder();
+            byte[] reorder2 = s.reorder();
+            byte[] r = new byte[reorder1.length];
+            for (int i = 0; i < reorder1.length; i++) {
+                int ssi = reorder2[i];
+                r[i] = reorder1[ssi];  // throws on exceptional index
+            }
+            return new Halffloat256Shuffle(r);
+        }
+    }
+
+    // ================================================
+
+    // Specialized low-level memory operations.
+
+    // Every override in this section forwards to the superclass *Template
+    // method of the same base name. The masked variants additionally pass
+    // Halffloat256Mask.class and cast the mask argument to Halffloat256Mask.
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset) {
+        return super.fromArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset) {
+        return super.fromCharArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromCharArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset) {
+        return super.fromByteArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) {
+        return super.fromByteBuffer0Template(bb, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        return super.fromByteBuffer0Template(Halffloat256Mask.class, bb, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset) {
+        super.intoArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);
+    }
+
+
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset) {
+        super.intoByteArray0Template(a, offset);  // specialize
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoByteArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);  // specialize
+    }
+
+    // Only the masked forms of intoByteBuffer0 and intoCharArray0 are
+    // overridden in this class.
+    @ForceInline
+    @Override
+    final
+    void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) {
+        super.intoByteBuffer0Template(Halffloat256Mask.class, bb, offset, (Halffloat256Mask) m);
+    }
+
+    @ForceInline
+    @Override
+    final
+    void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) {
+        super.intoCharArray0Template(Halffloat256Mask.class, a, offset, (Halffloat256Mask) m);
+    }
+
+    // End of specialized low-level memory operations.
+
+    // ================================================
+
+}
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
new file mode 100644
index 00000000000..4478c74d31f
--- /dev/null
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat512Vector.java
@@ -0,0 +1,935 @@
+/*
+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class Halffloat512Vector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_512; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class<Halffloat512Vector> VCLASS = Halffloat512Vector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat512Vector(short[] v) { + super(v); + } + + // For compatibility as Halffloat512Vector::new, + // stored into species.vectorFactory. + Halffloat512Vector(Object v) { + this((short[]) v); + } + + static final Halffloat512Vector ZERO = new Halffloat512Vector(new short[VLENGTH]); + static final Halffloat512Vector IOTA = new Halffloat512Vector(VSPECIES.iotaArray()); + + static { + // Warm up a few species caches. 
+ // If we do this too much we will + // get NPEs from bootstrap circularity. + VSPECIES.dummyVector(); + VSPECIES.withLanes(LaneType.BYTE); + } + + // Specialized extractors + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractVector, rather than + // a megamorphic method. + return VSPECIES; + } + + @ForceInline + @Override + public final Class<Halffloat> elementType() { return Halffloat.class; } + + @ForceInline + @Override + public final int elementSize() { return Halffloat.SIZE; } + + @ForceInline + @Override + public final VectorShape shape() { return VSHAPE; } + + @ForceInline + @Override + public final int length() { return VLENGTH; } + + @ForceInline + @Override + public final int bitSize() { return VSIZE; } + + @ForceInline + @Override + public final int byteSize() { return VSIZE / Byte.SIZE; } + + /*package-private*/ + @ForceInline + final @Override + short[] vec() { + return (short[])getPayload(); + } + + // Virtualized constructors + + @Override + @ForceInline + public final Halffloat512Vector broadcast(short e) { + return (Halffloat512Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + public final Halffloat512Vector broadcast(long e) { + return (Halffloat512Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + Halffloat512Mask maskFromArray(boolean[] bits) { + return new Halffloat512Mask(bits); + } + + @Override + @ForceInline + Halffloat512Shuffle iotaShuffle() { return Halffloat512Shuffle.IOTA; } + + @ForceInline + Halffloat512Shuffle iotaShuffle(int start, int step, boolean wrap) { + if (wrap) { + return (Halffloat512Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); + } else { + return (Halffloat512Shuffle)VectorSupport.shuffleIota(ETYPE, 
Halffloat512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); + } + } + + @Override + @ForceInline + Halffloat512Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat512Shuffle(reorder); } + + @Override + @ForceInline + Halffloat512Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat512Shuffle(indexes, i); } + + @Override + @ForceInline + Halffloat512Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat512Shuffle(fn); } + + // Make a vector of the same species but the given elements: + @ForceInline + final @Override + Halffloat512Vector vectorFactory(short[] vec) { + return new Halffloat512Vector(vec); + } + + @ForceInline + final @Override + Byte512Vector asByteVectorRaw() { + return (Byte512Vector) super.asByteVectorRawTemplate(); // specialize + } + + @ForceInline + final @Override + AbstractVector<?> asVectorRaw(LaneType laneType) { + return super.asVectorRawTemplate(laneType); // specialize + } + + // Unary operator + + @ForceInline + final @Override + Halffloat512Vector uOp(FUnOp f) { + return (Halffloat512Vector) super.uOpTemplate(f); // specialize + } + + @ForceInline + final @Override + Halffloat512Vector uOp(VectorMask<Halffloat> m, FUnOp f) { + return (Halffloat512Vector) + super.uOpTemplate((Halffloat512Mask)m, f); // specialize + } + + // Binary operator + + @ForceInline + final @Override + Halffloat512Vector bOp(Vector<Halffloat> v, FBinOp f) { + return (Halffloat512Vector) super.bOpTemplate((Halffloat512Vector)v, f); // specialize + } + + @ForceInline + final @Override + Halffloat512Vector bOp(Vector<Halffloat> v, + VectorMask<Halffloat> m, FBinOp f) { + return (Halffloat512Vector) + super.bOpTemplate((Halffloat512Vector)v, (Halffloat512Mask)m, + f); // specialize + } + + // Ternary operator + + @ForceInline + final @Override + Halffloat512Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) { + return (Halffloat512Vector) + 
super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2, + f); // specialize + } + + @ForceInline + final @Override + Halffloat512Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, + VectorMask<Halffloat> m, FTriOp f) { + return (Halffloat512Vector) + super.tOpTemplate((Halffloat512Vector)v1, (Halffloat512Vector)v2, + (Halffloat512Mask)m, f); // specialize + } + + @ForceInline + final @Override + short rOp(short v, VectorMask<Halffloat> m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize + } + + @Override + @ForceInline + public final <F> + Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv, + VectorSpecies<F> rsp, int part) { + return super.convertShapeTemplate(conv, rsp, part); // specialize + } + + @Override + @ForceInline + public final <F> + Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) { + return super.reinterpretShapeTemplate(toSpecies, part); // specialize + } + + // Specialized algebraic operations: + + // The following definition forces a specialized version of this + // crucial method into the v-table of this class. A call to add() + // will inline to a call to lanewise(ADD,), at which point the JIT + // intrinsic will have the opcode of ADD, plus all the metadata + // for this particular class, enabling it to generate precise + // code. + // + // There is probably no benefit to the JIT to specialize the + // masked or broadcast versions of the lanewise method. 
+ + @Override + @ForceInline + public Halffloat512Vector lanewise(Unary op) { + return (Halffloat512Vector) super.lanewiseTemplate(op); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector lanewise(Unary op, VectorMask<Halffloat> m) { + return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector lanewise(Binary op, Vector<Halffloat> v) { + return (Halffloat512Vector) super.lanewiseTemplate(op, v); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) { + return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v, (Halffloat512Mask) m); // specialize + } + + + /*package-private*/ + @Override + @ForceInline + public final + Halffloat512Vector + lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) { + return (Halffloat512Vector) super.lanewiseTemplate(op, v1, v2); // specialize + } + + @Override + @ForceInline + public final + Halffloat512Vector + lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) { + return (Halffloat512Vector) super.lanewiseTemplate(op, Halffloat512Mask.class, v1, v2, (Halffloat512Mask) m); // specialize + } + + @Override + @ForceInline + public final + Halffloat512Vector addIndex(int scale) { + return (Halffloat512Vector) super.addIndexTemplate(scale); // specialize + } + + // Type specific horizontal reductions + + @Override + @ForceInline + public final short reduceLanes(VectorOperators.Associative op) { + return super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final short reduceLanes(VectorOperators.Associative op, + VectorMask<Halffloat> m) { + return super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m); // specialized + } + + @Override + @ForceInline + public final long 
reduceLanesToLong(VectorOperators.Associative op) { + return (long) super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op, + VectorMask<Halffloat> m) { + return (long) super.reduceLanesTemplate(op, Halffloat512Mask.class, (Halffloat512Mask) m); // specialized + } + + @ForceInline + public VectorShuffle<Halffloat> toShuffle() { + return super.toShuffleTemplate(Halffloat512Shuffle.class); // specialize + } + + // Specialized unary testing + + @Override + @ForceInline + public final Halffloat512Mask test(Test op) { + return super.testTemplate(Halffloat512Mask.class, op); // specialize + } + + // Specialized comparisons + + @Override + @ForceInline + public final Halffloat512Mask compare(Comparison op, Vector<Halffloat> v) { + return super.compareTemplate(Halffloat512Mask.class, op, v); // specialize + } + + @Override + @ForceInline + public final Halffloat512Mask compare(Comparison op, short s) { + return super.compareTemplate(Halffloat512Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat512Mask compare(Comparison op, long s) { + return super.compareTemplate(Halffloat512Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat512Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) { + return super.compareTemplate(Halffloat512Mask.class, op, v, (Halffloat512Mask) m); + } + + + @Override + @ForceInline + public Halffloat512Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) { + return (Halffloat512Vector) + super.blendTemplate(Halffloat512Mask.class, + (Halffloat512Vector) v, + (Halffloat512Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector slice(int origin, Vector<Halffloat> v) { + return (Halffloat512Vector) super.sliceTemplate(origin, v); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector slice(int origin) { + return 
(Halffloat512Vector) super.sliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector unslice(int origin, Vector<Halffloat> w, int part) { + return (Halffloat512Vector) super.unsliceTemplate(origin, w, part); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) { + return (Halffloat512Vector) + super.unsliceTemplate(Halffloat512Mask.class, + origin, w, part, + (Halffloat512Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector unslice(int origin) { + return (Halffloat512Vector) super.unsliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector rearrange(VectorShuffle<Halffloat> s) { + return (Halffloat512Vector) + super.rearrangeTemplate(Halffloat512Shuffle.class, + (Halffloat512Shuffle) s); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector rearrange(VectorShuffle<Halffloat> shuffle, + VectorMask<Halffloat> m) { + return (Halffloat512Vector) + super.rearrangeTemplate(Halffloat512Shuffle.class, + Halffloat512Mask.class, + (Halffloat512Shuffle) shuffle, + (Halffloat512Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector rearrange(VectorShuffle<Halffloat> s, + Vector<Halffloat> v) { + return (Halffloat512Vector) + super.rearrangeTemplate(Halffloat512Shuffle.class, + (Halffloat512Shuffle) s, + (Halffloat512Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector selectFrom(Vector<Halffloat> v) { + return (Halffloat512Vector) + super.selectFromTemplate((Halffloat512Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat512Vector selectFrom(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return (Halffloat512Vector) + super.selectFromTemplate((Halffloat512Vector) v, + (Halffloat512Mask) m); // specialize + } + + + @ForceInline + @Override + public short 
lane(int i) {
+        // Returns the half-float value stored in lane i as a short.
+        //
+        // A 512-bit vector of 16-bit lanes has VLENGTH == 32 lanes (the payload
+        // is a short[]; assumes Halffloat.SIZE == 16 -- confirm), so every index
+        // in 0..31 must be dispatched.  The previous switch stopped at 15, which
+        // made lanes 16..31 unreachable and threw an IllegalArgumentException
+        // whose own message ("less than 32") contradicted the rejection.
+        //
+        // The dispatch is written as a switch over literal indexes, presumably so
+        // that each laneHelper call site sees a constant lane number.
+        //
+        // NOTE(review): this file is mechanically generated; the same fix
+        // presumably has to be made in the generator template as well.
+        short bits;
+        switch(i) {
+            case 0: bits = laneHelper(0); break;
+            case 1: bits = laneHelper(1); break;
+            case 2: bits = laneHelper(2); break;
+            case 3: bits = laneHelper(3); break;
+            case 4: bits = laneHelper(4); break;
+            case 5: bits = laneHelper(5); break;
+            case 6: bits = laneHelper(6); break;
+            case 7: bits = laneHelper(7); break;
+            case 8: bits = laneHelper(8); break;
+            case 9: bits = laneHelper(9); break;
+            case 10: bits = laneHelper(10); break;
+            case 11: bits = laneHelper(11); break;
+            case 12: bits = laneHelper(12); break;
+            case 13: bits = laneHelper(13); break;
+            case 14: bits = laneHelper(14); break;
+            case 15: bits = laneHelper(15); break;
+            case 16: bits = laneHelper(16); break;
+            case 17: bits = laneHelper(17); break;
+            case 18: bits = laneHelper(18); break;
+            case 19: bits = laneHelper(19); break;
+            case 20: bits = laneHelper(20); break;
+            case 21: bits = laneHelper(21); break;
+            case 22: bits = laneHelper(22); break;
+            case 23: bits = laneHelper(23); break;
+            case 24: bits = laneHelper(24); break;
+            case 25: bits = laneHelper(25); break;
+            case 26: bits = laneHelper(26); break;
+            case 27: bits = laneHelper(27); break;
+            case 28: bits = laneHelper(28); break;
+            case 29: bits = laneHelper(29); break;
+            case 30: bits = laneHelper(30); break;
+            case 31: bits = laneHelper(31); break;
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+        return Halffloat.shortBitsToHalffloat(bits);
+    }
+
+    // Extracts the raw 16-bit pattern of lane i through VectorSupport.extract.
+    // The lambda is the Java fallback: it reads the lane from the backing
+    // short[] payload and widens its bit pattern to long, which is then
+    // narrowed back to short here.
+    public short laneHelper(int i) {
+        return (short) VectorSupport.extract(
+                                VCLASS, ETYPE, VLENGTH,
+                                this, i,
+                                (vec, ix) -> {
+                                    short[] vecarr = vec.vec();
+                                    return (long)Halffloat.shortToShortBits(vecarr[ix]);
+                                });
+    }
+
+    // Returns a vector equal to this one except that lane i holds e.
+    // Covers all 32 lanes (0..31) of the 512-bit shape; any other index
+    // is rejected with an IllegalArgumentException.
+    @ForceInline
+    @Override
+    public Halffloat512Vector withLane(int i, short e) {
+        switch(i) {
+            case 0: return withLaneHelper(0, e);
+            case 1: return withLaneHelper(1, e);
+            case 2: return withLaneHelper(2, e);
+            case 3: return withLaneHelper(3, e);
+            case 4: return withLaneHelper(4, e);
+            case 5: return withLaneHelper(5, e);
+            case 6: return withLaneHelper(6, e);
+            case 7: return withLaneHelper(7, e);
+            case 8: return withLaneHelper(8, e);
+            case 9: return withLaneHelper(9, e);
+            case 10: return withLaneHelper(10, e);
+            case 11: return withLaneHelper(11, e);
+            case 12: return withLaneHelper(12, e);
+            case 13: return withLaneHelper(13, e);
+            case 14: return withLaneHelper(14, e);
+            case 15: return withLaneHelper(15, e);
+            case 16: return withLaneHelper(16, e);
+            case 17: return withLaneHelper(17, e);
+            case 18: return withLaneHelper(18, e);
+            case 19: return withLaneHelper(19, e);
+            case 20: return withLaneHelper(20, e);
+            case 21: return withLaneHelper(21, e);
+            case 22: return withLaneHelper(22, e);
+            case 23: return withLaneHelper(23, e);
+            case 24: return withLaneHelper(24, e);
+            case 25: return withLaneHelper(25, e);
+            case 26: return withLaneHelper(26, e);
+            case 27: return withLaneHelper(27, e);
+            case 28: return withLaneHelper(28, e);
+            case 29: return withLaneHelper(29, e);
+            case 30: return withLaneHelper(30, e);
+            case 31: return withLaneHelper(31, e);
+            default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH);
+        }
+    }
+
+    // Inserts e into lane i through VectorSupport.insert.
+    public Halffloat512Vector withLaneHelper(int i, short e) {
+ 
return VectorSupport.insert( + VCLASS, ETYPE, VLENGTH, + this, i, (long)Halffloat.shortToShortBits(e), + (v, ix, bits) -> { + short[] res = v.vec().clone(); + res[ix] = Halffloat.shortBitsToHalffloat((short)bits); + return v.vectorFactory(res); + }); + } + + // Mask + + static final class Halffloat512Mask extends AbstractMask<Halffloat> { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat512Mask(boolean[] bits) { + this(bits, 0); + } + + Halffloat512Mask(boolean[] bits, int offset) { + super(prepare(bits, offset)); + } + + Halffloat512Mask(boolean val) { + super(prepare(val)); + } + + private static boolean[] prepare(boolean[] bits, int offset) { + boolean[] newBits = new boolean[VSPECIES.laneCount()]; + for (int i = 0; i < newBits.length; i++) { + newBits[i] = bits[offset + i]; + } + return newBits; + } + + private static boolean[] prepare(boolean val) { + boolean[] bits = new boolean[VSPECIES.laneCount()]; + Arrays.fill(bits, val); + return bits; + } + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractMask, rather than + // a megamorphic method. 
+ return VSPECIES; + } + + @ForceInline + boolean[] getBits() { + return (boolean[])getPayload(); + } + + @Override + Halffloat512Mask uOp(MUnOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i]); + } + return new Halffloat512Mask(res); + } + + @Override + Halffloat512Mask bOp(VectorMask<Halffloat> m, MBinOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + boolean[] mbits = ((Halffloat512Mask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i], mbits[i]); + } + return new Halffloat512Mask(res); + } + + @ForceInline + @Override + public final + Halffloat512Vector toVector() { + return (Halffloat512Vector) super.toVectorTemplate(); // specialize + } + + /** + * Helper function for lane-wise mask conversions. + * This function kicks in after intrinsic failure. + */ + @ForceInline + private final <E> + VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) { + if (length() != dsp.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + boolean[] maskArray = toArray(); + return dsp.maskFactory(maskArray).check(dsp); + } + + @Override + @ForceInline + public <E> VectorMask<E> cast(VectorSpecies<E> dsp) { + AbstractSpecies<E> species = (AbstractSpecies<E>) dsp; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); + } + + @Override + @ForceInline + public Halffloat512Mask eq(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat512Mask m = (Halffloat512Mask)mask; + return xor(m.not()); + } + + // Unary operations + + @Override + @ForceInline + 
public Halffloat512Mask not() { + return xor(maskAll(true)); + } + + // Binary operations + + @Override + @ForceInline + public Halffloat512Mask and(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat512Mask m = (Halffloat512Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat512Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); + } + + @Override + @ForceInline + public Halffloat512Mask or(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat512Mask m = (Halffloat512Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat512Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); + } + + @ForceInline + /* package-private */ + Halffloat512Mask xor(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat512Mask m = (Halffloat512Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat512Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + } + + // Mask Query operations + + @Override + @ForceInline + public int trueCount() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat512Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); + } + + @Override + @ForceInline + public int firstTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public int lastTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat512Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return 
VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat512Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); + } + + // Reductions + + @Override + @ForceInline + public boolean anyTrue() { + return VectorSupport.test(BT_ne, Halffloat512Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> anyTrueHelper(((Halffloat512Mask)m).getBits())); + } + + @Override + @ForceInline + public boolean allTrue() { + return VectorSupport.test(BT_overflow, Halffloat512Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> allTrueHelper(((Halffloat512Mask)m).getBits())); + } + + @ForceInline + /*package-private*/ + static Halffloat512Mask maskAll(boolean bit) { + return VectorSupport.broadcastCoerced(Halffloat512Mask.class, short.class, VLENGTH, + (bit ? -1 : 0), null, + (v, __) -> (v != 0 ? TRUE_MASK : FALSE_MASK)); + } + private static final Halffloat512Mask TRUE_MASK = new Halffloat512Mask(true); + private static final Halffloat512Mask FALSE_MASK = new Halffloat512Mask(false); + + } + + // Shuffle + + static final class Halffloat512Shuffle extends AbstractShuffle<Halffloat> { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat512Shuffle(byte[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat512Shuffle(int[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat512Shuffle(int[] reorder, int i) { + super(VLENGTH, reorder, i); + } + + public Halffloat512Shuffle(IntUnaryOperator fn) { + super(VLENGTH, fn); + } + + @Override + public HalffloatSpecies vspecies() { + return VSPECIES; + } + + static { + // There must be enough bits in the shuffle lanes to encode + // VLENGTH valid indexes and VLENGTH exceptional ones. 
+ assert(VLENGTH < Byte.MAX_VALUE); + assert(Byte.MIN_VALUE <= -VLENGTH); + } + static final Halffloat512Shuffle IOTA = new Halffloat512Shuffle(IDENTITY); + + @Override + @ForceInline + public Halffloat512Vector toVector() { + return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat512Shuffle.class, this, VLENGTH, + (s) -> ((Halffloat512Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate()))); + } + + @Override + @ForceInline + public <F> VectorShuffle<F> cast(VectorSpecies<F> s) { + AbstractSpecies<F> species = (AbstractSpecies<F>) s; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + int[] shuffleArray = toArray(); + return s.shuffleFromArray(shuffleArray, 0).check(s); + } + + @ForceInline + @Override + public Halffloat512Shuffle rearrange(VectorShuffle<Halffloat> shuffle) { + Halffloat512Shuffle s = (Halffloat512Shuffle) shuffle; + byte[] reorder1 = reorder(); + byte[] reorder2 = s.reorder(); + byte[] r = new byte[reorder1.length]; + for (int i = 0; i < reorder1.length; i++) { + int ssi = reorder2[i]; + r[i] = reorder1[ssi]; // throws on exceptional index + } + return new Halffloat512Shuffle(r); + } + } + + // ================================================ + + // Specialized low-level memory operations. 
+ + @ForceInline + @Override + final + HalffloatVector fromArray0(short[] a, int offset) { + return super.fromArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) { + return super.fromArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m); // specialize + } + + + @ForceInline + @Override + final + HalffloatVector fromCharArray0(char[] a, int offset) { + return super.fromCharArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) { + return super.fromCharArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m); // specialize + } + + + @ForceInline + @Override + final + HalffloatVector fromByteArray0(byte[] a, int offset) { + return super.fromByteArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) { + return super.fromByteArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) { + return super.fromByteBuffer0Template(bb, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) { + return super.fromByteBuffer0Template(Halffloat512Mask.class, bb, offset, (Halffloat512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoArray0(short[] a, int offset) { + super.intoArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) { + super.intoArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m); + } + + + + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int 
offset) { + super.intoByteArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) { + super.intoByteArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) { + super.intoByteBuffer0Template(Halffloat512Mask.class, bb, offset, (Halffloat512Mask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) { + super.intoCharArray0Template(Halffloat512Mask.class, a, offset, (Halffloat512Mask) m); + } + + // End of specialized low-level memory operations. + + // ================================================ + +} diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java new file mode 100644 index 00000000000..ccc9ea45ac9 --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Halffloat64Vector.java @@ -0,0 +1,911 @@ +/* + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class Halffloat64Vector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_64; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class<Halffloat64Vector> VCLASS = Halffloat64Vector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat64Vector(short[] v) { + super(v); + } + + // For compatibility as Halffloat64Vector::new, + // stored into species.vectorFactory. + Halffloat64Vector(Object v) { + this((short[]) v); + } + + static final Halffloat64Vector ZERO = new Halffloat64Vector(new short[VLENGTH]); + static final Halffloat64Vector IOTA = new Halffloat64Vector(VSPECIES.iotaArray()); + + static { + // Warm up a few species caches. 
+ // If we do this too much we will + // get NPEs from bootstrap circularity. + VSPECIES.dummyVector(); + VSPECIES.withLanes(LaneType.BYTE); + } + + // Specialized extractors + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractVector, rather than + // a megamorphic method. + return VSPECIES; + } + + @ForceInline + @Override + public final Class<Halffloat> elementType() { return Halffloat.class; } + + @ForceInline + @Override + public final int elementSize() { return Halffloat.SIZE; } + + @ForceInline + @Override + public final VectorShape shape() { return VSHAPE; } + + @ForceInline + @Override + public final int length() { return VLENGTH; } + + @ForceInline + @Override + public final int bitSize() { return VSIZE; } + + @ForceInline + @Override + public final int byteSize() { return VSIZE / Byte.SIZE; } + + /*package-private*/ + @ForceInline + final @Override + short[] vec() { + return (short[])getPayload(); + } + + // Virtualized constructors + + @Override + @ForceInline + public final Halffloat64Vector broadcast(short e) { + return (Halffloat64Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + public final Halffloat64Vector broadcast(long e) { + return (Halffloat64Vector) super.broadcastTemplate(e); // specialize + } + + @Override + @ForceInline + Halffloat64Mask maskFromArray(boolean[] bits) { + return new Halffloat64Mask(bits); + } + + @Override + @ForceInline + Halffloat64Shuffle iotaShuffle() { return Halffloat64Shuffle.IOTA; } + + @ForceInline + Halffloat64Shuffle iotaShuffle(int start, int step, boolean wrap) { + if (wrap) { + return (Halffloat64Shuffle)VectorSupport.shuffleIota(ETYPE, Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); + } else { + return (Halffloat64Shuffle)VectorSupport.shuffleIota(ETYPE, 
Halffloat64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, + (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); + } + } + + @Override + @ForceInline + Halffloat64Shuffle shuffleFromBytes(byte[] reorder) { return new Halffloat64Shuffle(reorder); } + + @Override + @ForceInline + Halffloat64Shuffle shuffleFromArray(int[] indexes, int i) { return new Halffloat64Shuffle(indexes, i); } + + @Override + @ForceInline + Halffloat64Shuffle shuffleFromOp(IntUnaryOperator fn) { return new Halffloat64Shuffle(fn); } + + // Make a vector of the same species but the given elements: + @ForceInline + final @Override + Halffloat64Vector vectorFactory(short[] vec) { + return new Halffloat64Vector(vec); + } + + @ForceInline + final @Override + Byte64Vector asByteVectorRaw() { + return (Byte64Vector) super.asByteVectorRawTemplate(); // specialize + } + + @ForceInline + final @Override + AbstractVector<?> asVectorRaw(LaneType laneType) { + return super.asVectorRawTemplate(laneType); // specialize + } + + // Unary operator + + @ForceInline + final @Override + Halffloat64Vector uOp(FUnOp f) { + return (Halffloat64Vector) super.uOpTemplate(f); // specialize + } + + @ForceInline + final @Override + Halffloat64Vector uOp(VectorMask<Halffloat> m, FUnOp f) { + return (Halffloat64Vector) + super.uOpTemplate((Halffloat64Mask)m, f); // specialize + } + + // Binary operator + + @ForceInline + final @Override + Halffloat64Vector bOp(Vector<Halffloat> v, FBinOp f) { + return (Halffloat64Vector) super.bOpTemplate((Halffloat64Vector)v, f); // specialize + } + + @ForceInline + final @Override + Halffloat64Vector bOp(Vector<Halffloat> v, + VectorMask<Halffloat> m, FBinOp f) { + return (Halffloat64Vector) + super.bOpTemplate((Halffloat64Vector)v, (Halffloat64Mask)m, + f); // specialize + } + + // Ternary operator + + @ForceInline + final @Override + Halffloat64Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) { + return (Halffloat64Vector) + 
super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2, + f); // specialize + } + + @ForceInline + final @Override + Halffloat64Vector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, + VectorMask<Halffloat> m, FTriOp f) { + return (Halffloat64Vector) + super.tOpTemplate((Halffloat64Vector)v1, (Halffloat64Vector)v2, + (Halffloat64Mask)m, f); // specialize + } + + @ForceInline + final @Override + short rOp(short v, VectorMask<Halffloat> m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize + } + + @Override + @ForceInline + public final <F> + Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv, + VectorSpecies<F> rsp, int part) { + return super.convertShapeTemplate(conv, rsp, part); // specialize + } + + @Override + @ForceInline + public final <F> + Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) { + return super.reinterpretShapeTemplate(toSpecies, part); // specialize + } + + // Specialized algebraic operations: + + // The following definition forces a specialized version of this + // crucial method into the v-table of this class. A call to add() + // will inline to a call to lanewise(ADD,), at which point the JIT + // intrinsic will have the opcode of ADD, plus all the metadata + // for this particular class, enabling it to generate precise + // code. + // + // There is probably no benefit to the JIT to specialize the + // masked or broadcast versions of the lanewise method. 
+ + @Override + @ForceInline + public Halffloat64Vector lanewise(Unary op) { + return (Halffloat64Vector) super.lanewiseTemplate(op); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector lanewise(Unary op, VectorMask<Halffloat> m) { + return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector lanewise(Binary op, Vector<Halffloat> v) { + return (Halffloat64Vector) super.lanewiseTemplate(op, v); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) { + return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v, (Halffloat64Mask) m); // specialize + } + + + /*package-private*/ + @Override + @ForceInline + public final + Halffloat64Vector + lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) { + return (Halffloat64Vector) super.lanewiseTemplate(op, v1, v2); // specialize + } + + @Override + @ForceInline + public final + Halffloat64Vector + lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) { + return (Halffloat64Vector) super.lanewiseTemplate(op, Halffloat64Mask.class, v1, v2, (Halffloat64Mask) m); // specialize + } + + @Override + @ForceInline + public final + Halffloat64Vector addIndex(int scale) { + return (Halffloat64Vector) super.addIndexTemplate(scale); // specialize + } + + // Type specific horizontal reductions + + @Override + @ForceInline + public final short reduceLanes(VectorOperators.Associative op) { + return super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final short reduceLanes(VectorOperators.Associative op, + VectorMask<Halffloat> m) { + return super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m); // specialized + } + + @Override + @ForceInline + public final long 
reduceLanesToLong(VectorOperators.Associative op) { + return (long) super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op, + VectorMask<Halffloat> m) { + return (long) super.reduceLanesTemplate(op, Halffloat64Mask.class, (Halffloat64Mask) m); // specialized + } + + @ForceInline + public VectorShuffle<Halffloat> toShuffle() { + return super.toShuffleTemplate(Halffloat64Shuffle.class); // specialize + } + + // Specialized unary testing + + @Override + @ForceInline + public final Halffloat64Mask test(Test op) { + return super.testTemplate(Halffloat64Mask.class, op); // specialize + } + + // Specialized comparisons + + @Override + @ForceInline + public final Halffloat64Mask compare(Comparison op, Vector<Halffloat> v) { + return super.compareTemplate(Halffloat64Mask.class, op, v); // specialize + } + + @Override + @ForceInline + public final Halffloat64Mask compare(Comparison op, short s) { + return super.compareTemplate(Halffloat64Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat64Mask compare(Comparison op, long s) { + return super.compareTemplate(Halffloat64Mask.class, op, s); // specialize + } + + @Override + @ForceInline + public final Halffloat64Mask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) { + return super.compareTemplate(Halffloat64Mask.class, op, v, (Halffloat64Mask) m); + } + + + @Override + @ForceInline + public Halffloat64Vector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) { + return (Halffloat64Vector) + super.blendTemplate(Halffloat64Mask.class, + (Halffloat64Vector) v, + (Halffloat64Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector slice(int origin, Vector<Halffloat> v) { + return (Halffloat64Vector) super.sliceTemplate(origin, v); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector slice(int origin) { + return (Halffloat64Vector) 
super.sliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector unslice(int origin, Vector<Halffloat> w, int part) { + return (Halffloat64Vector) super.unsliceTemplate(origin, w, part); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) { + return (Halffloat64Vector) + super.unsliceTemplate(Halffloat64Mask.class, + origin, w, part, + (Halffloat64Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector unslice(int origin) { + return (Halffloat64Vector) super.unsliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector rearrange(VectorShuffle<Halffloat> s) { + return (Halffloat64Vector) + super.rearrangeTemplate(Halffloat64Shuffle.class, + (Halffloat64Shuffle) s); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector rearrange(VectorShuffle<Halffloat> shuffle, + VectorMask<Halffloat> m) { + return (Halffloat64Vector) + super.rearrangeTemplate(Halffloat64Shuffle.class, + Halffloat64Mask.class, + (Halffloat64Shuffle) shuffle, + (Halffloat64Mask) m); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector rearrange(VectorShuffle<Halffloat> s, + Vector<Halffloat> v) { + return (Halffloat64Vector) + super.rearrangeTemplate(Halffloat64Shuffle.class, + (Halffloat64Shuffle) s, + (Halffloat64Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector selectFrom(Vector<Halffloat> v) { + return (Halffloat64Vector) + super.selectFromTemplate((Halffloat64Vector) v); // specialize + } + + @Override + @ForceInline + public Halffloat64Vector selectFrom(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return (Halffloat64Vector) + super.selectFromTemplate((Halffloat64Vector) v, + (Halffloat64Mask) m); // specialize + } + + + @ForceInline + @Override + public short lane(int i) { + short bits; + switch(i) { + case 0: bits 
= laneHelper(0); break; + case 1: bits = laneHelper(1); break; + case 2: bits = laneHelper(2); break; + case 3: bits = laneHelper(3); break; + default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + return Halffloat.shortBitsToHalffloat(bits); + } + + public short laneHelper(int i) { + return (short) VectorSupport.extract( + VCLASS, ETYPE, VLENGTH, + this, i, + (vec, ix) -> { + short[] vecarr = vec.vec(); + return (long)Halffloat.shortToShortBits(vecarr[ix]); + }); + } + + @ForceInline + @Override + public Halffloat64Vector withLane(int i, short e) { + switch(i) { + case 0: return withLaneHelper(0, e); + case 1: return withLaneHelper(1, e); + case 2: return withLaneHelper(2, e); + case 3: return withLaneHelper(3, e); + default: throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + } + + public Halffloat64Vector withLaneHelper(int i, short e) { + return VectorSupport.insert( + VCLASS, ETYPE, VLENGTH, + this, i, (long)Halffloat.shortToShortBits(e), + (v, ix, bits) -> { + short[] res = v.vec().clone(); + res[ix] = Halffloat.shortBitsToHalffloat((short)bits); + return v.vectorFactory(res); + }); + } + + // Mask + + static final class Halffloat64Mask extends AbstractMask<Halffloat> { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat64Mask(boolean[] bits) { + this(bits, 0); + } + + Halffloat64Mask(boolean[] bits, int offset) { + super(prepare(bits, offset)); + } + + Halffloat64Mask(boolean val) { + super(prepare(val)); + } + + private static boolean[] prepare(boolean[] bits, int offset) { + boolean[] newBits = new boolean[VSPECIES.laneCount()]; + for (int i = 0; i < newBits.length; i++) { + newBits[i] = bits[offset + i]; + } + return newBits; + } + + private static boolean[] prepare(boolean val) { + boolean[] bits = new 
boolean[VSPECIES.laneCount()]; + Arrays.fill(bits, val); + return bits; + } + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field inside AbstractMask, rather than + // a megamorphic method. + return VSPECIES; + } + + @ForceInline + boolean[] getBits() { + return (boolean[])getPayload(); + } + + @Override + Halffloat64Mask uOp(MUnOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i]); + } + return new Halffloat64Mask(res); + } + + @Override + Halffloat64Mask bOp(VectorMask<Halffloat> m, MBinOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + boolean[] mbits = ((Halffloat64Mask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i], mbits[i]); + } + return new Halffloat64Mask(res); + } + + @ForceInline + @Override + public final + Halffloat64Vector toVector() { + return (Halffloat64Vector) super.toVectorTemplate(); // specialize + } + + /** + * Helper function for lane-wise mask conversions. + * This function kicks in after intrinsic failure. 
+ */ + @ForceInline + private final <E> + VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) { + if (length() != dsp.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + boolean[] maskArray = toArray(); + return dsp.maskFactory(maskArray).check(dsp); + } + + @Override + @ForceInline + public <E> VectorMask<E> cast(VectorSpecies<E> dsp) { + AbstractSpecies<E> species = (AbstractSpecies<E>) dsp; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); + } + + @Override + @ForceInline + public Halffloat64Mask eq(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat64Mask m = (Halffloat64Mask)mask; + return xor(m.not()); + } + + // Unary operations + + @Override + @ForceInline + public Halffloat64Mask not() { + return xor(maskAll(true)); + } + + // Binary operations + + @Override + @ForceInline + public Halffloat64Mask and(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat64Mask m = (Halffloat64Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_AND, Halffloat64Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); + } + + @Override + @ForceInline + public Halffloat64Mask or(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat64Mask m = (Halffloat64Mask)mask; + return VectorSupport.binaryOp(VECTOR_OP_OR, Halffloat64Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); + } + + @ForceInline + /* package-private */ + Halffloat64Mask xor(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + Halffloat64Mask m = (Halffloat64Mask)mask; + return 
VectorSupport.binaryOp(VECTOR_OP_XOR, Halffloat64Mask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + } + + // Mask Query operations + + @Override + @ForceInline + public int trueCount() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, Halffloat64Mask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); + } + + @Override + @ForceInline + public int firstTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public int lastTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, Halffloat64Mask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, Halffloat64Mask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); + } + + // Reductions + + @Override + @ForceInline + public boolean anyTrue() { + return VectorSupport.test(BT_ne, Halffloat64Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> anyTrueHelper(((Halffloat64Mask)m).getBits())); + } + + @Override + @ForceInline + public boolean allTrue() { + return VectorSupport.test(BT_overflow, Halffloat64Mask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> allTrueHelper(((Halffloat64Mask)m).getBits())); + } + + @ForceInline + /*package-private*/ + static Halffloat64Mask maskAll(boolean bit) { + return VectorSupport.broadcastCoerced(Halffloat64Mask.class, short.class, VLENGTH, + (bit ? -1 : 0), null, + (v, __) -> (v != 0 ? 
TRUE_MASK : FALSE_MASK)); + } + private static final Halffloat64Mask TRUE_MASK = new Halffloat64Mask(true); + private static final Halffloat64Mask FALSE_MASK = new Halffloat64Mask(false); + + } + + // Shuffle + + static final class Halffloat64Shuffle extends AbstractShuffle<Halffloat> { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + Halffloat64Shuffle(byte[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat64Shuffle(int[] reorder) { + super(VLENGTH, reorder); + } + + public Halffloat64Shuffle(int[] reorder, int i) { + super(VLENGTH, reorder, i); + } + + public Halffloat64Shuffle(IntUnaryOperator fn) { + super(VLENGTH, fn); + } + + @Override + public HalffloatSpecies vspecies() { + return VSPECIES; + } + + static { + // There must be enough bits in the shuffle lanes to encode + // VLENGTH valid indexes and VLENGTH exceptional ones. + assert(VLENGTH < Byte.MAX_VALUE); + assert(Byte.MIN_VALUE <= -VLENGTH); + } + static final Halffloat64Shuffle IOTA = new Halffloat64Shuffle(IDENTITY); + + @Override + @ForceInline + public Halffloat64Vector toVector() { + return VectorSupport.shuffleToVector(VCLASS, ETYPE, Halffloat64Shuffle.class, this, VLENGTH, + (s) -> ((Halffloat64Vector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate()))); + } + + @Override + @ForceInline + public <F> VectorShuffle<F> cast(VectorSpecies<F> s) { + AbstractSpecies<F> species = (AbstractSpecies<F>) s; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + int[] shuffleArray = toArray(); + return s.shuffleFromArray(shuffleArray, 0).check(s); + } + + @ForceInline + @Override + public Halffloat64Shuffle rearrange(VectorShuffle<Halffloat> shuffle) { + Halffloat64Shuffle s = (Halffloat64Shuffle) shuffle; + byte[] reorder1 = reorder(); + byte[] reorder2 = s.reorder(); + byte[] r = new byte[reorder1.length]; + for 
(int i = 0; i < reorder1.length; i++) { + int ssi = reorder2[i]; + r[i] = reorder1[ssi]; // throws on exceptional index + } + return new Halffloat64Shuffle(r); + } + } + + // ================================================ + + // Specialized low-level memory operations. + + @ForceInline + @Override + final + HalffloatVector fromArray0(short[] a, int offset) { + return super.fromArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) { + return super.fromArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m); // specialize + } + + + @ForceInline + @Override + final + HalffloatVector fromCharArray0(char[] a, int offset) { + return super.fromCharArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) { + return super.fromCharArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m); // specialize + } + + + @ForceInline + @Override + final + HalffloatVector fromByteArray0(byte[] a, int offset) { + return super.fromByteArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) { + return super.fromByteArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) { + return super.fromByteBuffer0Template(bb, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) { + return super.fromByteBuffer0Template(Halffloat64Mask.class, bb, offset, (Halffloat64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoArray0(short[] a, int offset) { + super.intoArray0Template(a, offset); // specialize + } + + 
@ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) { + super.intoArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m); + } + + + + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset) { + super.intoByteArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) { + super.intoByteArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) { + super.intoByteBuffer0Template(Halffloat64Mask.class, bb, offset, (Halffloat64Mask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) { + super.intoCharArray0Template(Halffloat64Mask.class, a, offset, (Halffloat64Mask) m); + } + + // End of specialized low-level memory operations. + + // ================================================ + +} diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java new file mode 100644 index 00000000000..7dbbb93de0a --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatMaxVector.java @@ -0,0 +1,904 @@ +/* + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.IntUnaryOperator; + +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +@SuppressWarnings("cast") // warning: redundant cast +final class HalffloatMaxVector extends HalffloatVector { + static final HalffloatSpecies VSPECIES = + (HalffloatSpecies) HalffloatVector.SPECIES_MAX; + + static final VectorShape VSHAPE = + VSPECIES.vectorShape(); + + static final Class<HalffloatMaxVector> VCLASS = HalffloatMaxVector.class; + + static final int VSIZE = VSPECIES.vectorBitSize(); + + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + HalffloatMaxVector(short[] v) { + super(v); + } + + // For compatibility as HalffloatMaxVector::new, + // stored into species.vectorFactory. 
HalffloatMaxVector(Object v) {
    // The payload is cast unchecked: v has to be a short[].
    this((short[]) v);
}

static final HalffloatMaxVector ZERO = new HalffloatMaxVector(new short[VLENGTH]);
static final HalffloatMaxVector IOTA = new HalffloatMaxVector(VSPECIES.iotaArray());

static {
    // Warm up a few species caches.
    // If we do this too much we will
    // get NPEs from bootstrap circularity.
    VSPECIES.dummyVector();
    VSPECIES.withLanes(LaneType.BYTE);
}

// Specialized extractors
// These return the per-class constants declared at the top of the class.

@Override
@ForceInline
public final HalffloatSpecies vspecies() {
    // ISSUE: This should probably be a @Stable
    // field inside AbstractVector, rather than
    // a megamorphic method.
    return VSPECIES;
}

@Override
@ForceInline
public final Class<Halffloat> elementType() {
    return Halffloat.class;
}

@Override
@ForceInline
public final int elementSize() {
    return Halffloat.SIZE;
}

@Override
@ForceInline
public final VectorShape shape() {
    return VSHAPE;
}

@Override
@ForceInline
public final int length() {
    return VLENGTH;
}

@Override
@ForceInline
public final int bitSize() {
    return VSIZE;
}

@Override
@ForceInline
public final int byteSize() {
    return VSIZE / Byte.SIZE;
}

// The payload viewed as the backing short[] of lane values.
/*package-private*/
@Override
@ForceInline
final short[] vec() {
    return (short[]) getPayload();
}

// Virtualized constructors

@Override
@ForceInline
public final HalffloatMaxVector broadcast(short e) {
    return (HalffloatMaxVector) super.broadcastTemplate(e);
}

@Override
@ForceInline
public final HalffloatMaxVector broadcast(long e) {
    return (HalffloatMaxVector) super.broadcastTemplate(e);
}

@Override
@ForceInline
HalffloatMaxMask maskFromArray(boolean[] bits) {
    return new HalffloatMaxMask(bits);
}

@Override
@ForceInline
HalffloatMaxShuffle iotaShuffle() {
    return HalffloatMaxShuffle.IOTA;
}

// Shuffle whose lane i holds i*step + start. With wrap set, the flag
// passed to shuffleIota is 1 and the lambda wraps each index into range;
// otherwise the flag is 0 and indexes are used as computed.
@ForceInline
HalffloatMaxShuffle iotaShuffle(int start, int step, boolean wrap) {
    if (wrap) {
        return (HalffloatMaxShuffle) VectorSupport.shuffleIota(
            ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1,
            (len, first, stride, sp) ->
                sp.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*stride + first, len))));
    } else {
        return (HalffloatMaxShuffle) VectorSupport.shuffleIota(
            ETYPE, HalffloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0,
            (len, first, stride, sp) ->
                sp.shuffleFromOp(i -> (i*stride + first)));
    }
}

@Override
@ForceInline
HalffloatMaxShuffle shuffleFromBytes(byte[] reorder) {
    return new HalffloatMaxShuffle(reorder);
}

@Override
@ForceInline
HalffloatMaxShuffle shuffleFromArray(int[] indexes, int i) {
    return new HalffloatMaxShuffle(indexes, i);
}

@Override
@ForceInline
HalffloatMaxShuffle shuffleFromOp(IntUnaryOperator fn) {
    return new HalffloatMaxShuffle(fn);
}

// Make a vector of the same species but the given elements:
@Override
@ForceInline
final HalffloatMaxVector vectorFactory(short[] vec) {
    return new HalffloatMaxVector(vec);
}

@Override
@ForceInline
final ByteMaxVector asByteVectorRaw() {
    return (ByteMaxVector) super.asByteVectorRawTemplate();
}

@Override
@ForceInline
final AbstractVector<?> asVectorRaw(LaneType laneType) {
    return super.asVectorRawTemplate(laneType);
}

// Unary operator
// The uOp/bOp/tOp overrides below cast vector and mask arguments to this
// shape's concrete classes and forward to the superclass templates.

@Override
@ForceInline
final HalffloatMaxVector uOp(FUnOp f) {
    return (HalffloatMaxVector) super.uOpTemplate(f);
}

@Override
@ForceInline
final HalffloatMaxVector uOp(VectorMask<Halffloat> m, FUnOp f) {
    return (HalffloatMaxVector) super.uOpTemplate((HalffloatMaxMask) m, f);
}

// Binary operator

@Override
@ForceInline
final HalffloatMaxVector bOp(Vector<Halffloat> v, FBinOp f) {
    return (HalffloatMaxVector) super.bOpTemplate((HalffloatMaxVector) v, f);
}

@Override
@ForceInline
final HalffloatMaxVector bOp(Vector<Halffloat> v,
                             VectorMask<Halffloat> m, FBinOp f) {
    return (HalffloatMaxVector)
        super.bOpTemplate((HalffloatMaxVector) v, (HalffloatMaxMask) m, f);
}

// Ternary operator

@Override
@ForceInline
final HalffloatMaxVector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2, FTriOp f) {
    return (HalffloatMaxVector)
        super.tOpTemplate((HalffloatMaxVector) v1, (HalffloatMaxVector) v2, f);
}

@Override
@ForceInline
final HalffloatMaxVector tOp(Vector<Halffloat> v1, Vector<Halffloat> v2,
                             VectorMask<Halffloat> m, FTriOp f) {
    return (HalffloatMaxVector)
        super.tOpTemplate((HalffloatMaxVector) v1, (HalffloatMaxVector) v2,
                          (HalffloatMaxMask) m, f);
}

@Override
@ForceInline
final short rOp(short v, VectorMask<Halffloat> m, FBinOp f) {
    return super.rOpTemplate(v, m, f);
}

@Override
@ForceInline
public final <F> Vector<F> convertShape(VectorOperators.Conversion<Halffloat,F> conv,
                                        VectorSpecies<F> rsp, int part) {
    return super.convertShapeTemplate(conv, rsp, part);
}

@Override
@ForceInline
public final <F> Vector<F> reinterpretShape(VectorSpecies<F> toSpecies, int part) {
    return super.reinterpretShapeTemplate(toSpecies, part);
}

// Specialized algebraic operations:

// The following definition forces a specialized version of this
// crucial method into the v-table of this class.  A call to add()
// will inline to a call to lanewise(ADD,), at which point the JIT
// intrinsic will have the opcode of ADD, plus all the metadata
// for this particular class, enabling it to generate precise
// code.
//
// There is probably no benefit to the JIT to specialize the
// masked or broadcast versions of the lanewise method.
+ + @Override + @ForceInline + public HalffloatMaxVector lanewise(Unary op) { + return (HalffloatMaxVector) super.lanewiseTemplate(op); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Unary op, VectorMask<Halffloat> m) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Binary op, Vector<Halffloat> v) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector lanewise(Binary op, Vector<Halffloat> v, VectorMask<Halffloat> m) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v, (HalffloatMaxMask) m); // specialize + } + + + /*package-private*/ + @Override + @ForceInline + public final + HalffloatMaxVector + lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, v1, v2); // specialize + } + + @Override + @ForceInline + public final + HalffloatMaxVector + lanewise(Ternary op, Vector<Halffloat> v1, Vector<Halffloat> v2, VectorMask<Halffloat> m) { + return (HalffloatMaxVector) super.lanewiseTemplate(op, HalffloatMaxMask.class, v1, v2, (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public final + HalffloatMaxVector addIndex(int scale) { + return (HalffloatMaxVector) super.addIndexTemplate(scale); // specialize + } + + // Type specific horizontal reductions + + @Override + @ForceInline + public final short reduceLanes(VectorOperators.Associative op) { + return super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final short reduceLanes(VectorOperators.Associative op, + VectorMask<Halffloat> m) { + return super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m); // specialized + } + + @Override + @ForceInline + public final long 
reduceLanesToLong(VectorOperators.Associative op) { + return (long) super.reduceLanesTemplate(op); // specialized + } + + @Override + @ForceInline + public final long reduceLanesToLong(VectorOperators.Associative op, + VectorMask<Halffloat> m) { + return (long) super.reduceLanesTemplate(op, HalffloatMaxMask.class, (HalffloatMaxMask) m); // specialized + } + + @ForceInline + public VectorShuffle<Halffloat> toShuffle() { + return super.toShuffleTemplate(HalffloatMaxShuffle.class); // specialize + } + + // Specialized unary testing + + @Override + @ForceInline + public final HalffloatMaxMask test(Test op) { + return super.testTemplate(HalffloatMaxMask.class, op); // specialize + } + + // Specialized comparisons + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, Vector<Halffloat> v) { + return super.compareTemplate(HalffloatMaxMask.class, op, v); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, short s) { + return super.compareTemplate(HalffloatMaxMask.class, op, s); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, long s) { + return super.compareTemplate(HalffloatMaxMask.class, op, s); // specialize + } + + @Override + @ForceInline + public final HalffloatMaxMask compare(Comparison op, Vector<Halffloat> v, VectorMask<Halffloat> m) { + return super.compareTemplate(HalffloatMaxMask.class, op, v, (HalffloatMaxMask) m); + } + + + @Override + @ForceInline + public HalffloatMaxVector blend(Vector<Halffloat> v, VectorMask<Halffloat> m) { + return (HalffloatMaxVector) + super.blendTemplate(HalffloatMaxMask.class, + (HalffloatMaxVector) v, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector slice(int origin, Vector<Halffloat> v) { + return (HalffloatMaxVector) super.sliceTemplate(origin, v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector slice(int origin) { + return 
(HalffloatMaxVector) super.sliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector unslice(int origin, Vector<Halffloat> w, int part) { + return (HalffloatMaxVector) super.unsliceTemplate(origin, w, part); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m) { + return (HalffloatMaxVector) + super.unsliceTemplate(HalffloatMaxMask.class, + origin, w, part, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector unslice(int origin) { + return (HalffloatMaxVector) super.unsliceTemplate(origin); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector rearrange(VectorShuffle<Halffloat> s) { + return (HalffloatMaxVector) + super.rearrangeTemplate(HalffloatMaxShuffle.class, + (HalffloatMaxShuffle) s); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector rearrange(VectorShuffle<Halffloat> shuffle, + VectorMask<Halffloat> m) { + return (HalffloatMaxVector) + super.rearrangeTemplate(HalffloatMaxShuffle.class, + HalffloatMaxMask.class, + (HalffloatMaxShuffle) shuffle, + (HalffloatMaxMask) m); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector rearrange(VectorShuffle<Halffloat> s, + Vector<Halffloat> v) { + return (HalffloatMaxVector) + super.rearrangeTemplate(HalffloatMaxShuffle.class, + (HalffloatMaxShuffle) s, + (HalffloatMaxVector) v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector selectFrom(Vector<Halffloat> v) { + return (HalffloatMaxVector) + super.selectFromTemplate((HalffloatMaxVector) v); // specialize + } + + @Override + @ForceInline + public HalffloatMaxVector selectFrom(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return (HalffloatMaxVector) + super.selectFromTemplate((HalffloatMaxVector) v, + (HalffloatMaxMask) m); // specialize + } + + + @ForceInline + @Override + public short 
lane(int i) { + if (i < 0 || i >= VLENGTH) { + throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + short bits = laneHelper(i); + return Halffloat.shortBitsToHalffloat(bits); + } + + public short laneHelper(int i) { + return (short) VectorSupport.extract( + VCLASS, ETYPE, VLENGTH, + this, i, + (vec, ix) -> { + short[] vecarr = vec.vec(); + return (long)Halffloat.shortToShortBits(vecarr[ix]); + }); + } + + @ForceInline + @Override + public HalffloatMaxVector withLane(int i, short e) { + if (i < 0 || i >= VLENGTH) { + throw new IllegalArgumentException("Index " + i + " must be zero or positive, and less than " + VLENGTH); + } + return withLaneHelper(i, e); + } + + public HalffloatMaxVector withLaneHelper(int i, short e) { + return VectorSupport.insert( + VCLASS, ETYPE, VLENGTH, + this, i, (long)Halffloat.shortToShortBits(e), + (v, ix, bits) -> { + short[] res = v.vec().clone(); + res[ix] = Halffloat.shortBitsToHalffloat((short)bits); + return v.vectorFactory(res); + }); + } + + // Mask + + static final class HalffloatMaxMask extends AbstractMask<Halffloat> { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + HalffloatMaxMask(boolean[] bits) { + this(bits, 0); + } + + HalffloatMaxMask(boolean[] bits, int offset) { + super(prepare(bits, offset)); + } + + HalffloatMaxMask(boolean val) { + super(prepare(val)); + } + + private static boolean[] prepare(boolean[] bits, int offset) { + boolean[] newBits = new boolean[VSPECIES.laneCount()]; + for (int i = 0; i < newBits.length; i++) { + newBits[i] = bits[offset + i]; + } + return newBits; + } + + private static boolean[] prepare(boolean val) { + boolean[] bits = new boolean[VSPECIES.laneCount()]; + Arrays.fill(bits, val); + return bits; + } + + @ForceInline + final @Override + public HalffloatSpecies vspecies() { + // ISSUE: This should probably be a @Stable + // field 
inside AbstractMask, rather than + // a megamorphic method. + return VSPECIES; + } + + @ForceInline + boolean[] getBits() { + return (boolean[])getPayload(); + } + + @Override + HalffloatMaxMask uOp(MUnOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i]); + } + return new HalffloatMaxMask(res); + } + + @Override + HalffloatMaxMask bOp(VectorMask<Halffloat> m, MBinOp f) { + boolean[] res = new boolean[vspecies().laneCount()]; + boolean[] bits = getBits(); + boolean[] mbits = ((HalffloatMaxMask)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, bits[i], mbits[i]); + } + return new HalffloatMaxMask(res); + } + + @ForceInline + @Override + public final + HalffloatMaxVector toVector() { + return (HalffloatMaxVector) super.toVectorTemplate(); // specialize + } + + /** + * Helper function for lane-wise mask conversions. + * This function kicks in after intrinsic failure. 
+ */ + @ForceInline + private final <E> + VectorMask<E> defaultMaskCast(AbstractSpecies<E> dsp) { + if (length() != dsp.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + boolean[] maskArray = toArray(); + return dsp.maskFactory(maskArray).check(dsp); + } + + @Override + @ForceInline + public <E> VectorMask<E> cast(VectorSpecies<E> dsp) { + AbstractSpecies<E> species = (AbstractSpecies<E>) dsp; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorMask length and species length differ"); + + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + this.getClass(), ETYPE, VLENGTH, + species.maskType(), species.elementType(), VLENGTH, + this, species, + (m, s) -> s.maskFactory(m.toArray()).check(s)); + } + + @Override + @ForceInline + public HalffloatMaxMask eq(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + HalffloatMaxMask m = (HalffloatMaxMask)mask; + return xor(m.not()); + } + + // Unary operations + + @Override + @ForceInline + public HalffloatMaxMask not() { + return xor(maskAll(true)); + } + + // Binary operations + + @Override + @ForceInline + public HalffloatMaxMask and(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + HalffloatMaxMask m = (HalffloatMaxMask)mask; + return VectorSupport.binaryOp(VECTOR_OP_AND, HalffloatMaxMask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a & b)); + } + + @Override + @ForceInline + public HalffloatMaxMask or(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + HalffloatMaxMask m = (HalffloatMaxMask)mask; + return VectorSupport.binaryOp(VECTOR_OP_OR, HalffloatMaxMask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a | b)); + } + + @ForceInline + /* package-private */ + HalffloatMaxMask xor(VectorMask<Halffloat> mask) { + Objects.requireNonNull(mask); + HalffloatMaxMask m = (HalffloatMaxMask)mask; + return 
VectorSupport.binaryOp(VECTOR_OP_XOR, HalffloatMaxMask.class, null, short.class, VLENGTH, + this, m, null, + (m1, m2, vm) -> m1.bOp(m2, (i, a, b) -> a ^ b)); + } + + // Mask Query operations + + @Override + @ForceInline + public int trueCount() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TRUECOUNT, HalffloatMaxMask.class, short.class, VLENGTH, this, + (m) -> trueCountHelper(m.getBits())); + } + + @Override + @ForceInline + public int firstTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_FIRSTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this, + (m) -> firstTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public int lastTrue() { + return (int) VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_LASTTRUE, HalffloatMaxMask.class, short.class, VLENGTH, this, + (m) -> lastTrueHelper(m.getBits())); + } + + @Override + @ForceInline + public long toLong() { + if (length() > Long.SIZE) { + throw new UnsupportedOperationException("too many lanes for one long"); + } + return VectorSupport.maskReductionCoerced(VECTOR_OP_MASK_TOLONG, HalffloatMaxMask.class, short.class, VLENGTH, this, + (m) -> toLongHelper(m.getBits())); + } + + // Reductions + + @Override + @ForceInline + public boolean anyTrue() { + return VectorSupport.test(BT_ne, HalffloatMaxMask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> anyTrueHelper(((HalffloatMaxMask)m).getBits())); + } + + @Override + @ForceInline + public boolean allTrue() { + return VectorSupport.test(BT_overflow, HalffloatMaxMask.class, short.class, VLENGTH, + this, vspecies().maskAll(true), + (m, __) -> allTrueHelper(((HalffloatMaxMask)m).getBits())); + } + + @ForceInline + /*package-private*/ + static HalffloatMaxMask maskAll(boolean bit) { + return VectorSupport.broadcastCoerced(HalffloatMaxMask.class, short.class, VLENGTH, + (bit ? -1 : 0), null, + (v, __) -> (v != 0 ? 
TRUE_MASK : FALSE_MASK)); + } + private static final HalffloatMaxMask TRUE_MASK = new HalffloatMaxMask(true); + private static final HalffloatMaxMask FALSE_MASK = new HalffloatMaxMask(false); + + } + + // Shuffle + + static final class HalffloatMaxShuffle extends AbstractShuffle<Halffloat> { + static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM + static final Class<Halffloat> ETYPE = Halffloat.class; // used by the JVM + + HalffloatMaxShuffle(byte[] reorder) { + super(VLENGTH, reorder); + } + + public HalffloatMaxShuffle(int[] reorder) { + super(VLENGTH, reorder); + } + + public HalffloatMaxShuffle(int[] reorder, int i) { + super(VLENGTH, reorder, i); + } + + public HalffloatMaxShuffle(IntUnaryOperator fn) { + super(VLENGTH, fn); + } + + @Override + public HalffloatSpecies vspecies() { + return VSPECIES; + } + + static { + // There must be enough bits in the shuffle lanes to encode + // VLENGTH valid indexes and VLENGTH exceptional ones. + assert(VLENGTH < Byte.MAX_VALUE); + assert(Byte.MIN_VALUE <= -VLENGTH); + } + static final HalffloatMaxShuffle IOTA = new HalffloatMaxShuffle(IDENTITY); + + @Override + @ForceInline + public HalffloatMaxVector toVector() { + return VectorSupport.shuffleToVector(VCLASS, ETYPE, HalffloatMaxShuffle.class, this, VLENGTH, + (s) -> ((HalffloatMaxVector)(((AbstractShuffle<Halffloat>)(s)).toVectorTemplate()))); + } + + @Override + @ForceInline + public <F> VectorShuffle<F> cast(VectorSpecies<F> s) { + AbstractSpecies<F> species = (AbstractSpecies<F>) s; + if (length() != species.laneCount()) + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + int[] shuffleArray = toArray(); + return s.shuffleFromArray(shuffleArray, 0).check(s); + } + + @ForceInline + @Override + public HalffloatMaxShuffle rearrange(VectorShuffle<Halffloat> shuffle) { + HalffloatMaxShuffle s = (HalffloatMaxShuffle) shuffle; + byte[] reorder1 = reorder(); + byte[] reorder2 = s.reorder(); + byte[] r = new 
byte[reorder1.length]; + for (int i = 0; i < reorder1.length; i++) { + int ssi = reorder2[i]; + r[i] = reorder1[ssi]; // throws on exceptional index + } + return new HalffloatMaxShuffle(r); + } + } + + // ================================================ + + // Specialized low-level memory operations. + + @ForceInline + @Override + final + HalffloatVector fromArray0(short[] a, int offset) { + return super.fromArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m) { + return super.fromArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); // specialize + } + + + @ForceInline + @Override + final + HalffloatVector fromCharArray0(char[] a, int offset) { + return super.fromCharArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m) { + return super.fromCharArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); // specialize + } + + + @ForceInline + @Override + final + HalffloatVector fromByteArray0(byte[] a, int offset) { + return super.fromByteArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) { + return super.fromByteArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset) { + return super.fromByteBuffer0Template(bb, offset); // specialize + } + + @ForceInline + @Override + final + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) { + return super.fromByteBuffer0Template(HalffloatMaxMask.class, bb, offset, (HalffloatMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoArray0(short[] a, int offset) { + super.intoArray0Template(a, 
offset); // specialize + } + + @ForceInline + @Override + final + void intoArray0(short[] a, int offset, VectorMask<Halffloat> m) { + super.intoArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); + } + + + + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset) { + super.intoByteArray0Template(a, offset); // specialize + } + + @ForceInline + @Override + final + void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m) { + super.intoByteArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); // specialize + } + + @ForceInline + @Override + final + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m) { + super.intoByteBuffer0Template(HalffloatMaxMask.class, bb, offset, (HalffloatMaxMask) m); + } + + @ForceInline + @Override + final + void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m) { + super.intoCharArray0Template(HalffloatMaxMask.class, a, offset, (HalffloatMaxMask) m); + } + + // End of specialized low-level memory operations. + + // ================================================ + +} diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java new file mode 100644 index 00000000000..7a748f72294 --- /dev/null +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/HalffloatVector.java @@ -0,0 +1,4151 @@ +/* + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package jdk.incubator.vector; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.ReadOnlyBufferException; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.UnaryOperator; + +import jdk.internal.misc.ScopedMemoryAccess; +import jdk.internal.misc.Unsafe; +import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; + +import static jdk.internal.vm.vector.VectorSupport.*; +import static jdk.incubator.vector.VectorIntrinsics.*; + +import static jdk.incubator.vector.VectorOperators.*; + +// -- This file was mechanically generated: Do not edit! -- // + +/** + * A specialized {@link Vector} representing an ordered immutable sequence of + * {@code short} values. 
+ */ +@SuppressWarnings("cast") // warning: redundant cast +public abstract class HalffloatVector extends AbstractVector<Halffloat> { + + HalffloatVector(short[] vec) { + super(vec); + } + + static final int FORBID_OPCODE_KIND = VO_NOFP; + + @ForceInline + static int opCode(Operator op) { + return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND); + } + @ForceInline + static int opCode(Operator op, int requireKind) { + requireKind |= VO_OPCODE_VALID; + return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND); + } + @ForceInline + static boolean opKind(Operator op, int bit) { + return VectorOperators.opKind(op, bit); + } + + // Virtualized factories and operators, + // coded with portable definitions. + // These are all @ForceInline in case + // they need to be used performantly. + // The various shape-specific subclasses + // also specialize them by wrapping + // them in a call like this: + // return (Byte128Vector) + // super.bOp((Byte128Vector) o); + // The purpose of that is to forcibly inline + // the generic definition from this file + // into a sharply type- and size-specific + // wrapper in the subclass file, so that + // the JIT can specialize the code. + // The code is only inlined and expanded + // if it gets hot. Think of it as a cheap + // and lazy version of C++ templates. + + // Virtualized getter + + /*package-private*/ + abstract short[] vec(); + + // Virtualized constructors + + /** + * Build a vector directly using my own constructor. + * It is an error if the array is aliased elsewhere. + */ + /*package-private*/ + abstract HalffloatVector vectorFactory(short[] vec); + + /** + * Build a mask directly using my species. + * It is an error if the array is aliased elsewhere. 
+ */ + /*package-private*/ + @ForceInline + final + AbstractMask<Halffloat> maskFactory(boolean[] bits) { + return vspecies().maskFactory(bits); + } + + // Constant loader (takes dummy as vector arg) + interface FVOp { + short apply(int i); + } + + /*package-private*/ + @ForceInline + final + HalffloatVector vOp(FVOp f) { + short[] res = new short[length()]; + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i); + } + return vectorFactory(res); + } + + @ForceInline + final + HalffloatVector vOp(VectorMask<Halffloat> m, FVOp f) { + short[] res = new short[length()]; + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < res.length; i++) { + if (mbits[i]) { + res[i] = f.apply(i); + } + } + return vectorFactory(res); + } + + // Unary operator + + /*package-private*/ + interface FUnOp { + short apply(int i, short a); + } + + /*package-private*/ + abstract + HalffloatVector uOp(FUnOp f); + @ForceInline + final + HalffloatVector uOpTemplate(FUnOp f) { + short[] vec = vec(); + short[] res = new short[length()]; + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, vec[i]); + } + return vectorFactory(res); + } + + /*package-private*/ + abstract + HalffloatVector uOp(VectorMask<Halffloat> m, + FUnOp f); + @ForceInline + final + HalffloatVector uOpTemplate(VectorMask<Halffloat> m, + FUnOp f) { + if (m == null) { + return uOpTemplate(f); + } + short[] vec = vec(); + short[] res = new short[length()]; + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = mbits[i] ? 
f.apply(i, vec[i]) : vec[i]; + } + return vectorFactory(res); + } + + // Binary operator + + /*package-private*/ + interface FBinOp { + short apply(int i, short a, short b); + } + + /*package-private*/ + abstract + HalffloatVector bOp(Vector<Halffloat> o, + FBinOp f); + @ForceInline + final + HalffloatVector bOpTemplate(Vector<Halffloat> o, + FBinOp f) { + short[] res = new short[length()]; + short[] vec1 = this.vec(); + short[] vec2 = ((HalffloatVector)o).vec(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, vec1[i], vec2[i]); + } + return vectorFactory(res); + } + + /*package-private*/ + abstract + HalffloatVector bOp(Vector<Halffloat> o, + VectorMask<Halffloat> m, + FBinOp f); + @ForceInline + final + HalffloatVector bOpTemplate(Vector<Halffloat> o, + VectorMask<Halffloat> m, + FBinOp f) { + if (m == null) { + return bOpTemplate(o, f); + } + short[] res = new short[length()]; + short[] vec1 = this.vec(); + short[] vec2 = ((HalffloatVector)o).vec(); + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i]) : vec1[i]; + } + return vectorFactory(res); + } + + // Ternary operator + + /*package-private*/ + interface FTriOp { + short apply(int i, short a, short b, short c); + } + + /*package-private*/ + abstract + HalffloatVector tOp(Vector<Halffloat> o1, + Vector<Halffloat> o2, + FTriOp f); + @ForceInline + final + HalffloatVector tOpTemplate(Vector<Halffloat> o1, + Vector<Halffloat> o2, + FTriOp f) { + short[] res = new short[length()]; + short[] vec1 = this.vec(); + short[] vec2 = ((HalffloatVector)o1).vec(); + short[] vec3 = ((HalffloatVector)o2).vec(); + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]); + } + return vectorFactory(res); + } + + /*package-private*/ + abstract + HalffloatVector tOp(Vector<Halffloat> o1, + Vector<Halffloat> o2, + VectorMask<Halffloat> m, + FTriOp f); + @ForceInline + final + HalffloatVector tOpTemplate(Vector<Halffloat> o1, + Vector<Halffloat> o2, + VectorMask<Halffloat> m, + FTriOp f) { + if (m == null) { + return tOpTemplate(o1, o2, f); + } + short[] res = new short[length()]; + short[] vec1 = this.vec(); + short[] vec2 = ((HalffloatVector)o1).vec(); + short[] vec3 = ((HalffloatVector)o2).vec(); + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < res.length; i++) { + res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i]; + } + return vectorFactory(res); + } + + // Reduction operator + + /*package-private*/ + abstract + short rOp(short v, VectorMask<Halffloat> m, FBinOp f); + + @ForceInline + final + short rOpTemplate(short v, VectorMask<Halffloat> m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + short[] vec = vec(); + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? 
f.apply(i, v, vec[i]) : v; + } + return v; + } + + @ForceInline + final + short rOpTemplate(short v, FBinOp f) { + short[] vec = vec(); + for (int i = 0; i < vec.length; i++) { + v = f.apply(i, v, vec[i]); + } + return v; + } + + // Memory reference + + /*package-private*/ + interface FLdOp<M> { + short apply(M memory, int offset, int i); + } + + /*package-private*/ + @ForceInline + final + <M> HalffloatVector ldOp(M memory, int offset, + FLdOp<M> f) { + //dummy; no vec = vec(); + short[] res = new short[length()]; + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(memory, offset, i); + } + return vectorFactory(res); + } + + /*package-private*/ + @ForceInline + final + <M> HalffloatVector ldOp(M memory, int offset, + VectorMask<Halffloat> m, + FLdOp<M> f) { + //short[] vec = vec(); + short[] res = new short[length()]; + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < res.length; i++) { + if (mbits[i]) { + res[i] = f.apply(memory, offset, i); + } + } + return vectorFactory(res); + } + + interface FStOp<M> { + void apply(M memory, int offset, int i, short a); + } + + /*package-private*/ + @ForceInline + final + <M> void stOp(M memory, int offset, + FStOp<M> f) { + short[] vec = vec(); + for (int i = 0; i < vec.length; i++) { + f.apply(memory, offset, i, vec[i]); + } + } + + /*package-private*/ + @ForceInline + final + <M> void stOp(M memory, int offset, + VectorMask<Halffloat> m, + FStOp<M> f) { + short[] vec = vec(); + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < vec.length; i++) { + if (mbits[i]) { + f.apply(memory, offset, i, vec[i]); + } + } + } + + // Binary test + + /*package-private*/ + interface FBinTest { + boolean apply(int cond, int i, short a, short b); + } + + /*package-private*/ + @ForceInline + final + AbstractMask<Halffloat> bTest(int cond, + Vector<Halffloat> o, + FBinTest f) { + short[] vec1 = vec(); + short[] vec2 = ((HalffloatVector)o).vec(); + boolean[] bits = new 
boolean[length()]; + for (int i = 0; i < length(); i++){ + bits[i] = f.apply(cond, i, vec1[i], vec2[i]); + } + return maskFactory(bits); + } + + + /*package-private*/ + @Override + abstract HalffloatSpecies vspecies(); + + /*package-private*/ + @ForceInline + static long toBits(short e) { + return Halffloat.shortToRawShortBits(e); + } + + /*package-private*/ + @ForceInline + static short fromBits(long bits) { + return Halffloat.shortBitsToHalffloat((short)bits); + } + + // Static factories (other than memory operations) + + // Note: A surprising behavior in javadoc + // sometimes makes a lone /** {@inheritDoc} */ + // comment drop the method altogether, + // apparently if the method mentions an + // parameter or return type of Vector<Halffloat> + // instead of Vector<E> as originally specified. + // Adding an empty HTML fragment appears to + // nudge javadoc into providing the desired + // inherited documentation. We use the HTML + // comment <!--workaround--> for this. + + /** + * Returns a vector of the given species + * where all lane elements are set to + * zero, the default primitive value. + * + * @param species species of the desired zero vector + * @return a zero vector + */ + @ForceInline + public static HalffloatVector zero(VectorSpecies<Halffloat> species) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return VectorSupport.broadcastCoerced(vsp.vectorType(), Halffloat.class, species.length(), + toBits((short)0), vsp, + ((bits_, s_) -> s_.rvOp(i -> bits_))); + } + + /** + * Returns a vector of the same species as this one + * where all lane elements are set to + * the primitive value {@code e}. + * + * The contents of the current vector are discarded; + * only the species is relevant to this operation. + * + * <p> This method returns the value of this expression: + * {@code HalffloatVector.broadcast(this.species(), e)}. 
+ * + * @apiNote + * Unlike the similar method named {@code broadcast()} + * in the supertype {@code Vector}, this method does not + * need to validate its argument, and cannot throw + * {@code IllegalArgumentException}. This method is + * therefore preferable to the supertype method. + * + * @param e the value to broadcast + * @return a vector where all lane elements are set to + * the primitive value {@code e} + * @see #broadcast(VectorSpecies,long) + * @see Vector#broadcast(long) + * @see VectorSpecies#broadcast(long) + */ + public abstract HalffloatVector broadcast(short e); + + /** + * Returns a vector of the given species + * where all lane elements are set to + * the primitive value {@code e}. + * + * @param species species of the desired vector + * @param e the value to broadcast + * @return a vector where all lane elements are set to + * the primitive value {@code e} + * @see #broadcast(long) + * @see Vector#broadcast(long) + * @see VectorSpecies#broadcast(long) + */ + @ForceInline + public static HalffloatVector broadcast(VectorSpecies<Halffloat> species, short e) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.broadcast(e); + } + + /*package-private*/ + @ForceInline + final HalffloatVector broadcastTemplate(short e) { + HalffloatSpecies vsp = vspecies(); + return vsp.broadcast(e); + } + + /** + * {@inheritDoc} <!--workaround--> + * @apiNote + * When working with vector subtypes like {@code HalffloatVector}, + * {@linkplain #broadcast(short) the more strongly typed method} + * is typically selected. It can be explicitly selected + * using a cast: {@code v.broadcast((short)e)}. + * The two expressions will produce numerically identical results. + */ + @Override + public abstract HalffloatVector broadcast(long e); + + /** + * Returns a vector of the given species + * where all lane elements are set to + * the primitive value {@code e}. 
+ * + * The {@code long} value must be accurately representable + * by the {@code ETYPE} of the vector species, so that + * {@code e==(long)(ETYPE)e}. + * + * @param species species of the desired vector + * @param e the value to broadcast + * @return a vector where all lane elements are set to + * the primitive value {@code e} + * @throws IllegalArgumentException + * if the given {@code long} value cannot + * be represented by the vector's {@code ETYPE} + * @see #broadcast(VectorSpecies,short) + * @see VectorSpecies#checkValue(long) + */ + @ForceInline + public static HalffloatVector broadcast(VectorSpecies<Halffloat> species, long e) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.broadcast(e); + } + + /*package-private*/ + @ForceInline + final HalffloatVector broadcastTemplate(long e) { + return vspecies().broadcast(e); + } + + // Unary lanewise support + + /** + * {@inheritDoc} <!--workaround--> + */ + public abstract + HalffloatVector lanewise(VectorOperators.Unary op); + + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Unary op) { + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0)); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), null, Halffloat.class, length(), + this, null, + UN_IMPL.find(op, opc, HalffloatVector::unaryOperations)); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Unary op, + VectorMask<Halffloat> m); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Unary op, + Class<? 
extends VectorMask<Halffloat>> maskClass, + VectorMask<Halffloat> m) { + m.check(maskClass, this); + if (opKind(op, VO_SPECIAL)) { + if (op == ZOMO) { + return blend(broadcast(-1), compare(NE, 0, m)); + } + } + int opc = opCode(op); + return VectorSupport.unaryOp( + opc, getClass(), maskClass, Halffloat.class, length(), + this, m, + UN_IMPL.find(op, opc, HalffloatVector::unaryOperations)); + } + + private static final + ImplCache<Unary, UnaryOperation<HalffloatVector, VectorMask<Halffloat>>> + UN_IMPL = new ImplCache<>(Unary.class, HalffloatVector.class); + + private static UnaryOperation<HalffloatVector, VectorMask<Halffloat>> unaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_NEG: return (v0, m) -> + v0.uOp(m, (i, a) -> (short) -a); + case VECTOR_OP_ABS: return (v0, m) -> + v0.uOp(m, (i, a) -> (short) Math.abs(a)); + default: return null; + } + } + + // Binary lanewise support + + /** + * {@inheritDoc} <!--workaround--> + * @see #lanewise(VectorOperators.Binary,short) + * @see #lanewise(VectorOperators.Binary,short,VectorMask) + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Binary op, + Vector<Halffloat> v); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Binary op, + Vector<Halffloat> v) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + + if (opKind(op, VO_SPECIAL )) { + if (op == FIRST_NONZERO) { + // FIXME: Support this in the JIT. + VectorMask<Short> thisNZ + = this.viewAsIntegralLanes().compare(NE, (short) 0); + that = that.blend((short) 0, thisNZ.cast(vspecies())); + op = OR_UNCHECKED; + // FIXME: Support OR_UNCHECKED on float/double also! 
+ return this.viewAsIntegralLanes() + .lanewise(op, that.viewAsIntegralLanes()) + .viewAsFloatingLanes(); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), null, Halffloat.class, length(), + this, that, null, + BIN_IMPL.find(op, opc, HalffloatVector::binaryOperations)); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #lanewise(VectorOperators.Binary,short,VectorMask) + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Binary op, + Vector<Halffloat> v, + VectorMask<Halffloat> m); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Binary op, + Class<? extends VectorMask<Halffloat>> maskClass, + Vector<Halffloat> v, VectorMask<Halffloat> m) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + m.check(maskClass, this); + + if (opKind(op, VO_SPECIAL )) { + if (op == FIRST_NONZERO) { + return blend(lanewise(op, v), m); + } + } + + int opc = opCode(op); + return VectorSupport.binaryOp( + opc, getClass(), maskClass, Halffloat.class, length(), + this, that, m, + BIN_IMPL.find(op, opc, HalffloatVector::binaryOperations)); + } + + private static final + ImplCache<Binary, BinaryOperation<HalffloatVector, VectorMask<Halffloat>>> + BIN_IMPL = new ImplCache<>(Binary.class, HalffloatVector.class); + + private static BinaryOperation<HalffloatVector, VectorMask<Halffloat>> binaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() - Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, 
(i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue()))); + default: return null; + } + } + + // FIXME: Maybe all of the public final methods in this file (the + // simple ones that just call lanewise) should be pushed down to + // the X-VectorBits template. They can't optimize properly at + // this level, and must rely on inlining. Does it work? + // (If it works, of course keep the code here.) + + /** + * Combines the lane values of this vector + * with the value of a broadcast scalar. + * + * This is a lane-wise binary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e))}. + * + * @param op the operation used to process lane values + * @param e the input scalar + * @return the result of applying the operation lane-wise + * to the two input vectors + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short,VectorMask) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + short e) { + return lanewise(op, broadcast(e)); + } + + /** + * Combines the lane values of this vector + * with the value of a broadcast scalar, + * with selection of lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e), m)}. 
+ * + * @param op the operation used to process lane values + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of applying the operation lane-wise + * to the input vector and the scalar + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + short e, + VectorMask<Halffloat> m) { + return lanewise(op, broadcast(e), m); + } + + /** + * {@inheritDoc} <!--workaround--> + * @apiNote + * When working with vector subtypes like {@code HalffloatVector}, + * {@linkplain #lanewise(VectorOperators.Binary,short) + * the more strongly typed method} + * is typically selected. It can be explicitly selected + * using a cast: {@code v.lanewise(op,(short)e)}. + * The two expressions will produce numerically identical results. + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + long e) { + short e1 = (short) e; + if ((long)e1 != e) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1); + } + + /** + * {@inheritDoc} <!--workaround--> + * @apiNote + * When working with vector subtypes like {@code HalffloatVector}, + * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask) + * the more strongly typed method} + * is typically selected. It can be explicitly selected + * using a cast: {@code v.lanewise(op,(short)e,m)}. + * The two expressions will produce numerically identical results. 
+ */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Binary op, + long e, VectorMask<Halffloat> m) { + short e1 = (short) e; + if ((long)e1 != e) { + vspecies().checkValue(e); // for exception + } + return lanewise(op, e1, m); + } + + + // Ternary lanewise support + + // Ternary operators come in eight variations: + // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2]) + // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask) + + // It is annoying to support all of these variations of masking + // and broadcast, but it would be more surprising not to continue + // the obvious pattern started by unary and binary. + + /** + * {@inheritDoc} <!--workaround--> + * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) + * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask) + * @see #lanewise(VectorOperators.Ternary,short,short) + * @see #lanewise(VectorOperators.Ternary,Vector,short) + * @see #lanewise(VectorOperators.Ternary,short,Vector) + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Ternary op, + Vector<Halffloat> v1, + Vector<Halffloat> v2); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Ternary op, + Vector<Halffloat> v1, + Vector<Halffloat> v2) { + HalffloatVector that = (HalffloatVector) v1; + HalffloatVector tother = (HalffloatVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... 
+ that.check(this); + tother.check(this); + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), null, Halffloat.class, length(), + this, that, tother, null, + TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations)); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) + * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask) + */ + @Override + public abstract + HalffloatVector lanewise(VectorOperators.Ternary op, + Vector<Halffloat> v1, + Vector<Halffloat> v2, + VectorMask<Halffloat> m); + @ForceInline + final + HalffloatVector lanewiseTemplate(VectorOperators.Ternary op, + Class<? extends VectorMask<Halffloat>> maskClass, + Vector<Halffloat> v1, + Vector<Halffloat> v2, + VectorMask<Halffloat> m) { + HalffloatVector that = (HalffloatVector) v1; + HalffloatVector tother = (HalffloatVector) v2; + // It's a word: https://www.dictionary.com/browse/tother + // See also Chapter 11 of Dickens, Our Mutual Friend: + // "Totherest Governor," replied Mr Riderhood... 
+ that.check(this); + tother.check(this); + m.check(maskClass, this); + + int opc = opCode(op); + return VectorSupport.ternaryOp( + opc, getClass(), maskClass, Halffloat.class, length(), + this, that, tother, m, + TERN_IMPL.find(op, opc, HalffloatVector::ternaryOperations)); + } + + private static final + ImplCache<Ternary, TernaryOperation<HalffloatVector, VectorMask<Halffloat>>> + TERN_IMPL = new ImplCache<>(Ternary.class, HalffloatVector.class); + + private static TernaryOperation<HalffloatVector, VectorMask<Halffloat>> ternaryOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> + Halffloat.valueOf(Math.fma(Halffloat.valueOf(a).floatValue(), + Halffloat.valueOf(b).floatValue(), Halffloat.valueOf(c).floatValue()))); + default: return null; + } + } + + /** + * Combines the lane values of this vector + * with the values of two broadcast scalars. + * + * This is a lane-wise ternary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}. + * + * @param op the operation used to combine lane values + * @param e1 the first input scalar + * @param e2 the second input scalar + * @return the result of applying the operation lane-wise + * to the input vector and the scalars + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Ternary,Vector,Vector) + * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2) + short e1, + short e2) { + return lanewise(op, broadcast(e1), broadcast(e2)); + } + + /** + * Combines the lane values of this vector + * with the values of two broadcast scalars, + * with selection of lane elements controlled by a mask. 
+ * + * This is a masked lane-wise ternary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}. + * + * @param op the operation used to combine lane values + * @param e1 the first input scalar + * @param e2 the second input scalar + * @param m the mask controlling lane selection + * @return the result of applying the operation lane-wise + * to the input vector and the scalars + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) + * @see #lanewise(VectorOperators.Ternary,short,short) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m) + short e1, + short e2, + VectorMask<Halffloat> m) { + return lanewise(op, broadcast(e1), broadcast(e2), m); + } + + /** + * Combines the lane values of this vector + * with the values of another vector and a broadcast scalar. + * + * This is a lane-wise ternary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, v1, this.broadcast(e2))}. 
+ * + * @param op the operation used to combine lane values + * @param v1 the other input vector + * @param e2 the input scalar + * @return the result of applying the operation lane-wise + * to the input vectors and the scalar + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Ternary,short,short) + * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2) + Vector<Halffloat> v1, + short e2) { + return lanewise(op, v1, broadcast(e2)); + } + + /** + * Combines the lane values of this vector + * with the values of another vector and a broadcast scalar, + * with selection of lane elements controlled by a mask. + * + * This is a masked lane-wise ternary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, v1, this.broadcast(e2), m)}. + * + * @param op the operation used to combine lane values + * @param v1 the other input vector + * @param e2 the input scalar + * @param m the mask controlling lane selection + * @return the result of applying the operation lane-wise + * to the input vectors and the scalar + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Ternary,Vector,Vector) + * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) + * @see #lanewise(VectorOperators.Ternary,Vector,short) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m) + Vector<Halffloat> v1, + short e2, + VectorMask<Halffloat> m) { + return lanewise(op, v1, broadcast(e2), m); + } + + /** + * Combines the lane values of this vector + * with the values of another vector and a broadcast scalar. 
+ * + * This is a lane-wise ternary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e1), v2)}. + * + * @param op the operation used to combine lane values + * @param e1 the input scalar + * @param v2 the other input vector + * @return the result of applying the operation lane-wise + * to the input vectors and the scalar + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Ternary,Vector,Vector) + * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2) + short e1, + Vector<Halffloat> v2) { + return lanewise(op, broadcast(e1), v2); + } + + /** + * Combines the lane values of this vector + * with the values of another vector and a broadcast scalar, + * with selection of lane elements controlled by a mask. + * + * This is a masked lane-wise ternary operation which applies + * the selected operation to each lane. + * The return value will be equal to this expression: + * {@code this.lanewise(op, this.broadcast(e1), v2, m)}. 
+ * + * @param op the operation used to combine lane values + * @param e1 the input scalar + * @param v2 the other input vector + * @param m the mask controlling lane selection + * @return the result of applying the operation lane-wise + * to the input vectors and the scalar + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) + * @see #lanewise(VectorOperators.Ternary,short,Vector) + */ + @ForceInline + public final + HalffloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m) + short e1, + Vector<Halffloat> v2, + VectorMask<Halffloat> m) { + return lanewise(op, broadcast(e1), v2, m); + } + + // (Thus endeth the Great and Mighty Ternary Ogdoad.) + // https://en.wikipedia.org/wiki/Ogdoad + + /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV + // + // These include masked and non-masked versions. + // This subclass adds broadcast (masked or not). + + /** + * {@inheritDoc} <!--workaround--> + * @see #add(short) + */ + @Override + @ForceInline + public final HalffloatVector add(Vector<Halffloat> v) { + return lanewise(ADD, v); + } + + /** + * Adds this vector to the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies + * the primitive addition operation ({@code +}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#ADD + * ADD}{@code , e)}. 
+ * + * @param e the input scalar + * @return the result of adding each lane of this vector to the scalar + * @see #add(Vector) + * @see #broadcast(short) + * @see #add(short,VectorMask) + * @see VectorOperators#ADD + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final + HalffloatVector add(short e) { + return lanewise(ADD, e); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #add(short,VectorMask) + */ + @Override + @ForceInline + public final HalffloatVector add(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return lanewise(ADD, v, m); + } + + /** + * Adds this vector to the broadcast of an input scalar, + * selecting lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive addition operation ({@code +}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short,VectorMask) + * lanewise}{@code (}{@link VectorOperators#ADD + * ADD}{@code , e, m)}. + * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of adding each lane of this vector to the scalar + * @see #add(Vector,VectorMask) + * @see #broadcast(short) + * @see #add(short) + * @see VectorOperators#ADD + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector add(short e, + VectorMask<Halffloat> m) { + return lanewise(ADD, e, m); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #sub(short) + */ + @Override + @ForceInline + public final HalffloatVector sub(Vector<Halffloat> v) { + return lanewise(SUB, v); + } + + /** + * Subtracts an input scalar from this vector. + * + * This is a lane-wise binary operation which applies + * the primitive subtraction operation ({@code -}) to each lane.
+ * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#SUB + * SUB}{@code , e)}. + * + * @param e the input scalar + * @return the result of subtracting the scalar from each lane of this vector + * @see #sub(Vector) + * @see #broadcast(short) + * @see #sub(short,VectorMask) + * @see VectorOperators#SUB + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector sub(short e) { + return lanewise(SUB, e); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #sub(short,VectorMask) + */ + @Override + @ForceInline + public final HalffloatVector sub(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return lanewise(SUB, v, m); + } + + /** + * Subtracts an input scalar from this vector + * under the control of a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive subtraction operation ({@code -}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short,VectorMask) + * lanewise}{@code (}{@link VectorOperators#SUB + * SUB}{@code , e, m)}. + * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of subtracting the scalar from each lane of this vector + * @see #sub(Vector,VectorMask) + * @see #broadcast(short) + * @see #sub(short) + * @see VectorOperators#SUB + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector sub(short e, + VectorMask<Halffloat> m) { + return lanewise(SUB, e, m); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #mul(short) + */ + @Override + @ForceInline + public final HalffloatVector mul(Vector<Halffloat> v) { + return lanewise(MUL, v); + } + + /** + * Multiplies this vector by the broadcast of an input scalar.
+ * + * This is a lane-wise binary operation which applies + * the primitive multiplication operation ({@code *}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#MUL + * MUL}{@code , e)}. + * + * @param e the input scalar + * @return the result of multiplying this vector by the given scalar + * @see #mul(Vector) + * @see #broadcast(short) + * @see #mul(short,VectorMask) + * @see VectorOperators#MUL + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector mul(short e) { + return lanewise(MUL, e); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #mul(short,VectorMask) + */ + @Override + @ForceInline + public final HalffloatVector mul(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return lanewise(MUL, v, m); + } + + /** + * Multiplies this vector by the broadcast of an input scalar, + * selecting lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive multiplication operation ({@code *}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short,VectorMask) + * lanewise}{@code (}{@link VectorOperators#MUL + * MUL}{@code , e, m)}.
+ * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of multiplying each lane of this vector by the scalar + * @see #mul(Vector,VectorMask) + * @see #broadcast(short) + * @see #mul(short) + * @see VectorOperators#MUL + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector mul(short e, + VectorMask<Halffloat> m) { + return lanewise(MUL, e, m); + } + + /** + * {@inheritDoc} <!--workaround--> + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN. + */ + @Override + @ForceInline + public final HalffloatVector div(Vector<Halffloat> v) { + return lanewise(DIV, v); + } + + /** + * Divides this vector by the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies + * the primitive division operation ({@code /}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#DIV + * DIV}{@code , e)}. + * + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN.
+ * + * @param e the input scalar + * @return the result of dividing each lane of this vector by the scalar + * @see #div(Vector) + * @see #broadcast(short) + * @see #div(short,VectorMask) + * @see VectorOperators#DIV + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector div(short e) { + return lanewise(DIV, e); + } + + /** + * {@inheritDoc} <!--workaround--> + * @see #div(short,VectorMask) + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN. + */ + @Override + @ForceInline + public final HalffloatVector div(Vector<Halffloat> v, + VectorMask<Halffloat> m) { + return lanewise(DIV, v, m); + } + + /** + * Divides this vector by the broadcast of an input scalar, + * selecting lane elements controlled by a mask. + * + * This is a masked lane-wise binary operation which applies + * the primitive division operation ({@code /}) to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short,VectorMask) + * lanewise}{@code (}{@link VectorOperators#DIV + * DIV}{@code , e, m)}. + * + * @apiNote Because the underlying scalar operator is an IEEE + * floating point number, division by zero in fact will + * not throw an exception, but will yield a signed + * infinity or NaN.
+ * + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the result of dividing each lane of this vector by the scalar + * @see #div(Vector,VectorMask) + * @see #broadcast(short) + * @see #div(short) + * @see VectorOperators#DIV + * @see #lanewise(VectorOperators.Binary,Vector) + * @see #lanewise(VectorOperators.Binary,short) + */ + @ForceInline + public final HalffloatVector div(short e, + VectorMask<Halffloat> m) { + return lanewise(DIV, e, m); + } + + /// END OF FULL-SERVICE BINARY METHODS + + /// SECOND-TIER BINARY METHODS + // + // There are no masked versions. + + /** + * {@inheritDoc} <!--workaround--> + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). + */ + @Override + @ForceInline + public final HalffloatVector min(Vector<Halffloat> v) { + return lanewise(MIN, v); + } + + // FIXME: "broadcast of an input scalar" is really wordy. Reduce? + /** + * Computes the smaller of this vector and the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies the + * operation {@code Math.min()} to each pair of + * corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#MIN + * MIN}{@code , e)}. + * + * @param e the input scalar + * @return the lane-wise minimum of this vector and the given scalar + * @see #min(Vector) + * @see #broadcast(short) + * @see VectorOperators#MIN + * @see #lanewise(VectorOperators.Binary,short,VectorMask) + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero).
+ */ + @ForceInline + public final HalffloatVector min(short e) { + return lanewise(MIN, e); + } + + /** + * {@inheritDoc} <!--workaround--> + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). + */ + @Override + @ForceInline + public final HalffloatVector max(Vector<Halffloat> v) { + return lanewise(MAX, v); + } + + /** + * Computes the larger of this vector and the broadcast of an input scalar. + * + * This is a lane-wise binary operation which applies the + * operation {@code Math.max()} to each pair of + * corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#MAX + * MAX}{@code , e)}. + * + * @param e the input scalar + * @return the lane-wise maximum of this vector and the given scalar + * @see #max(Vector) + * @see #broadcast(short) + * @see VectorOperators#MAX + * @see #lanewise(VectorOperators.Binary,short,VectorMask) + * @apiNote + * For this method, floating point negative + * zero {@code -0.0} is treated as a value distinct from, and less + * than the default value (positive zero). + */ + @ForceInline + public final HalffloatVector max(short e) { + return lanewise(MAX, e); + } + + + // common FP operator: pow + /** + * Raises this vector to the power of a second input vector. + * + * This is a lane-wise binary operation which applies an operation + * conforming to the specification of + * {@link Math#pow Math.pow(a,b)} + * to each pair of corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,Vector) + * lanewise}{@code (}{@link VectorOperators#POW + * POW}{@code , b)}. + * + * <p> + * This is not a full-service named operation like + * {@link #add(Vector) add}.
A masked version of + * this operation is not directly available + * but may be obtained via the masked version of + * {@code lanewise}. + * + * @param b a vector exponent by which to raise this vector + * @return the {@code b}-th power of this vector + * @see #pow(short) + * @see VectorOperators#POW + * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) + */ + @ForceInline + public final HalffloatVector pow(Vector<Halffloat> b) { + return lanewise(POW, b); + } + + /** + * Raises this vector to a scalar power. + * + * This is a lane-wise binary operation which applies an operation + * conforming to the specification of + * {@link Math#pow Math.pow(a,b)} + * to each pair of corresponding lane values. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Binary,short) + * lanewise}{@code (}{@link VectorOperators#POW + * POW}{@code , b)}. + * + * @param b a scalar exponent by which to raise this vector + * @return the {@code b}-th power of this vector + * @see #pow(Vector) + * @see VectorOperators#POW + * @see #lanewise(VectorOperators.Binary,short,VectorMask) + */ + @ForceInline + public final HalffloatVector pow(short b) { + return lanewise(POW, b); + } + + /// UNARY METHODS + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + HalffloatVector neg() { + return lanewise(NEG); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + HalffloatVector abs() { + return lanewise(ABS); + } + + + // sqrt + /** + * Computes the square root of this vector. + * + * This is a lane-wise unary operation which applies an operation + * conforming to the specification of + * {@link Math#sqrt Math.sqrt(a)} + * to each lane value. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Unary) + * lanewise}{@code (}{@link VectorOperators#SQRT + * SQRT}{@code )}.
+ * + * @return the square root of this vector + * @see VectorOperators#SQRT + * @see #lanewise(VectorOperators.Unary,VectorMask) + */ + @ForceInline + public final HalffloatVector sqrt() { + return lanewise(SQRT); + } + + /// COMPARISONS + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + VectorMask<Halffloat> eq(Vector<Halffloat> v) { + return compare(EQ, v); + } + + /** + * Tests if this vector is equal to an input scalar. + * + * This is a lane-wise binary test operation which applies + * the primitive equals operation ({@code ==}) to each lane. + * The result is the same as {@code compare(VectorOperators.EQ, e)}. + * + * @param e the input scalar + * @return the result mask of testing if this vector + * is equal to {@code e} + * @see #compare(VectorOperators.Comparison,short) + */ + @ForceInline + public final + VectorMask<Halffloat> eq(short e) { + return compare(EQ, e); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + VectorMask<Halffloat> lt(Vector<Halffloat> v) { + return compare(LT, v); + } + + /** + * Tests if this vector is less than an input scalar. + * + * This is a lane-wise binary test operation which applies + * the primitive less than operation ({@code <}) to each lane. + * The result is the same as {@code compare(VectorOperators.LT, e)}.
+ * + * @param e the input scalar + * @return the mask result of testing if this vector + * is less than the input scalar + * @see #compare(VectorOperators.Comparison,short) + */ + @ForceInline + public final + VectorMask<Halffloat> lt(short e) { + return compare(LT, e); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + VectorMask<Halffloat> test(VectorOperators.Test op); + + /*package-private*/ + @ForceInline + final + <M extends VectorMask<Halffloat>> + M testTemplate(Class<M> maskType, Test op) { + HalffloatSpecies vsp = vspecies(); + if (opKind(op, VO_SPECIAL)) { + ShortVector bits = this.viewAsIntegralLanes(); + VectorMask<Short> m; + if (op == IS_DEFAULT) { + m = bits.compare(EQ, (short) 0); + } else if (op == IS_NEGATIVE) { + m = bits.compare(LT, (short) 0); + } + else if (op == IS_FINITE || + op == IS_NAN || + op == IS_INFINITE) { + // first kill the sign: + bits = bits.and(Short.MAX_VALUE); + // next find the bit pattern for infinity: + short infbits = (short) toBits(Halffloat.POSITIVE_INFINITY); + // now compare: + if (op == IS_FINITE) { + m = bits.compare(LT, infbits); + } else if (op == IS_NAN) { + m = bits.compare(GT, infbits); + } else { + m = bits.compare(EQ, infbits); + } + } + else { + throw new AssertionError(op); + } + return maskType.cast(m.cast(this.vspecies())); + } + int opc = opCode(op); + throw new AssertionError(op); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + VectorMask<Halffloat> test(VectorOperators.Test op, + VectorMask<Halffloat> m) { + return test(op).and(m); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + VectorMask<Halffloat> compare(VectorOperators.Comparison op, Vector<Halffloat> v); + + /*package-private*/ + @ForceInline + final + <M extends VectorMask<Halffloat>> + M compareTemplate(Class<M> maskType, Comparison op, Vector<Halffloat> v) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + 
int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, Halffloat.class, length(), + this, that, null, + (cond, v0, v1, m1) -> { + AbstractMask<Halffloat> m + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) m; + return m2; + }); + } + + /*package-private*/ + @ForceInline + final + <M extends VectorMask<Halffloat>> + M compareTemplate(Class<M> maskType, Comparison op, Vector<Halffloat> v, M m) { + HalffloatVector that = (HalffloatVector) v; + that.check(this); + m.check(maskType, this); + int opc = opCode(op); + return VectorSupport.compare( + opc, getClass(), maskType, Halffloat.class, length(), + this, that, m, + (cond, v0, v1, m1) -> { + AbstractMask<Halffloat> cmpM + = v0.bTest(cond, v1, (cond_, i, a, b) + -> compareWithOp(cond, a, b)); + @SuppressWarnings("unchecked") + M m2 = (M) cmpM.and(m1); + return m2; + }); + } + + @ForceInline + private static boolean compareWithOp(int cond, short a, short b) { + return switch (cond) { + case BT_eq -> Halffloat.valueOf(a).floatValue() == Halffloat.valueOf(b).floatValue(); + case BT_ne -> Halffloat.valueOf(a).floatValue() != Halffloat.valueOf(b).floatValue(); + case BT_lt -> Halffloat.valueOf(a).floatValue() < Halffloat.valueOf(b).floatValue(); + case BT_le -> Halffloat.valueOf(a).floatValue() <= Halffloat.valueOf(b).floatValue(); + case BT_gt -> Halffloat.valueOf(a).floatValue() > Halffloat.valueOf(b).floatValue(); + case BT_ge -> Halffloat.valueOf(a).floatValue() >= Halffloat.valueOf(b).floatValue(); + default -> throw new AssertionError(); + }; + } + + /** + * Tests this vector by comparing it with an input scalar, + * according to the given comparison operation. + * + * This is a lane-wise binary test operation which applies + * the comparison operation to each lane. + * <p> + * The result is the same as + * {@code compare(op, broadcast(species(), e))}. 
+ * That is, the scalar may be regarded as broadcast to + * a vector of the same species, and then compared + * against the original vector, using the selected + * comparison operation. + * + * @param op the operation used to compare lane values + * @param e the input scalar + * @return the mask result of testing lane-wise if this vector + * compares to the input, according to the selected + * comparison operator + * @see HalffloatVector#compare(VectorOperators.Comparison,Vector) + * @see #eq(short) + * @see #lt(short) + */ + public abstract + VectorMask<Halffloat> compare(Comparison op, short e); + + /*package-private*/ + @ForceInline + final + <M extends VectorMask<Halffloat>> + M compareTemplate(Class<M> maskType, Comparison op, short e) { + return compareTemplate(maskType, op, broadcast(e)); + } + + /** + * Tests this vector by comparing it with an input scalar, + * according to the given comparison operation, + * in lanes selected by a mask. + * + * This is a masked lane-wise binary test operation which applies + * to each pair of corresponding lane values. + * + * The returned result is equal to the expression + * {@code compare(op,s).and(m)}. 
+ * + * @param op the operation used to compare lane values + * @param e the input scalar + * @param m the mask controlling lane selection + * @return the mask result of testing lane-wise if this vector + * compares to the input, according to the selected + * comparison operator, + * and only in the lanes selected by the mask + * @see HalffloatVector#compare(VectorOperators.Comparison,Vector,VectorMask) + */ + @ForceInline + public final VectorMask<Halffloat> compare(VectorOperators.Comparison op, + short e, + VectorMask<Halffloat> m) { + return compare(op, broadcast(e), m); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + VectorMask<Halffloat> compare(Comparison op, long e); + + /*package-private*/ + @ForceInline + final + <M extends VectorMask<Halffloat>> + M compareTemplate(Class<M> maskType, Comparison op, long e) { + return compareTemplate(maskType, op, broadcast(e)); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + VectorMask<Halffloat> compare(Comparison op, long e, VectorMask<Halffloat> m) { + return compare(op, broadcast(e), m); + } + + + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override public abstract + HalffloatVector blend(Vector<Halffloat> v, VectorMask<Halffloat> m); + + /*package-private*/ + @ForceInline + final + <M extends VectorMask<Halffloat>> + HalffloatVector + blendTemplate(Class<M> maskType, HalffloatVector v, M m) { + v.check(this); + return VectorSupport.blend( + getClass(), maskType, Halffloat.class, length(), + this, v, m, + (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b)); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override public abstract HalffloatVector addIndex(int scale); + + /*package-private*/ + @ForceInline + final HalffloatVector addIndexTemplate(int scale) { + HalffloatSpecies vsp = vspecies(); + // make sure VLENGTH*scale doesn't overflow: + vsp.checkScale(scale); + return VectorSupport.indexVector( + getClass(), 
Halffloat.class, length(), + this, scale, vsp, + (v, scale_, s) + -> { + // If the platform doesn't support an INDEX + // instruction directly, load IOTA from memory + // and multiply. + HalffloatVector iota = s.iota(); + short sc = (short) scale_; + return v.add(sc == 1 ? iota : iota.mul(sc)); + }); + } + + /** + * Replaces selected lanes of this vector with + * a scalar value + * under the control of a mask. + * + * This is a masked lane-wise binary operation which + * selects each lane value from one or the other input. + * + * The returned result is equal to the expression + * {@code blend(broadcast(e),m)}. + * + * @param e the input scalar, containing the replacement lane value + * @param m the mask controlling lane selection of the scalar + * @return the result of blending the lane elements of this vector with + * the scalar value + */ + @ForceInline + public final HalffloatVector blend(short e, + VectorMask<Halffloat> m) { + return blend(broadcast(e), m); + } + + /** + * Replaces selected lanes of this vector with + * a scalar value + * under the control of a mask. + * + * This is a masked lane-wise binary operation which + * selects each lane value from one or the other input. + * + * The returned result is equal to the expression + * {@code blend(broadcast(e),m)}. 
+ * + * @param e the input scalar, containing the replacement lane value + * @param m the mask controlling lane selection of the scalar + * @return the result of blending the lane elements of this vector with + * the scalar value + */ + @ForceInline + public final HalffloatVector blend(long e, + VectorMask<Halffloat> m) { + return blend(broadcast(e), m); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector slice(int origin, Vector<Halffloat> v1); + + /*package-private*/ + final + @ForceInline + HalffloatVector sliceTemplate(int origin, Vector<Halffloat> v1) { + HalffloatVector that = (HalffloatVector) v1; + that.check(this); + Objects.checkIndex(origin, length() + 1); + VectorShuffle<Halffloat> iota = iotaShuffle(); + VectorMask<Halffloat> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin)))); + iota = iotaShuffle(origin, 1, true); + return that.rearrange(iota).blend(this.rearrange(iota), blendMask); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + HalffloatVector slice(int origin, + Vector<Halffloat> w, + VectorMask<Halffloat> m) { + return broadcast(0).blend(slice(origin, w), m); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector slice(int origin); + + /*package-private*/ + final + @ForceInline + HalffloatVector sliceTemplate(int origin) { + Objects.checkIndex(origin, length() + 1); + VectorShuffle<Halffloat> iota = iotaShuffle(); + VectorMask<Halffloat> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin)))); + iota = iotaShuffle(origin, 1, true); + return vspecies().zero().blend(this.rearrange(iota), blendMask); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector unslice(int origin, Vector<Halffloat> w, int part); + + /*package-private*/ + final + @ForceInline + HalffloatVector + 
unsliceTemplate(int origin, Vector<Halffloat> w, int part) { + HalffloatVector that = (HalffloatVector) w; + that.check(this); + Objects.checkIndex(origin, length() + 1); + VectorShuffle<Halffloat> iota = iotaShuffle(); + VectorMask<Halffloat> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, + (broadcast((short)(origin)))); + iota = iotaShuffle(-origin, 1, true); + return that.blend(this.rearrange(iota), blendMask); + } + + /*package-private*/ + final + @ForceInline + <M extends VectorMask<Halffloat>> + HalffloatVector + unsliceTemplate(Class<M> maskType, int origin, Vector<Halffloat> w, int part, M m) { + HalffloatVector that = (HalffloatVector) w; + that.check(this); + HalffloatVector slice = that.sliceTemplate(origin, that); + slice = slice.blendTemplate(maskType, this, m); + return slice.unsliceTemplate(origin, w, part); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector unslice(int origin, Vector<Halffloat> w, int part, VectorMask<Halffloat> m); + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector unslice(int origin); + + /*package-private*/ + final + @ForceInline + HalffloatVector + unsliceTemplate(int origin) { + Objects.checkIndex(origin, length() + 1); + VectorShuffle<Halffloat> iota = iotaShuffle(); + VectorMask<Halffloat> blendMask = iota.toVector().compare(VectorOperators.GE, + (broadcast((short)(origin)))); + iota = iotaShuffle(-origin, 1, true); + return vspecies().zero().blend(this.rearrange(iota), blendMask); + } + + private ArrayIndexOutOfBoundsException + wrongPartForSlice(int part) { + String msg = String.format("bad part number %d for slice operation", + part); + return new ArrayIndexOutOfBoundsException(msg); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector rearrange(VectorShuffle<Halffloat> m); + + /*package-private*/ + @ForceInline + final + <S extends 
VectorShuffle<Halffloat>> + HalffloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) { + shuffle.checkIndexes(); + return VectorSupport.rearrangeOp( + getClass(), shuffletype, null, Halffloat.class, length(), + this, shuffle, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return v1.lane(ei); + })); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector rearrange(VectorShuffle<Halffloat> s, + VectorMask<Halffloat> m); + + /*package-private*/ + @ForceInline + final + <S extends VectorShuffle<Halffloat>, M extends VectorMask<Halffloat>> + HalffloatVector rearrangeTemplate(Class<S> shuffletype, + Class<M> masktype, + S shuffle, + M m) { + + m.check(masktype, this); + VectorMask<Halffloat> valid = shuffle.laneIsValid(); + if (m.andNot(valid).anyTrue()) { + shuffle.checkIndexes(); + throw new AssertionError(); + } + return VectorSupport.rearrangeOp( + getClass(), shuffletype, masktype, Halffloat.class, length(), + this, shuffle, m, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return ei < 0 || !m_.laneIsSet(i) ? 
0 : v1.lane(ei); + })); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector rearrange(VectorShuffle<Halffloat> s, + Vector<Halffloat> v); + + /*package-private*/ + @ForceInline + final + <S extends VectorShuffle<Halffloat>> + HalffloatVector rearrangeTemplate(Class<S> shuffletype, + S shuffle, + HalffloatVector v) { + VectorMask<Halffloat> valid = shuffle.laneIsValid(); + @SuppressWarnings("unchecked") + S ws = (S) shuffle.wrapIndexes(); + HalffloatVector r0 = + VectorSupport.rearrangeOp( + getClass(), shuffletype, null, Halffloat.class, length(), + this, ws, null, + (v0, s_, m_) -> v0.uOp((i, a) -> { + int ei = s_.laneSource(i); + return v0.lane(ei); + })); + HalffloatVector r1 = + VectorSupport.rearrangeOp( + getClass(), shuffletype, null, Halffloat.class, length(), + v, ws, null, + (v1, s_, m_) -> v1.uOp((i, a) -> { + int ei = s_.laneSource(i); + return v1.lane(ei); + })); + return r1.blend(r0, valid); + } + + @ForceInline + private final + VectorShuffle<Halffloat> toShuffle0(HalffloatSpecies dsp) { + short[] a = toArray(); + int[] sa = new int[a.length]; + for (int i = 0; i < a.length; i++) { + sa[i] = (int) a[i]; + } + return VectorShuffle.fromArray(dsp, sa, 0); + } + + /*package-private*/ + @ForceInline + final + VectorShuffle<Halffloat> toShuffleTemplate(Class<?> shuffleType) { + HalffloatSpecies vsp = vspecies(); + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + getClass(), short.class, length(), + shuffleType, byte.class, length(), + this, vsp, + HalffloatVector::toShuffle0); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector selectFrom(Vector<Halffloat> v); + + /*package-private*/ + @ForceInline + final HalffloatVector selectFromTemplate(HalffloatVector v) { + return v.rearrange(this.toShuffle()); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + public abstract + HalffloatVector selectFrom(Vector<Halffloat> s, 
VectorMask<Halffloat> m); + + /*package-private*/ + @ForceInline + final HalffloatVector selectFromTemplate(HalffloatVector v, + AbstractMask<Halffloat> m) { + return v.rearrange(this.toShuffle(), m); + } + + /// Ternary operations + + + /** + * Multiplies this vector by a second input vector, and sums + * the result with a third. + * + * Extended precision is used for the intermediate result, + * avoiding possible loss of precision from rounding once + * for each of the two operations. + * The result is numerically close to {@code this.mul(b).add(c)}, + * and is typically closer to the true mathematical result. + * + * This is a lane-wise ternary operation which applies an operation + * conforming to the specification of + * {@link Math#fma(short,short,short) Math.fma(a,b,c)} + * to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) + * lanewise}{@code (}{@link VectorOperators#FMA + * FMA}{@code , b, c)}. + * + * @param b the second input vector, supplying multiplier values + * @param c the third input vector, supplying addend values + * @return the product of this vector and the second input vector + * summed with the third input vector, using extended precision + * for the intermediate result + * @see #fma(short,short) + * @see VectorOperators#FMA + * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) + */ + @ForceInline + public final + HalffloatVector fma(Vector<Halffloat> b, Vector<Halffloat> c) { + return lanewise(FMA, b, c); + } + + /** + * Multiplies this vector by a scalar multiplier, and sums + * the result with a scalar addend. + * + * Extended precision is used for the intermediate result, + * avoiding possible loss of precision from rounding once + * for each of the two operations. + * The result is numerically close to {@code this.mul(b).add(c)}, + * and is typically closer to the true mathematical result. 
+ * + * This is a lane-wise ternary operation which applies an operation + * conforming to the specification of + * {@link Math#fma(short,short,short) Math.fma(a,b,c)} + * to each lane. + * + * This method is also equivalent to the expression + * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) + * lanewise}{@code (}{@link VectorOperators#FMA + * FMA}{@code , b, c)}. + * + * @param b the scalar multiplier + * @param c the scalar addend + * @return the product of this vector and the scalar multiplier + * summed with scalar addend, using extended precision + * for the intermediate result + * @see #fma(Vector,Vector) + * @see VectorOperators#FMA + * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) + */ + @ForceInline + public final + HalffloatVector fma(short b, short c) { + return lanewise(FMA, b, c); + } + + // Don't bother with (Vector,short) and (short,Vector) overloadings. + + // Type specific horizontal reductions + + /** + * Returns a value accumulated from all the lanes of this vector. + * + * This is an associative cross-lane reduction operation which + * applies the specified operation to all the lane elements. + * <p> + * A few reduction operations do not support arbitrary reordering + * of their operands, yet are included here because of their + * usefulness. + * <ul> + * <li> + * In the case of {@code FIRST_NONZERO}, the reduction returns + * the value from the lowest-numbered non-zero lane. + * (As with {@code MAX} and {@code MIN}, floating point negative + * zero {@code -0.0} is treated as a value distinct from + * the default value, positive zero. So a first-nonzero lane reduction + * might return {@code -0.0} even in the presence of non-zero + * lane values.) + * <li> + * In the case of {@code ADD} and {@code MUL}, the + * precise result will reflect the choice of an arbitrary order + * of operations, which may even vary over time. 
+ * For further details see the section + * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>. + * <li> + * All other reduction operations are fully commutative and + * associative. The implementation can choose any order of + * processing, yet it will always produce the same result. + * </ul> + * + * @param op the operation used to combine lane values + * @return the accumulated result + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #reduceLanes(VectorOperators.Associative,VectorMask) + * @see #add(Vector) + * @see #mul(Vector) + * @see #min(Vector) + * @see #max(Vector) + * @see VectorOperators#FIRST_NONZERO + */ + public abstract short reduceLanes(VectorOperators.Associative op); + + /** + * Returns a value accumulated from selected lanes of this vector, + * controlled by a mask. + * + * This is an associative cross-lane reduction operation which + * applies the specified operation to the selected lane elements. + * <p> + * If no elements are selected, an operation-specific identity + * value is returned. + * <ul> + * <li> + * If the operation is + * {@code ADD} + * or {@code FIRST_NONZERO}, + * then the identity value is positive zero, the default {@code short} value. + * <li> + * If the operation is {@code MUL}, + * then the identity value is one. + * <li> + * If the operation is {@code MAX}, + * then the identity value is {@code Halffloat.NEGATIVE_INFINITY}. + * <li> + * If the operation is {@code MIN}, + * then the identity value is {@code Halffloat.POSITIVE_INFINITY}. + * </ul> + * <p> + * A few reduction operations do not support arbitrary reordering + * of their operands, yet are included here because of their + * usefulness. + * <ul> + * <li> + * In the case of {@code FIRST_NONZERO}, the reduction returns + * the value from the lowest-numbered non-zero lane. 
+ * (As with {@code MAX} and {@code MIN}, floating point negative + * zero {@code -0.0} is treated as a value distinct from + * the default value, positive zero. So a first-nonzero lane reduction + * might return {@code -0.0} even in the presence of non-zero + * lane values.) + * <li> + * In the case of {@code ADD} and {@code MUL}, the + * precise result will reflect the choice of an arbitrary order + * of operations, which may even vary over time. + * For further details see the section + * <a href="VectorOperators.html#fp_assoc">Operations on floating point vectors</a>. + * <li> + * All other reduction operations are fully commutative and + * associative. The implementation can choose any order of + * processing, yet it will always produce the same result. + * </ul> + * + * @param op the operation used to combine lane values + * @param m the mask controlling lane selection + * @return the reduced result accumulated from the selected lane values + * @throws UnsupportedOperationException if this vector does + * not support the requested operation + * @see #reduceLanes(VectorOperators.Associative) + */ + public abstract short reduceLanes(VectorOperators.Associative op, + VectorMask<Halffloat> m); + + /*package-private*/ + @ForceInline + final + short reduceLanesTemplate(VectorOperators.Associative op, + Class<? extends VectorMask<Halffloat>> maskClass, + VectorMask<Halffloat> m) { + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + HalffloatVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, Halffloat.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, HalffloatVector::reductionOperations))); + } + + /*package-private*/ + @ForceInline + final + short reduceLanesTemplate(VectorOperators.Associative op) { + if (op == FIRST_NONZERO) { + // FIXME: The JIT should handle this, and other scan ops alos. 
+ VectorMask<Short> thisNZ + = this.viewAsIntegralLanes().compare(NE, (short) 0); + return this.lane(thisNZ.firstTrue()); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), null, Halffloat.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, HalffloatVector::reductionOperations))); + } + + private static final + ImplCache<Associative, ReductionOperation<HalffloatVector, VectorMask<Halffloat>>> + REDUCE_IMPL = new ImplCache<>(Associative.class, HalffloatVector.class); + + private static ReductionOperation<HalffloatVector, VectorMask<Halffloat>> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue())))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((short)1, m, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue())))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue())))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue())))); + default: return null; + } + } + + private + @ForceInline + HalffloatVector reduceIdentityVector(VectorOperators.Associative op) { + int opc = opCode(op); + UnaryOperator<HalffloatVector> fn + = REDUCE_ID_IMPL.find(op, opc, (opc_) -> { + switch (opc_) { + case VECTOR_OP_ADD: + return v -> v.broadcast(0); + case VECTOR_OP_MUL: + return v -> v.broadcast(1); + case VECTOR_OP_MIN: + return v -> v.broadcast(MAX_OR_INF); + case VECTOR_OP_MAX: + return v -> v.broadcast(MIN_OR_INF); + default: return null; + } + }); + return fn.apply(this); + } + private static final + ImplCache<Associative,UnaryOperator<HalffloatVector>> 
REDUCE_ID_IMPL + = new ImplCache<>(Associative.class, HalffloatVector.class); + + private static final short MIN_OR_INF = Halffloat.NEGATIVE_INFINITY; + private static final short MAX_OR_INF = Halffloat.POSITIVE_INFINITY; + + public @Override abstract long reduceLanesToLong(VectorOperators.Associative op); + public @Override abstract long reduceLanesToLong(VectorOperators.Associative op, + VectorMask<Halffloat> m); + + // Type specific accessors + + /** + * Gets the lane element at lane index {@code i} + * + * @param i the lane index + * @return the lane element at lane index {@code i} + * @throws IllegalArgumentException if the index is is out of range + * ({@code < 0 || >= length()}) + */ + public abstract short lane(int i); + + /** + * Replaces the lane element of this vector at lane index {@code i} with + * value {@code e}. + * + * This is a cross-lane operation and behaves as if it returns the result + * of blending this vector with an input vector that is the result of + * broadcasting {@code e} and a mask that has only one lane set at lane + * index {@code i}. + * + * @param i the lane index of the lane element to be replaced + * @param e the value to be placed + * @return the result of replacing the lane element of this vector at lane + * index {@code i} with value {@code e}. + * @throws IllegalArgumentException if the index is is out of range + * ({@code < 0 || >= length()}) + */ + public abstract HalffloatVector withLane(int i, short e); + + // Memory load operations + + /** + * Returns an array of type {@code short[]} + * containing all the lane values. + * The array length is the same as the vector length. + * The array elements are stored in lane order. 
+ * <p> + * This method behaves as if it stores + * this vector into an allocated array + * (using {@link #intoArray(short[], int) intoArray}) + * and returns the array as follows: + * <pre>{@code + * short[] a = new short[this.length()]; + * this.intoArray(a, 0); + * return a; + * }</pre> + * + * @return an array containing the lane values of this vector + */ + @ForceInline + @Override + public final short[] toArray() { + short[] a = new short[vspecies().laneCount()]; + intoArray(a, 0); + return a; + } + + /** {@inheritDoc} <!--workaround--> + */ + @ForceInline + @Override + public final int[] toIntArray() { + short[] a = toArray(); + int[] res = new int[a.length]; + for (int i = 0; i < a.length; i++) { + short e = a[i]; + res[i] = (int) HalffloatSpecies.toIntegralChecked(e, true); + } + return res; + } + + /** {@inheritDoc} <!--workaround--> + */ + @ForceInline + @Override + public final long[] toLongArray() { + short[] a = toArray(); + long[] res = new long[a.length]; + for (int i = 0; i < a.length; i++) { + short e = a[i]; + res[i] = HalffloatSpecies.toIntegralChecked(e, false); + } + return res; + } + + /** {@inheritDoc} <!--workaround--> + * @implNote + * When this method is used on used on vectors + * of type {@code HalffloatVector}, + * there will be no loss of precision. + */ + @ForceInline + @Override + public final double[] toDoubleArray() { + short[] a = toArray(); + double[] res = new double[a.length]; + for (int i = 0; i < a.length; i++) { + res[i] = (double) a[i]; + } + return res; + } + + /** + * Loads a vector from a byte array starting at an offset. + * Bytes are composed into primitive lane elements according + * to the specified byte order. + * The vector is arranged into lanes according to + * <a href="Vector.html#lane-order">memory ordering</a>. 
+ * <p> + * This method behaves as if it returns the result of calling + * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) + * fromByteBuffer()} as follows: + * <pre>{@code + * var bb = ByteBuffer.wrap(a); + * var m = species.maskAll(true); + * return fromByteBuffer(species, bb, offset, bo, m); + * }</pre> + * + * @param species species of desired vector + * @param a the byte array + * @param offset the offset into the array + * @param bo the intended byte order + * @return a vector loaded from a byte array + * @throws IndexOutOfBoundsException + * if {@code offset+N*ESIZE < 0} + * or {@code offset+(N+1)*ESIZE > a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public static + HalffloatVector fromByteArray(VectorSpecies<Halffloat> species, + byte[] a, int offset, + ByteOrder bo) { + offset = checkFromIndexSize(offset, species.vectorByteSize(), a.length); + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.dummyVector().fromByteArray0(a, offset).maybeSwap(bo); + } + + /** + * Loads a vector from a byte array starting at an offset + * and using a mask. + * Lanes where the mask is unset are filled with the default + * value of {@code short} (positive zero). + * Bytes are composed into primitive lane elements according + * to the specified byte order. + * The vector is arranged into lanes according to + * <a href="Vector.html#lane-order">memory ordering</a>. 
+ * <p> + * This method behaves as if it returns the result of calling + * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) + * fromByteBuffer()} as follows: + * <pre>{@code + * var bb = ByteBuffer.wrap(a); + * return fromByteBuffer(species, bb, offset, bo, m); + * }</pre> + * + * @param species species of desired vector + * @param a the byte array + * @param offset the offset into the array + * @param bo the intended byte order + * @param m the mask controlling lane selection + * @return a vector loaded from a byte array + * @throws IndexOutOfBoundsException + * if {@code offset+N*ESIZE < 0} + * or {@code offset+(N+1)*ESIZE > a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public static + HalffloatVector fromByteArray(VectorSpecies<Halffloat> species, + byte[] a, int offset, + ByteOrder bo, + VectorMask<Halffloat> m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + if (offset >= 0 && offset <= (a.length - species.vectorByteSize())) { + return vsp.dummyVector().fromByteArray0(a, offset, m).maybeSwap(bo); + } + + // FIXME: optimize + checkMaskFromIndexSize(offset, vsp, m, 2, a.length); + ByteBuffer wb = wrapper(a, bo); + return vsp.ldOp(wb, offset, (AbstractMask<Halffloat>)m, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + } + + /** + * Loads a vector from an array of type {@code short[]} + * starting at an offset. + * For each vector lane, where {@code N} is the vector lane index, the + * array element at index {@code offset + N} is placed into the + * resulting vector at lane index {@code N}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array + * @return the vector loaded from an array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies<Halffloat> species, + short[] a, int offset) { + offset = checkFromIndexSize(offset, species.length(), a.length); + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.dummyVector().fromArray0(a, offset); + } + + /** + * Loads a vector from an array of type {@code short[]} + * starting at an offset and using a mask. + * Lanes where the mask is unset are filled with the default + * value of {@code short} (positive zero). + * For each vector lane, where {@code N} is the vector lane index, + * if the mask lane at index {@code N} is set then the array element at + * index {@code offset + N} is placed into the resulting vector at lane index + * {@code N}, otherwise the default element value is placed into the + * resulting vector at lane index {@code N}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array + * @param m the mask controlling lane selection + * @return the vector loaded from an array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies<Halffloat> species, + short[] a, int offset, + VectorMask<Halffloat> m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + if (offset >= 0 && offset <= (a.length - species.length())) { + return vsp.dummyVector().fromArray0(a, offset, m); + } + + // FIXME: optimize + checkMaskFromIndexSize(offset, vsp, m, 1, a.length); + return vsp.vOp(m, i -> a[offset + i]); + } + + /** + * Gathers a new vector composed of elements from an array of type + * {@code short[]}, + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane is loaded from the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]]}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array, may be negative if relative + * indexes in the index map compensate to produce a value within the + * array bounds + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @return the vector loaded from the indexed elements of the array + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies<Halffloat> species, + short[] a, int offset, + int[] indexMap, int mapOffset) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]); + } + + /** + * Gathers a new vector composed of elements from an array of type + * {@code short[]}, + * under the control of a mask, and + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * if the lane is set in the mask, + * the lane is loaded from the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}. + * Unset lanes in the resulting vector are set to zero.
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array, may be negative if relative + * indexes in the index map compensate to produce a value within the + * array bounds + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @param m the mask controlling lane selection + * @return the vector loaded from the indexed elements of the array + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * where the mask is set + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public static + HalffloatVector fromArray(VectorSpecies<Halffloat> species, + short[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask<Halffloat> m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); + } + + /** + * Loads a vector from an array of type {@code char[]} + * starting at an offset. + * For each vector lane, where {@code N} is the vector lane index, the + * array element at index {@code offset + N} + * is first cast to a {@code short} value and then + * placed into the resulting vector at lane index {@code N}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array + * @return the vector loaded from an array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public static + HalffloatVector fromCharArray(VectorSpecies<Halffloat> species, + char[] a, int offset) { + offset = checkFromIndexSize(offset, species.length(), a.length); + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.dummyVector().fromCharArray0(a, offset); + } + + /** + * Loads a vector from an array of type {@code char[]} + * starting at an offset and using a mask. + * Lanes where the mask is unset are filled with the default + * value of {@code short} (positive zero). + * For each vector lane, where {@code N} is the vector lane index, + * if the mask lane at index {@code N} is set then the array element at + * index {@code offset + N} + * is first cast to a {@code short} value and then + * placed into the resulting vector at lane index + * {@code N}, otherwise the default element value is placed into the + * resulting vector at lane index {@code N}. 
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array + * @param m the mask controlling lane selection + * @return the vector loaded from an array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public static + HalffloatVector fromCharArray(VectorSpecies<Halffloat> species, + char[] a, int offset, + VectorMask<Halffloat> m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + if (offset >= 0 && offset <= (a.length - species.length())) { + return vsp.dummyVector().fromCharArray0(a, offset, m); + } + + // FIXME: optimize + checkMaskFromIndexSize(offset, vsp, m, 1, a.length); + return vsp.vOp(m, i -> (short) a[offset + i]); + } + + /** + * Gathers a new vector composed of elements from an array of type + * {@code char[]}, + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane is loaded from the expression + * {@code (short) a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}.
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array, may be negative if relative + * indexes in the index map compensate to produce a value within the + * array bounds + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @return the vector loaded from the indexed elements of the array + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public static + HalffloatVector fromCharArray(VectorSpecies<Halffloat> species, + char[] a, int offset, + int[] indexMap, int mapOffset) { + // FIXME: optimize + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.vOp(n -> (short) a[offset + indexMap[mapOffset + n]]); + } + + /** + * Gathers a new vector composed of elements from an array of type + * {@code char[]}, + * under the control of a mask, and + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * if the lane is set in the mask, + * the lane is loaded from the expression + * {@code (short) a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}. + * Unset lanes in the resulting vector are set to zero.
+ * + * @param species species of desired vector + * @param a the array + * @param offset the offset into the array, may be negative if relative + * indexes in the index map compensate to produce a value within the + * array bounds + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @param m the mask controlling lane selection + * @return the vector loaded from the indexed elements of the array + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * where the mask is set + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public static + HalffloatVector fromCharArray(VectorSpecies<Halffloat> species, + char[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask<Halffloat> m) { + // FIXME: optimize + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]); + } + + + /** + * Loads a vector from a {@linkplain ByteBuffer byte buffer} + * starting at an offset into the byte buffer. + * Bytes are composed into primitive lane elements according + * to the specified byte order. + * The vector is arranged into lanes according to + * <a href="Vector.html#lane-order">memory ordering</a>. 
+ * <p> + * This method behaves as if it returns the result of calling + * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) + * fromByteBuffer()} as follows: + * <pre>{@code + * var m = species.maskAll(true); + * return fromByteBuffer(species, bb, offset, bo, m); + * }</pre> + * + * @param species species of desired vector + * @param bb the byte buffer + * @param offset the offset into the byte buffer + * @param bo the intended byte order + * @return a vector loaded from a byte buffer + * @throws IndexOutOfBoundsException + * if {@code offset+N*2 < 0} + * or {@code offset+N*2 >= bb.limit()} + * for any lane {@code N} in the vector + */ + @ForceInline + public static + HalffloatVector fromByteBuffer(VectorSpecies<Halffloat> species, + ByteBuffer bb, int offset, + ByteOrder bo) { + offset = checkFromIndexSize(offset, species.vectorByteSize(), bb.limit()); + HalffloatSpecies vsp = (HalffloatSpecies) species; + return vsp.dummyVector().fromByteBuffer0(bb, offset).maybeSwap(bo); + } + + /** + * Loads a vector from a {@linkplain ByteBuffer byte buffer} + * starting at an offset into the byte buffer + * and using a mask. + * Lanes where the mask is unset are filled with the default + * value of {@code short} (positive zero). + * Bytes are composed into primitive lane elements according + * to the specified byte order. + * The vector is arranged into lanes according to + * <a href="Vector.html#lane-order">memory ordering</a>.
+ * <p> + * The following pseudocode illustrates the behavior: + * <pre>{@code + * ShortBuffer eb = bb.duplicate() + * .position(offset) + * .order(bo).asShortBuffer(); + * short[] ar = new short[species.length()]; + * for (int n = 0; n < ar.length; n++) { + * if (m.laneIsSet(n)) { + * ar[n] = eb.get(n); + * } + * } + * HalffloatVector r = HalffloatVector.fromArray(species, ar, 0); + * }</pre> + * @implNote + * This operation is likely to be more efficient if + * the specified byte order is the same as + * {@linkplain ByteOrder#nativeOrder() + * the platform native order}, + * since this method will not need to reorder + * the bytes of lane values. + * + * @param species species of desired vector + * @param bb the byte buffer + * @param offset the offset into the byte buffer + * @param bo the intended byte order + * @param m the mask controlling lane selection + * @return a vector loaded from a byte buffer + * @throws IndexOutOfBoundsException + * if {@code offset+N*2 < 0} + * or {@code offset+N*2 >= bb.limit()} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public static + HalffloatVector fromByteBuffer(VectorSpecies<Halffloat> species, + ByteBuffer bb, int offset, + ByteOrder bo, + VectorMask<Halffloat> m) { + HalffloatSpecies vsp = (HalffloatSpecies) species; + if (offset >= 0 && offset <= (bb.limit() - species.vectorByteSize())) { + return vsp.dummyVector().fromByteBuffer0(bb, offset, m).maybeSwap(bo); + } + + // FIXME: optimize + checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit()); + ByteBuffer wb = wrapper(bb, bo); + return vsp.ldOp(wb, offset, (AbstractMask<Halffloat>)m, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + } + + // Memory store operations + + /** + * Stores this vector into an array of type {@code short[]} + * starting at an offset.
+ * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} is stored into the array + * element {@code a[offset+N]}. + * + * @param a the array, of type {@code short[]} + * @param offset the offset into the array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public final + void intoArray(short[] a, int offset) { + offset = checkFromIndexSize(offset, length(), a.length); + HalffloatSpecies vsp = vspecies(); + VectorSupport.store( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, + a, offset, + (arr, off, v) + -> v.stOp(arr, off, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + /** + * Stores this vector into an array of type {@code short[]} + * starting at offset and using a mask. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} is stored into the array + * element {@code a[offset+N]}. + * If the mask lane at {@code N} is unset then the corresponding + * array element {@code a[offset+N]} is left unchanged. + * <p> + * Array range checking is done for lanes where the mask is set. + * Lanes where the mask is unset are not stored and do not need + * to correspond to legitimate elements of {@code a}. + * That is, unset lanes may correspond to array indexes less than + * zero or beyond the end of the array. 
+ * + * @param a the array, of type {@code short[]} + * @param offset the offset into the array + * @param m the mask controlling lane storage + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public final + void intoArray(short[] a, int offset, + VectorMask<Halffloat> m) { + if (m.allTrue()) { + intoArray(a, offset); + } else { + HalffloatSpecies vsp = vspecies(); + checkMaskFromIndexSize(offset, vsp, m, 1, a.length); + intoArray0(a, offset, m); + } + } + + /** + * Scatters this vector into an array of type {@code short[]} + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} is stored into the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}.
+ * + * @param a the array + * @param offset an offset to combine with the index map offsets + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public final + void intoArray(short[] a, int offset, + int[] indexMap, int mapOffset) { + stOp(a, offset, + (arr, off, i, e) -> { + int j = indexMap[mapOffset + i]; + arr[off + j] = e; + }); + } + + /** + * Scatters this vector into an array of type {@code short[]}, + * under the control of a mask, and + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * if the mask lane at index {@code N} is set then + * the lane element at index {@code N} is stored into the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}.
+ * + * @param a the array + * @param offset an offset to combine with the index map offsets + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @param m the mask + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * where the mask is set + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public final + void intoArray(short[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask<Halffloat> m) { + stOp(a, offset, m, + (arr, off, i, e) -> { + int j = indexMap[mapOffset + i]; + arr[off + j] = e; + }); + } + + /** + * Stores this vector into an array of type {@code char[]} + * starting at an offset. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} + * is first cast to a {@code char} value and then + * stored into the array element {@code a[offset+N]}. + * + * @param a the array, of type {@code char[]} + * @param offset the offset into the array + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + */ + @ForceInline + public final + void intoCharArray(char[] a, int offset) { + offset = checkFromIndexSize(offset, length(), a.length); + HalffloatSpecies vsp = vspecies(); + VectorSupport.store( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), + this, + a, offset, + (arr, off, v) + -> v.stOp(arr, off, + (arr_, off_, i, e) -> arr_[off_ + i] = (char) e)); + } + + /** + * Stores this vector into an array of type {@code char[]} + * starting at offset and using a mask. 
+ * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} + * is first cast to a {@code char} value and then + * stored into the array element {@code a[offset+N]}. + * If the mask lane at {@code N} is unset then the corresponding + * array element {@code a[offset+N]} is left unchanged. + * <p> + * Array range checking is done for lanes where the mask is set. + * Lanes where the mask is unset are not stored and do not need + * to correspond to legitimate elements of {@code a}. + * That is, unset lanes may correspond to array indexes less than + * zero or beyond the end of the array. + * + * @param a the array, of type {@code char[]} + * @param offset the offset into the array + * @param m the mask controlling lane storage + * @throws IndexOutOfBoundsException + * if {@code offset+N < 0} or {@code offset+N >= a.length} + * for any lane {@code N} in the vector + * where the mask is set + */ + @ForceInline + public final + void intoCharArray(char[] a, int offset, + VectorMask<Halffloat> m) { + if (m.allTrue()) { + intoCharArray(a, offset); + } else { + HalffloatSpecies vsp = vspecies(); + checkMaskFromIndexSize(offset, vsp, m, 1, a.length); + intoCharArray0(a, offset, m); + } + } + + /** + * Scatters this vector into an array of type {@code char[]} + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * the lane element at index {@code N} + * is first cast to a {@code char} value and then + * stored into the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}.
+ * + * @param a the array + * @param offset an offset to combine with the index map offsets + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public final + void intoCharArray(char[] a, int offset, + int[] indexMap, int mapOffset) { + // FIXME: optimize + stOp(a, offset, + (arr, off, i, e) -> { + int j = indexMap[mapOffset + i]; + arr[off + j] = (char) e; + }); + } + + /** + * Scatters this vector into an array of type {@code char[]}, + * under the control of a mask, and + * using indexes obtained by adding a fixed {@code offset} to a + * series of secondary offsets from an <em>index map</em>. + * The index map is a contiguous sequence of {@code VLENGTH} + * elements in a second array of {@code int}s, starting at a given + * {@code mapOffset}. + * <p> + * For each vector lane, where {@code N} is the vector lane index, + * if the mask lane at index {@code N} is set then + * the lane element at index {@code N} + * is first cast to a {@code char} value and then + * stored into the array + * element {@code a[f(N)]}, where {@code f(N)} is the + * index mapping expression + * {@code offset + indexMap[mapOffset + N]}.
+ * + * @param a the array + * @param offset an offset to combine with the index map offsets + * @param indexMap the index map + * @param mapOffset the offset into the index map + * @param m the mask + * @throws IndexOutOfBoundsException + * if {@code mapOffset+N < 0} + * or if {@code mapOffset+N >= indexMap.length}, + * or if {@code f(N)=offset+indexMap[mapOffset+N]} + * is an invalid index into {@code a}, + * for any lane {@code N} in the vector + * where the mask is set + * @see HalffloatVector#toIntArray() + */ + @ForceInline + public final + void intoCharArray(char[] a, int offset, + int[] indexMap, int mapOffset, + VectorMask<Halffloat> m) { + // FIXME: optimize + stOp(a, offset, m, + (arr, off, i, e) -> { + int j = indexMap[mapOffset + i]; + arr[off + j] = (char) e; + }); + } + + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + void intoByteArray(byte[] a, int offset, + ByteOrder bo) { + offset = checkFromIndexSize(offset, byteSize(), a.length); + maybeSwap(bo).intoByteArray0(a, offset); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + void intoByteArray(byte[] a, int offset, + ByteOrder bo, + VectorMask<Halffloat> m) { + if (m.allTrue()) { + intoByteArray(a, offset, bo); + } else { + HalffloatSpecies vsp = vspecies(); + checkMaskFromIndexSize(offset, vsp, m, 2, a.length); + maybeSwap(bo).intoByteArray0(a, offset, m); + } + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + void intoByteBuffer(ByteBuffer bb, int offset, + ByteOrder bo) { + if (ScopedMemoryAccess.isReadOnly(bb)) { + throw new ReadOnlyBufferException(); + } + offset = checkFromIndexSize(offset, byteSize(), bb.limit()); + maybeSwap(bo).intoByteBuffer0(bb, offset); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + void intoByteBuffer(ByteBuffer bb, int offset, + ByteOrder bo, + VectorMask<Halffloat> m) { + if 
(m.allTrue()) { + intoByteBuffer(bb, offset, bo); + } else { + if (bb.isReadOnly()) { + throw new ReadOnlyBufferException(); + } + HalffloatSpecies vsp = vspecies(); + checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit()); + maybeSwap(bo).intoByteBuffer0(bb, offset, m); + } + } + + // ================================================ + + // Low-level memory operations. + // + // Note that all of these operations *must* inline into a context + // where the exact species of the involved vector is a + // compile-time constant. Otherwise, the intrinsic generation + // will fail and performance will suffer. + // + // In many cases this is achieved by re-deriving a version of the + // method in each concrete subclass (per species). The re-derived + // method simply calls one of these generic methods, with exact + // parameters for the controlling metadata, which is either a + // typed vector or constant species instance. + + // Unchecked loading operations in native byte order. + // Caller is responsible for applying index checks, masking, and + // byte swapping. 
+ + /*package-private*/ + abstract + HalffloatVector fromArray0(short[] a, int offset); + @ForceInline + final + HalffloatVector fromArray0Template(short[] a, int offset) { + HalffloatSpecies vsp = vspecies(); + return VectorSupport.load( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + a, offset, vsp, + (arr, off, s) -> s.ldOp(arr, off, + (arr_, off_, i) -> arr_[off_ + i])); + } + + /*package-private*/ + abstract + HalffloatVector fromArray0(short[] a, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + HalffloatVector fromArray0Template(Class<M> maskClass, short[] a, int offset, M m) { + m.check(species()); + HalffloatSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> arr_[off_ + i])); + } + + + /*package-private*/ + abstract + HalffloatVector fromCharArray0(char[] a, int offset); + @ForceInline + final + HalffloatVector fromCharArray0Template(char[] a, int offset) { + HalffloatSpecies vsp = vspecies(); + return VectorSupport.load( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), + a, offset, vsp, + (arr, off, s) -> s.ldOp(arr, off, + (arr_, off_, i) -> (short) arr_[off_ + i])); + } + + /*package-private*/ + abstract + HalffloatVector fromCharArray0(char[] a, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + HalffloatVector fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) { + m.check(species()); + HalffloatSpecies vsp = vspecies(); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> s.ldOp(arr, off, vm, + (arr_, off_, i) -> (short) arr_[off_ + i])); + } + 
+ + @Override + abstract + HalffloatVector fromByteArray0(byte[] a, int offset); + @ForceInline + final + HalffloatVector fromByteArray0Template(byte[] a, int offset) { + HalffloatSpecies vsp = vspecies(); + return VectorSupport.load( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + a, offset, vsp, + (arr, off, s) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + }); + } + + abstract + HalffloatVector fromByteArray0(byte[] a, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + HalffloatVector fromByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) { + HalffloatSpecies vsp = vspecies(); + m.check(vsp); + return VectorSupport.loadMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), m, + a, offset, vsp, + (arr, off, s, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + }); + } + + abstract + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset); + @ForceInline + final + HalffloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) { + HalffloatSpecies vsp = vspecies(); + return ScopedMemoryAccess.loadFromByteBuffer( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + bb, offset, vsp, + (buf, off, s) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + }); + } + + abstract + HalffloatVector fromByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + HalffloatVector fromByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) { + HalffloatSpecies vsp = vspecies(); + m.check(vsp); + return ScopedMemoryAccess.loadFromByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), 
+ bb, offset, m, vsp, + (buf, off, s, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + return s.ldOp(wb, off, vm, + (wb_, o, i) -> wb_.getShort(o + i * 2)); + }); + } + + // Unchecked storing operations in native byte order. + // Caller is responsible for applying index checks, masking, and + // byte swapping. + + abstract + void intoArray0(short[] a, int offset); + @ForceInline + final + void intoArray0Template(short[] a, int offset) { + HalffloatSpecies vsp = vspecies(); + VectorSupport.store( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, a, offset, + (arr, off, v) + -> v.stOp(arr, off, + (arr_, off_, i, e) -> arr_[off_+i] = e)); + } + + abstract + void intoArray0(short[] a, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + void intoArray0Template(Class<M> maskClass, short[] a, int offset, M m) { + m.check(species()); + HalffloatSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, arrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = e)); + } + + + + abstract + void intoByteArray0(byte[] a, int offset); + @ForceInline + final + void intoByteArray0Template(byte[] a, int offset) { + HalffloatSpecies vsp = vspecies(); + VectorSupport.store( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, a, offset, + (arr, off, v) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, + (tb_, o, i, e) -> tb_.putShort(o + i * 2, e)); + }); + } + + abstract + void intoByteArray0(byte[] a, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + void intoByteArray0Template(Class<M> maskClass, byte[] a, int offset, M m) { + HalffloatSpecies vsp = vspecies(); + m.check(vsp); + VectorSupport.storeMasked( + vsp.vectorType(), 
maskClass, vsp.elementType(), vsp.laneCount(), + a, byteArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) -> { + ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (tb_, o, i, e) -> tb_.putShort(o + i * 2, e)); + }); + } + + @ForceInline + final + void intoByteBuffer0(ByteBuffer bb, int offset) { + HalffloatSpecies vsp = vspecies(); + ScopedMemoryAccess.storeIntoByteBuffer( + vsp.vectorType(), vsp.elementType(), vsp.laneCount(), + this, bb, offset, + (buf, off, v) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, + (wb_, o, i, e) -> wb_.putShort(o + i * 2, e)); + }); + } + + abstract + void intoByteBuffer0(ByteBuffer bb, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + void intoByteBuffer0Template(Class<M> maskClass, ByteBuffer bb, int offset, M m) { + HalffloatSpecies vsp = vspecies(); + m.check(vsp); + ScopedMemoryAccess.storeIntoByteBufferMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + this, m, bb, offset, + (buf, off, v, vm) -> { + ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); + v.stOp(wb, off, vm, + (wb_, o, i, e) -> wb_.putShort(o + i * 2, e)); + }); + } + + /*package-private*/ + abstract + void intoCharArray0(char[] a, int offset, VectorMask<Halffloat> m); + @ForceInline + final + <M extends VectorMask<Halffloat>> + void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) { + m.check(species()); + HalffloatSpecies vsp = vspecies(); + VectorSupport.storeMasked( + vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(), + a, charArrayAddress(a, offset), + this, m, a, offset, + (arr, off, v, vm) + -> v.stOp(arr, off, vm, + (arr_, off_, i, e) -> arr_[off_ + i] = (char) e)); + } + + // End of low-level memory operations. 
+ + private static + void checkMaskFromIndexSize(int offset, + HalffloatSpecies vsp, + VectorMask<Halffloat> m, + int scale, + int limit) { + ((AbstractMask<Halffloat>)m) + .checkIndexByLane(offset, limit, vsp.iota(), scale); + } + + @ForceInline + private void conditionalStoreNYI(int offset, + HalffloatSpecies vsp, + VectorMask<Halffloat> m, + int scale, + int limit) { + if (offset < 0 || offset + vsp.laneCount() * scale > limit) { + String msg = + String.format("unimplemented: store @%d in [0..%d), %s in %s", + offset, limit, m, vsp); + throw new AssertionError(msg); + } + } + + /*package-private*/ + @Override + @ForceInline + final + HalffloatVector maybeSwap(ByteOrder bo) { + if (bo != NATIVE_ENDIAN) { + return this.reinterpretAsBytes() + .rearrange(swapBytesShuffle()) + .reinterpretAsHalffloats(); + } + return this; + } + + static final int ARRAY_SHIFT = + 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE); + static final long ARRAY_BASE = + Unsafe.ARRAY_SHORT_BASE_OFFSET; + + @ForceInline + static long arrayAddress(short[] a, int index) { + return ARRAY_BASE + (((long)index) << ARRAY_SHIFT); + } + + static final int ARRAY_CHAR_SHIFT = + 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE); + static final long ARRAY_CHAR_BASE = + Unsafe.ARRAY_CHAR_BASE_OFFSET; + + @ForceInline + static long charArrayAddress(char[] a, int index) { + return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT); + } + + + @ForceInline + static long byteArrayAddress(byte[] a, int index) { + return Unsafe.ARRAY_BYTE_BASE_OFFSET + index; + } + + // ================================================ + + /// Reinterpreting view methods: + // lanewise reinterpret: viewAsXVector() + // keep shape, redraw lanes: reinterpretAsEs() + + /** + * {@inheritDoc} <!--workaround--> + */ + @ForceInline + @Override + public final ByteVector reinterpretAsBytes() { + // Going to ByteVector, pay close attention to byte order. 
+ assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN); + return asByteVectorRaw(); + //return asByteVectorRaw().rearrange(swapBytesShuffle()); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @ForceInline + @Override + public final ShortVector viewAsIntegralLanes() { + LaneType ilt = LaneType.SHORT.asIntegral(); + return (ShortVector) asVectorRaw(ilt); + } + + /** + * {@inheritDoc} <!--workaround--> + * + * @implNote This method returns this vector unchanged. A + * {@code HalffloatVector} already holds half-precision floating-point + * lanes, so there is no other floating-point view of the same lanes + * to switch to. The integral view of these lanes is the + * {@code ShortVector} returned by {@link #viewAsIntegralLanes()}. + * This method never throws {@code UnsupportedOperationException}. + */ + @ForceInline + @Override + public final + HalffloatVector + viewAsFloatingLanes() { + return this; + } + + // ================================================ + + /// Object methods: toString, equals, hashCode + // + // Object methods are defined as if via Arrays.toString, etc., + // is applied to the array of elements. Two equal vectors + // are required to have equal species and equal lane values. + + /** + * Returns a string representation of this vector, of the form + * {@code "[0,1,2...]"}, reporting the lane values of this vector, + * in lane order. + * + * The string is produced as if by a call to {@link + * java.util.Arrays#toString(short[]) Arrays.toString()}, + * as appropriate to the {@code short} array returned by + * {@link #toArray this.toArray()}. 
+ * + * @return a string of the form {@code "[0,1,2...]"} + * reporting the lane values of this vector + */ + @Override + @ForceInline + public final + String toString() { + // now that toArray is strongly typed, we can define this + return Arrays.toString(toArray()); + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + boolean equals(Object obj) { + if (obj instanceof Vector) { + Vector<?> that = (Vector<?>) obj; + if (this.species().equals(that.species())) { + return this.eq(that.check(this.species())).allTrue(); + } + } + return false; + } + + /** + * {@inheritDoc} <!--workaround--> + */ + @Override + @ForceInline + public final + int hashCode() { + // now that toArray is strongly typed, we can define this + return Objects.hash(species(), Arrays.hashCode(toArray())); + } + + // ================================================ + + // Species + + /** + * Class representing {@link HalffloatVector}'s of the same {@link VectorShape VectorShape}. + */ + /*package-private*/ + static final class HalffloatSpecies extends AbstractSpecies<Halffloat> { + private HalffloatSpecies(VectorShape shape, + Class<? extends HalffloatVector> vectorType, + Class<? extends AbstractMask<Halffloat>> maskType, + Function<Object, HalffloatVector> vectorFactory) { + super(shape, LaneType.of(short.class), + vectorType, maskType, + vectorFactory); + assert(this.elementSize() == Halffloat.SIZE); + } + + // Specializing overrides: + + @Override + @ForceInline + public final Class<Halffloat> elementType() { + return Halffloat.class; + } + + @Override + @ForceInline + final Class<Halffloat> genericElementType() { + return Halffloat.class; + } + + @SuppressWarnings("unchecked") + @Override + @ForceInline + public final Class<? extends HalffloatVector> vectorType() { + return (Class<? 
extends HalffloatVector>) vectorType; + } + + @Override + @ForceInline + public final long checkValue(long e) { + longToElementBits(e); // only for exception + return e; + } + + /*package-private*/ + @Override + @ForceInline + final HalffloatVector broadcastBits(long bits) { + return (HalffloatVector) + VectorSupport.broadcastCoerced( + vectorType, Halffloat.class, laneCount, + bits, this, + (bits_, s_) -> s_.rvOp(i -> bits_)); + } + + /*package-private*/ + @ForceInline + final HalffloatVector broadcast(short e) { + return broadcastBits(toBits(e)); + } + + @Override + @ForceInline + public final HalffloatVector broadcast(long e) { + return broadcastBits(longToElementBits(e)); + } + + /*package-private*/ + final @Override + @ForceInline + long longToElementBits(long value) { + // Do the conversion, and then test it for failure. + short e = (short) value; + if ((long) e != value) { + throw badElementBits(value, e); + } + return toBits(e); + } + + /*package-private*/ + @ForceInline + static long toIntegralChecked(short e, boolean convertToInt) { + long value = convertToInt ? (int) e : (long) e; + if ((short) value != e) { + throw badArrayBits(e, convertToInt, value); + } + return value; + } + + /* this non-public one is for internal conversions */ + @Override + @ForceInline + final HalffloatVector fromIntValues(int[] values) { + VectorIntrinsics.requireLength(values.length, laneCount); + short[] va = new short[laneCount()]; + for (int i = 0; i < va.length; i++) { + int lv = values[i]; + short v = (short) lv; + va[i] = v; + if ((int)v != lv) { + throw badElementBits(lv, v); + } + } + return dummyVector().fromArray0(va, 0); + } + + // Virtual constructors + + @ForceInline + @Override final + public HalffloatVector fromArray(Object a, int offset) { + // User entry point: Be careful with inputs. 
+ return HalffloatVector + .fromArray(this, (short[]) a, offset); + } + + @ForceInline + @Override final + HalffloatVector dummyVector() { + return (HalffloatVector) super.dummyVector(); + } + + /*package-private*/ + final @Override + @ForceInline + HalffloatVector rvOp(RVOp f) { + short[] res = new short[laneCount()]; + for (int i = 0; i < res.length; i++) { + short bits = (short) f.apply(i); + res[i] = fromBits(bits); + } + return dummyVector().vectorFactory(res); + } + + HalffloatVector vOp(FVOp f) { + short[] res = new short[laneCount()]; + for (int i = 0; i < res.length; i++) { + res[i] = f.apply(i); + } + return dummyVector().vectorFactory(res); + } + + HalffloatVector vOp(VectorMask<Halffloat> m, FVOp f) { + short[] res = new short[laneCount()]; + boolean[] mbits = ((AbstractMask<Halffloat>)m).getBits(); + for (int i = 0; i < res.length; i++) { + if (mbits[i]) { + res[i] = f.apply(i); + } + } + return dummyVector().vectorFactory(res); + } + + /*package-private*/ + @ForceInline + <M> HalffloatVector ldOp(M memory, int offset, + FLdOp<M> f) { + return dummyVector().ldOp(memory, offset, f); + } + + /*package-private*/ + @ForceInline + <M> HalffloatVector ldOp(M memory, int offset, + VectorMask<Halffloat> m, + FLdOp<M> f) { + return dummyVector().ldOp(memory, offset, m, f); + } + + /*package-private*/ + @ForceInline + <M> void stOp(M memory, int offset, FStOp<M> f) { + dummyVector().stOp(memory, offset, f); + } + + /*package-private*/ + @ForceInline + <M> void stOp(M memory, int offset, + AbstractMask<Halffloat> m, + FStOp<M> f) { + dummyVector().stOp(memory, offset, m, f); + } + + // N.B. Make sure these constant vectors and + // masks load up correctly into registers. + // + // Also, see if we can avoid all that switching. + // Could we cache both vectors and both masks in + // this species object? 
+ + // Zero and iota vector access + @Override + @ForceInline + public final HalffloatVector zero() { + if ((Class<?>) vectorType() == HalffloatMaxVector.class) + return HalffloatMaxVector.ZERO; + switch (vectorBitSize()) { + case 64: return Halffloat64Vector.ZERO; + case 128: return Halffloat128Vector.ZERO; + case 256: return Halffloat256Vector.ZERO; + case 512: return Halffloat512Vector.ZERO; + } + throw new AssertionError(); + } + + @Override + @ForceInline + public final HalffloatVector iota() { + if ((Class<?>) vectorType() == HalffloatMaxVector.class) + return HalffloatMaxVector.IOTA; + switch (vectorBitSize()) { + case 64: return Halffloat64Vector.IOTA; + case 128: return Halffloat128Vector.IOTA; + case 256: return Halffloat256Vector.IOTA; + case 512: return Halffloat512Vector.IOTA; + } + throw new AssertionError(); + } + + // Mask access + @Override + @ForceInline + public final VectorMask<Halffloat> maskAll(boolean bit) { + if ((Class<?>) vectorType() == HalffloatMaxVector.class) + return HalffloatMaxVector.HalffloatMaxMask.maskAll(bit); + switch (vectorBitSize()) { + case 64: return Halffloat64Vector.Halffloat64Mask.maskAll(bit); + case 128: return Halffloat128Vector.Halffloat128Mask.maskAll(bit); + case 256: return Halffloat256Vector.Halffloat256Mask.maskAll(bit); + case 512: return Halffloat512Vector.Halffloat512Mask.maskAll(bit); + } + throw new AssertionError(); + } + } + + /** + * Finds a species for an element type of {@code Halffloat} and shape. 
+ * + * @param s the shape + * @return a species for an element type of {@code Halffloat} and shape + * @throws IllegalArgumentException if no such species exists for the shape + */ + static HalffloatSpecies species(VectorShape s) { + Objects.requireNonNull(s); + switch (s) { + case S_64_BIT: return (HalffloatSpecies) SPECIES_64; + case S_128_BIT: return (HalffloatSpecies) SPECIES_128; + case S_256_BIT: return (HalffloatSpecies) SPECIES_256; + case S_512_BIT: return (HalffloatSpecies) SPECIES_512; + case S_Max_BIT: return (HalffloatSpecies) SPECIES_MAX; + default: throw new IllegalArgumentException("Bad shape: " + s); + } + } + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */ + public static final VectorSpecies<Halffloat> SPECIES_64 + = new HalffloatSpecies(VectorShape.S_64_BIT, + Halffloat64Vector.class, + Halffloat64Vector.Halffloat64Mask.class, + Halffloat64Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ + public static final VectorSpecies<Halffloat> SPECIES_128 + = new HalffloatSpecies(VectorShape.S_128_BIT, + Halffloat128Vector.class, + Halffloat128Vector.Halffloat128Mask.class, + Halffloat128Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ + public static final VectorSpecies<Halffloat> SPECIES_256 + = new HalffloatSpecies(VectorShape.S_256_BIT, + Halffloat256Vector.class, + Halffloat256Vector.Halffloat256Mask.class, + Halffloat256Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. 
*/ + public static final VectorSpecies<Halffloat> SPECIES_512 + = new HalffloatSpecies(VectorShape.S_512_BIT, + Halffloat512Vector.class, + Halffloat512Vector.Halffloat512Mask.class, + Halffloat512Vector::new); + + /** Species representing {@link HalffloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ + public static final VectorSpecies<Halffloat> SPECIES_MAX + = new HalffloatSpecies(VectorShape.S_Max_BIT, + HalffloatMaxVector.class, + HalffloatMaxVector.HalffloatMaxMask.class, + HalffloatMaxVector::new); + + /** + * Preferred species for {@link HalffloatVector}s. + * A preferred species is a species of maximal bit-size for the platform. + */ + public static final VectorSpecies<Halffloat> SPECIES_PREFERRED + = (HalffloatSpecies) VectorSpecies.ofPreferred(Halffloat.class); +} diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java index d402938f5b6..893adf62b01 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java @@ -40,7 +40,8 @@ enum LaneType { BYTE(byte.class, Byte.class, byte[].class, 'I', -1, Byte.SIZE, T_BYTE), SHORT(short.class, Short.class, short[].class, 'I', -1, Short.SIZE, T_SHORT), INT(int.class, Integer.class, int[].class, 'I', -1, Integer.SIZE, T_INT), - LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG); + LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, T_LONG), + HALFFLOAT(Halffloat.class, Short.class, short[].class, 'F', 11, Halffloat.SIZE, T_HALFFLOAT); LaneType(Class<?> elementType, Class<?> genericElementType, @@ -66,13 +67,13 @@ enum LaneType { // printName. If we do unsigned or vector or bit lane types, // report that condition also. 
this.typeChar = printName.toUpperCase().charAt(0); - assert("FDBSIL".indexOf(typeChar) == ordinal()) : this; + assert("FDBSILH".indexOf(typeChar) == ordinal()) : this; // Same as in JVMS, org.objectweb.asm.Opcodes, etc.: this.basicType = basicType; assert(basicType == ( (elementSizeLog2 - /*lg(Byte.SIZE)*/ 3) | (elementKind == 'F' ? 4 : 8))) : this; - assert("....zcFDBSILoav..".charAt(basicType) == typeChar); + assert("....zHFDBSILoav..".charAt(basicType) == typeChar); } final Class<?> elementType; @@ -176,13 +177,14 @@ RuntimeException badElementType(Class<?> elementType, Object expected) { // don't optimize properly; see JDK-8161245 static final int - SK_FLOAT = 1, - SK_DOUBLE = 2, - SK_BYTE = 3, - SK_SHORT = 4, - SK_INT = 5, - SK_LONG = 6, - SK_LIMIT = 7; + SK_FLOAT = 1, + SK_DOUBLE = 2, + SK_BYTE = 3, + SK_SHORT = 4, + SK_INT = 5, + SK_LONG = 6, + SK_HALFFLOAT = 7, + SK_LIMIT = 8; /*package-private*/ @ForceInline @@ -244,7 +246,8 @@ static LaneType ofBasicType(int bt) { // set up asFloating if (value.elementKind == 'F') { value.asFloating = value; - } else { + } else if (value.basicType != T_SHORT) { + // FIXME: Support asFloating for short to be Halffloat for (LaneType v : values) { if (v.elementKind == 'F' && v.elementSize == value.elementSize) { diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java index e1cada48f17..c63ac78fed2 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java @@ -4028,11 +4028,10 @@ public final ShortVector viewAsIntegralLanes() { @ForceInline @Override public final - Vector<?> + HalffloatVector viewAsFloatingLanes() { LaneType flt = LaneType.SHORT.asFloating(); - // asFloating() will throw UnsupportedOperationException for the unsupported type short - throw new AssertionError("Cannot reach here"); + 
+ return (HalffloatVector) asVectorRaw(flt); } // ================================================ diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java index d37066e6ff7..c1b90b7acd6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java @@ -2981,6 +2981,19 @@ public abstract VectorMask<E> compare(VectorOperators.Comparison op, */ public abstract DoubleVector reinterpretAsDoubles(); + /** + * Reinterprets this vector as a vector of the same shape + * and contents but a lane type of {@code Halffloat}, + * where the lanes are assembled from successive bytes + * according to little-endian order. + * It is a convenience method for the expression + * {@code reinterpretShape(species().withLanes(Halffloat.class))}. + * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}. + * + * @return a {@code HalffloatVector} with the same shape and information content + */ + public abstract HalffloatVector reinterpretAsHalffloats(); + /** * Views this vector as a vector of the same shape, length, and * contents, but a lane type that is not a floating-point type. 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java index bada3487379..84ff916f08a 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorIntrinsics.java @@ -25,12 +25,15 @@ package jdk.incubator.vector; import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.misc.Unsafe; import java.util.Objects; /*non-public*/ class VectorIntrinsics { static final int VECTOR_ACCESS_OOB_CHECK = Integer.getInteger("jdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK", 2); + static final Unsafe U = Unsafe.getUnsafe(); + @ForceInline static void requireLength(int haveLength, int length) { if (haveLength != length) { @@ -111,4 +114,8 @@ private static int wrapToRangeNPOT(int index, int size) { return Math.floorMod(index, Math.abs(size)); } } + static <V> V maybeRebox(V v) { + U.loadFence(); + return v; + } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java index 1ffbcef821a..7166f998c9b 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java @@ -262,6 +262,9 @@ static int getMaxVectorBitSize(Class<?> etype) { // VectorSupport.getMaxLaneCount may return -1 if C2 is not enabled, // or a value smaller than the S_64_BIT.vectorBitSize / elementSizeInBits if MaxVectorSize < 16 // If so default to S_64_BIT + if (etype == Halffloat.class) { + etype = short.class; + } int maxLaneCount = VectorSupport.getMaxLaneCount(etype); int elementSizeInBits = LaneType.of(etype).elementSize; return Math.max(maxLaneCount * elementSizeInBits, S_64_BIT.vectorBitSize); diff --git 
a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template index 45c2cf9267c..7da1b456855 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template @@ -470,9 +470,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { public static $abstractvectortype$ zero(VectorSpecies<$Boxtype$> species) { $Type$Species vsp = ($Type$Species) species; #if[FP] +#if[short] + return VectorSupport.broadcastCoerced(vsp.vectorType(), Halffloat.class, species.length(), + toBits((short)0), vsp, + ((bits_, s_) -> s_.rvOp(i -> bits_))); +#else[short] return VectorSupport.broadcastCoerced(vsp.vectorType(), $type$.class, species.length(), toBits(0.0f), vsp, ((bits_, s_) -> s_.rvOp(i -> bits_))); +#end[short] #else[FP] return VectorSupport.broadcastCoerced(vsp.vectorType(), $type$.class, species.length(), 0, vsp, @@ -604,7 +610,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, null, UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations)); } @@ -636,7 +642,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return VectorSupport.unaryOp( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, m, UN_IMPL.find(op, opc, $abstractvectortype$::unaryOperations)); } @@ -652,6 +658,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { case VECTOR_OP_ABS: return (v0, m) -> v0.uOp(m, (i, a) -> ($type$) Math.abs(a)); #if[FP] +#if[!short] case VECTOR_OP_SIN: return (v0, m) -> v0.uOp(m, (i, a) -> 
($type$) Math.sin(a)); case VECTOR_OP_COS: return (v0, m) -> @@ -684,6 +691,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { v0.uOp(m, (i, a) -> ($type$) Math.expm1(a)); case VECTOR_OP_LOG1P: return (v0, m) -> v0.uOp(m, (i, a) -> ($type$) Math.log1p(a)); +#end[!short] #end[FP] default: return null; } @@ -744,7 +752,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, that, null, BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations)); } @@ -804,7 +812,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { int opc = opCode(op); return VectorSupport.binaryOp( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, that, m, BIN_IMPL.find(op, opc, $abstractvectortype$::binaryOperations)); } @@ -815,6 +823,19 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) { switch (opc_) { +#if[FP] +#if[short] + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() - Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue()))); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, 
vm, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(),Halffloat.valueOf(b).floatValue()))); +#else[short] case VECTOR_OP_ADD: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b)); case VECTOR_OP_SUB: return (v0, v1, vm) -> @@ -827,6 +848,21 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b)); case VECTOR_OP_MIN: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b)); +#end[short] +#else[FP] + case VECTOR_OP_ADD: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b)); + case VECTOR_OP_SUB: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b)); + case VECTOR_OP_MUL: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b)); + case VECTOR_OP_DIV: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b)); + case VECTOR_OP_MAX: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b)); + case VECTOR_OP_MIN: return (v0, v1, vm) -> + v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b)); +#end[FP] #if[BITWISE] case VECTOR_OP_AND: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b)); @@ -846,6 +882,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n)); #end[BITWISE] #if[FP] +#if[!short] case VECTOR_OP_OR: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b))); case VECTOR_OP_ATAN2: return (v0, v1, vm) -> @@ -854,6 +891,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b)); case VECTOR_OP_HYPOT: return (v0, v1, vm) -> v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b)); +#end[!short] #end[FP] default: return null; } @@ -1026,7 +1064,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { e &= SHIFT_MASK; int opc = opCode(op); 
return VectorSupport.broadcastInt( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, e, m, BIN_INT_IMPL.find(op, opc, $abstractvectortype$::broadcastIntOperations)); } @@ -1110,7 +1148,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[BITWISE] int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, that, tother, null, TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations)); } @@ -1152,7 +1190,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { #end[BITWISE] int opc = opCode(op); return VectorSupport.ternaryOp( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), maskClass, $elemtype$.class, length(), this, that, tother, m, TERN_IMPL.find(op, opc, $abstractvectortype$::ternaryOperations)); } @@ -1164,8 +1202,14 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static TernaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> ternaryOperations(int opc_) { switch (opc_) { #if[FP] +#if[short] + case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> + Halffloat.valueOf(Math.fma(Halffloat.valueOf(a).floatValue(), + Halffloat.valueOf(b).floatValue(), Halffloat.valueOf(c).floatValue()))); +#else[short] case VECTOR_OP_FMA: return (v0, v1_, v2_, m) -> v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c)); +#end[short] #end[FP] default: return null; } @@ -2189,7 +2233,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { that.check(this); int opc = opCode(op); return VectorSupport.compare( - opc, getClass(), maskType, $type$.class, length(), + opc, getClass(), maskType, $elemtype$.class, length(), this, that, null, (cond, v0, v1, m1) -> { AbstractMask<$Boxtype$> m @@ -2211,7 +2255,7 @@ public abstract 
class $abstractvectortype$ extends AbstractVector<$Boxtype$> { m.check(maskType, this); int opc = opCode(op); return VectorSupport.compare( - opc, getClass(), maskType, $type$.class, length(), + opc, getClass(), maskType, $elemtype$.class, length(), this, that, m, (cond, v0, v1, m1) -> { AbstractMask<$Boxtype$> cmpM @@ -2226,12 +2270,31 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline private static boolean compareWithOp(int cond, $type$ a, $type$ b) { return switch (cond) { +#if[FP] +#if[!short] case BT_eq -> a == b; case BT_ne -> a != b; case BT_lt -> a < b; case BT_le -> a <= b; case BT_gt -> a > b; case BT_ge -> a >= b; +#end[!short] +#if[short] + case BT_eq -> Halffloat.valueOf(a).floatValue() == Halffloat.valueOf(b).floatValue(); + case BT_ne -> Halffloat.valueOf(a).floatValue() != Halffloat.valueOf(b).floatValue(); + case BT_lt -> Halffloat.valueOf(a).floatValue() < Halffloat.valueOf(b).floatValue(); + case BT_le -> Halffloat.valueOf(a).floatValue() <= Halffloat.valueOf(b).floatValue(); + case BT_gt -> Halffloat.valueOf(a).floatValue() > Halffloat.valueOf(b).floatValue(); + case BT_ge -> Halffloat.valueOf(a).floatValue() >= Halffloat.valueOf(b).floatValue(); +#end[short] +#else[FP] + case BT_eq -> a == b; + case BT_ne -> a != b; + case BT_lt -> a < b; + case BT_le -> a <= b; + case BT_gt -> a > b; + case BT_ge -> a >= b; +#end[FP] #if[!FP] case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0; case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0; @@ -2346,7 +2409,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { blendTemplate(Class<M> maskType, $abstractvectortype$ v, M m) { v.check(this); return VectorSupport.blend( - getClass(), maskType, $type$.class, length(), + getClass(), maskType, $elemtype$.class, length(), this, v, m, (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b)); } @@ -2363,7 +2426,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // make 
sure VLENGTH*scale doesn't overflow: vsp.checkScale(scale); return VectorSupport.indexVector( - getClass(), $type$.class, length(), + getClass(), $elemtype$.class, length(), this, scale, vsp, (v, scale_, s) -> { @@ -2555,7 +2618,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype, S shuffle) { shuffle.checkIndexes(); return VectorSupport.rearrangeOp( - getClass(), shuffletype, null, $type$.class, length(), + getClass(), shuffletype, null, $elemtype$.class, length(), this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -2587,7 +2650,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { throw new AssertionError(); } return VectorSupport.rearrangeOp( - getClass(), shuffletype, masktype, $type$.class, length(), + getClass(), shuffletype, masktype, $elemtype$.class, length(), this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -2615,7 +2678,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { S ws = (S) shuffle.wrapIndexes(); $abstractvectortype$ r0 = VectorSupport.rearrangeOp( - getClass(), shuffletype, null, $type$.class, length(), + getClass(), shuffletype, null, $elemtype$.class, length(), this, ws, null, (v0, s_, m_) -> v0.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -2623,7 +2686,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { })); $abstractvectortype$ r1 = VectorSupport.rearrangeOp( - getClass(), shuffletype, null, $type$.class, length(), + getClass(), shuffletype, null, $elemtype$.class, length(), v, ws, null, (v1, s_, m_) -> v1.uOp((i, a) -> { int ei = s_.laneSource(i); @@ -3026,7 +3089,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), maskClass, $type$.class, length(), + opc, getClass(), 
maskClass, $elemtype$.class, length(), this, m, REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); } @@ -3043,7 +3106,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), null, $type$.class, length(), + opc, getClass(), null, $elemtype$.class, length(), this, null, REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); } @@ -3054,6 +3117,27 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) { switch (opc_) { +#if[FP] +#if[short] + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() + Halffloat.valueOf(b).floatValue())))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp(Halffloat.valueOf(1.0f) /* identity must be 1.0 encoded as half-float bits, not the raw bit pattern 1 */, m, (i, a, b) -> Halffloat.valueOf((Halffloat.valueOf(a).floatValue() * Halffloat.valueOf(b).floatValue())))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> Halffloat.valueOf(Math.min(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue())))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> Halffloat.valueOf(Math.max(Halffloat.valueOf(a).floatValue(), Halffloat.valueOf(b).floatValue())))); +#else[short] + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b))); +#end[short] +#else[FP] case VECTOR_OP_ADD: return (v, m) -> toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + 
b))); case VECTOR_OP_MUL: return (v, m) -> @@ -3062,6 +3146,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b))); case VECTOR_OP_MAX: return (v, m) -> toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b))); +#end[FP] #if[BITWISE] case VECTOR_OP_AND: return (v, m) -> toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b))); @@ -3373,7 +3458,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length); ByteBuffer wb = wrapper(a, bo); return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m, - (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$)); } /** @@ -4007,7 +4092,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit()); ByteBuffer wb = wrapper(bb, bo); return vsp.ldOp(wb, offset, (AbstractMask<$Boxtype$>)m, - (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$)); } // Memory store operations @@ -4810,7 +4895,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr, off, s) -> { ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); return s.ldOp(wb, off, - (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$)); }); } @@ -4829,7 +4914,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr, off, s, vm) -> { ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); return s.ldOp(wb, off, vm, - (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$)); }); } @@ -4845,7 +4930,7 @@ public abstract class 
$abstractvectortype$ extends AbstractVector<$Boxtype$> { (buf, off, s) -> { ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); return s.ldOp(wb, off, - (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$)); }); } @@ -4863,7 +4948,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (buf, off, s, vm) -> { ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); return s.ldOp(wb, off, vm, - (wb_, o, i) -> wb_.get{#if[byte]?(:$Type$(}o + i * $sizeInBytes$)); + (wb_, o, i) -> wb_.get{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$)); }); } @@ -4998,7 +5083,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr, off, v) -> { ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); v.stOp(wb, off, - (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e)); }); } @@ -5017,7 +5102,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (arr, off, v, vm) -> { ByteBuffer wb = wrapper(arr, NATIVE_ENDIAN); v.stOp(wb, off, vm, - (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + (tb_, o, i, e) -> tb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e)); }); } @@ -5031,7 +5116,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (buf, off, v) -> { ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); v.stOp(wb, off, - (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Elemtype$(}o + i * $sizeInBytes$, e)); }); } @@ -5049,7 +5134,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { (buf, off, v, vm) -> { ByteBuffer wb = wrapper(buf, NATIVE_ENDIAN); v.stOp(wb, off, vm, - (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Type$(}o + i * $sizeInBytes$, e)); + (wb_, o, i, e) -> wb_.put{#if[byte]?(:$Elemtype$(}o + i 
* $sizeInBytes$, e)); }); } @@ -5204,18 +5289,26 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline @Override public final - {#if[byteOrShort]?Vector<?>:$Fptype$Vector} +#if[FP] +#if[short] + $Type$Vector +#else[short] + $Fptype$Vector +#end[short] +#else[FP] + {#if[byte]?Vector<?>:$Fptype$Vector} +#end[FP] viewAsFloatingLanes() { #if[FP] return this; #else[FP] LaneType flt = LaneType.$TYPE$.asFloating(); -#if[!byteOrShort] +#if[!byte] return ($Fptype$Vector) asVectorRaw(flt); -#else[!byteOrShort] +#else[!byte] // asFloating() will throw UnsupportedOperationException for the unsupported type $type$ throw new AssertionError("Cannot reach here"); -#end[!byteOrShort] +#end[!byte] #end[FP] } @@ -5299,7 +5392,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @Override @ForceInline public final Class<$Boxtype$> elementType() { - return $type$.class; + return $elemtype$.class; } @Override @@ -5328,7 +5421,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final $abstractvectortype$ broadcastBits(long bits) { return ($abstractvectortype$) VectorSupport.broadcastCoerced( - vectorType, $type$.class, laneCount, + vectorType, $elemtype$.class, laneCount, bits, this, (bits_, s_) -> s_.rvOp(i -> bits_)); } @@ -5578,5 +5671,5 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { * A preferred species is a species of maximal bit-size for the platform. 
*/ public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED - = ($Type$Species) VectorSpecies.ofPreferred($type$.class); + = ($Type$Species) VectorSpecies.ofPreferred($elemtype$.class); } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template index df15c85fccc..381bd03b5eb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template @@ -52,7 +52,7 @@ final class $vectortype$ extends $abstractvectortype$ { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM $vectortype$($type$[] v) { super(v); @@ -88,7 +88,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline @Override - public final Class<$Boxtype$> elementType() { return $type$.class; } + public final Class<$Boxtype$> elementType() { return $elemtype$.class; } @ForceInline @Override @@ -801,7 +801,7 @@ final class $vectortype$ extends $abstractvectortype$ { static final class $masktype$ extends AbstractMask<$Boxtype$> { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM $masktype$(boolean[] bits) { this(bits, 0); @@ -1028,7 +1028,7 @@ final class $vectortype$ extends $abstractvectortype$ { static final class $shuffletype$ extends AbstractShuffle<$Boxtype$> { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM $shuffletype$(byte[] reorder) 
{ super(VLENGTH, reorder); diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh index 6841a47c757..5936d6e5780 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh @@ -53,10 +53,19 @@ typeprefix= globalArgs="" #globalArgs="$globalArgs -KextraOverrides" -for type in byte short int long float double +for type in byte short int long float double halffloat do + Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}" TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})" + + case $type in + halffloat) + type=short + TYPE=SHORT + ;; + esac + args=$globalArgs args="$args -K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE" @@ -72,19 +81,24 @@ do fptype=$type Fptype=$Type Boxfptype=$Boxtype + elemtype=$type + Elemtype=$Type - case $type in - byte) + case $Type in + Byte) Wideboxtype=Integer sizeInBytes=1 args="$args -KbyteOrShort" ;; - short) + Short) + fptype=halffloat + Fptype=Halffloat + Boxfptype=Halffloat Wideboxtype=Integer sizeInBytes=2 args="$args -KbyteOrShort" ;; - int) + Int) Boxtype=Integer Wideboxtype=Integer Boxbitstype=Integer @@ -94,14 +108,14 @@ do sizeInBytes=4 args="$args -KintOrLong -KintOrFP -KintOrFloat" ;; - long) + Long) fptype=double Fptype=Double Boxfptype=Double sizeInBytes=8 args="$args -KintOrLong -KlongOrDouble" ;; - float) + Float) kind=FP bitstype=int Bitstype=Int @@ -109,7 +123,7 @@ do sizeInBytes=4 args="$args -KintOrFP -KintOrFloat" ;; - double) + Double) kind=FP bitstype=long Bitstype=Long @@ -117,12 +131,23 @@ do sizeInBytes=8 args="$args -KintOrFP -KlongOrDouble" ;; + Halffloat) + kind=FP + bitstype=short + Bitstype=Short + Boxbitstype=Short + sizeInBytes=2 + elemtype=Halffloat + Elemtype=Short + args="$args -KbyteOrShort -KshortOrFP -KshortOrHalffloat" + ;; esac args="$args -K$kind -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype" 
args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype" args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype" args="$args -DsizeInBytes=$sizeInBytes" + args="$args -Delemtype=$elemtype -DElemtype=$Elemtype" abstractvectortype=${typeprefix}${Type}Vector abstractbitsvectortype=${typeprefix}${Bitstype}Vector diff --git a/test/jdk/jdk/incubator/vector/AddTest.java b/test/jdk/jdk/incubator/vector/AddTest.java index bd11f0092be..68ffc6f1a27 100644 --- a/test/jdk/jdk/incubator/vector/AddTest.java +++ b/test/jdk/jdk/incubator/vector/AddTest.java @@ -27,7 +27,8 @@ * @requires vm.compiler2.enabled */ -import jdk.incubator.vector.FloatVector; +import jdk.incubator.vector.Halffloat; +import jdk.incubator.vector.HalffloatVector; import jdk.incubator.vector.VectorShape; import jdk.incubator.vector.VectorSpecies; import jdk.incubator.vector.Vector; @@ -36,30 +37,30 @@ import java.util.stream.IntStream; public class AddTest { - static final VectorSpecies<Float> SPECIES = - FloatVector.SPECIES_256; + static final VectorSpecies<Halffloat> SPECIES = + HalffloatVector.SPECIES_128; static final int SIZE = 1024; - static float[] a = new float[SIZE]; - static float[] b = new float[SIZE]; - static float[] c = new float[SIZE]; + static short[] a = new short[SIZE]; + static short[] b = new short[SIZE]; + static short[] c = new short[SIZE]; static { for (int i = 0; i < SIZE; i++) { - a[i] = 1f; - b[i] = 2f; + a[i] = Halffloat.valueOf((float)i); + b[i] = Halffloat.valueOf((float)i); } } static void workload() { for (int i = 0; i < a.length; i += SPECIES.length()) { - FloatVector av = FloatVector.fromArray(SPECIES, a, i); - FloatVector bv = FloatVector.fromArray(SPECIES, b, i); + HalffloatVector av = HalffloatVector.fromArray(SPECIES, a, i); + HalffloatVector bv = HalffloatVector.fromArray(SPECIES, b, i); av.add(bv).intoArray(c, i); } } - static final int[] IDENTITY_INDEX_MAPPING = IntStream.range(0, SPECIES.length()).toArray(); + /*static final int[] 
IDENTITY_INDEX_MAPPING = IntStream.range(0, SPECIES.length()).toArray(); static void workloadIndexMapped() { for (int i = 0; i < a.length; i += SPECIES.length()) { @@ -67,18 +68,24 @@ static void workloadIndexMapped() { FloatVector bv = FloatVector.fromArray(SPECIES, b, i, IDENTITY_INDEX_MAPPING, 0); av.add(bv).intoArray(c, i, IDENTITY_INDEX_MAPPING, 0); } - } + }*/ public static void main(String args[]) { for (int i = 0; i < 30_0000; i++) { workload(); } for (int i = 0; i < a.length; i++) { - if (c[i] != a[i] + b[i]) + Halffloat hfa = new Halffloat(a[i]); + Halffloat hfb = new Halffloat(b[i]); + Halffloat hfc = new Halffloat(c[i]); + + if (hfc.floatValue() != (hfa.floatValue() + hfb.floatValue())) { + System.out.println("RES: " + hfc.floatValue() + " EXPECTED: " + (hfa.floatValue() + hfb.floatValue())); throw new AssertionError(); + } } - Arrays.fill(c, 0.0f); + /*Arrays.fill(c, 0.0f); for (int i = 0; i < 30_0000; i++) { workloadIndexMapped(); @@ -86,6 +93,6 @@ public static void main(String args[]) { for (int i = 0; i < a.length; i++) { if (c[i] != a[i] + b[i]) throw new AssertionError(); - } + }*/ } }