diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index 66b8a43640728..8ed99d74c0b53 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -997,6 +997,7 @@ class methodHandle;
                                      "I"                                                                                                       \
                                      "Ljava/lang/Object;"                                                                                      \
                                      "J"                                                                                                       \
+                                     "Z"                                                                                                       \
                                      "Ljava/lang/Object;"                                                                                      \
                                      "J"                                                                                                       \
                                      "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;"                                                    \
@@ -1011,6 +1012,7 @@ class methodHandle;
                                             "I"                                                                                                \
                                             "Ljava/lang/Object;"                                                                               \
                                             "J"                                                                                                \
+                                            "Z"                                                                                                \
                                             "Ljdk/internal/vm/vector/VectorSupport$VectorMask;"                                                \
                                             "I"                                                                                                \
                                             "Ljava/lang/Object;"                                                                               \
@@ -1026,6 +1028,7 @@ class methodHandle;
                                       "I"                                                                                                      \
                                       "Ljava/lang/Object;"                                                                                     \
                                       "J"                                                                                                      \
+                                      "Z"                                                                                                      \
                                       "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;"                                                   \
                                       "Ljava/lang/Object;"                                                                                     \
                                       "J"                                                                                                      \
@@ -1040,6 +1043,7 @@ class methodHandle;
                                              "I"                                                                                               \
                                              "Ljava/lang/Object;"                                                                              \
                                              "J"                                                                                               \
+                                             "Z"                                                                                               \
                                              "Ljdk/internal/vm/vector/VectorSupport$Vector;"                                                   \
                                              "Ljdk/internal/vm/vector/VectorSupport$VectorMask;"                                               \
                                              "Ljava/lang/Object;"                                                                              \
diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp
index bb046d8552717..c51ee30895e8d 100644
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -928,7 +928,7 @@ bool LibraryCallKit::inline_vector_frombits_coerced() {
   return true;
 }
 
-static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_type) {
+static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_type, bool mismatched_ms) {
   assert(arr_type != nullptr, "unexpected");
   BasicType arr_elem_bt = arr_type->elem()->array_element_basic_type();
   if (elem_bt == arr_elem_bt) {
@@ -940,41 +940,44 @@ static bool elem_consistent_with_arr(BasicType elem_bt, const TypeAryPtr* arr_ty
     // Load/store of byte vector from/to boolean[] is supported
     return true;
   } else {
-    return false;
+    return mismatched_ms;
   }
 }
 
-// public static
-// <C,
-//  VM,
-//  E,
-//  S extends VectorSpecies<E>>
-// VM load(Class<? extends VM> vmClass, Class<E> elementType, int length,
-//         Object base, long offset,    // Unsafe addressing
-//         C container, long index, S s,     // Arguments for default implementation
-//         LoadOperation<C, VM, E, S> defaultImpl)
-//
-// public static
-// <C,
-//  V extends Vector<?>>
-// void store(Class<?> vectorClass, Class<?> elementType, int length,
-//            Object base, long offset,    // Unsafe addressing
-//            V v,
-//            C container, long index,      // Arguments for default implementation
-//            StoreVectorOperation<C, V> defaultImpl)
-
+//  public static
+//  <C,
+//   VM extends VectorPayload,
+//   E,
+//   S extends VectorSpecies<E>>
+//  VM load(Class<? extends VM> vmClass, Class<E> eClass,
+//          int length,
+//          Object base, long offset,            // Unsafe addressing
+//          boolean fromSegment,
+//          C container, long index, S s,        // Arguments for default implementation
+//          LoadOperation<C, VM, S> defaultImpl) {
+//  public static
+//  <C,
+//   V extends VectorPayload>
+//  void store(Class<?> vClass, Class<?> eClass,
+//             int length,
+//             Object base, long offset,        // Unsafe addressing
+//             boolean fromSegment,
+//             V v, C container, long index,    // Arguments for default implementation
+//             StoreVectorOperation<C, V> defaultImpl) {
 bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
   const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
+  const TypeInt*     from_ms      = gvn().type(argument(6))->isa_int(); // fromSegment flag; null-checked below like vlen
 
-  if (vector_klass == nullptr || elem_klass == nullptr || vlen == nullptr ||
+  if (vector_klass == nullptr || elem_klass == nullptr || vlen == nullptr || from_ms == nullptr || !from_ms->is_con() ||
       vector_klass->const_oop() == nullptr || elem_klass->const_oop() == nullptr || !vlen->is_con()) {
     if (C->print_intrinsics()) {
-      tty->print_cr("  ** missing constant: vclass=%s etype=%s vlen=%s",
+      tty->print_cr("  ** missing constant: vclass=%s etype=%s vlen=%s from_ms=%s",
                     NodeClassNames[argument(0)->Opcode()],
                     NodeClassNames[argument(1)->Opcode()],
-                    NodeClassNames[argument(2)->Opcode()]);
+                    NodeClassNames[argument(2)->Opcode()],
+                    NodeClassNames[argument(6)->Opcode()]);
     }
     return false; // not enough info for intrinsification
   }
@@ -1033,11 +1036,10 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
 
   const bool needs_cpu_membar = is_mixed_access || is_mismatched_access;
 
-  // Now handle special case where load/store happens from/to byte array but element type is not byte.
-  bool using_byte_array = arr_type != nullptr && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
-  // Handle loading masks.
-  // If there is no consistency between array and vector element types, it must be special byte array case or loading masks
-  if (arr_type != nullptr && !using_byte_array && !is_mask && !elem_consistent_with_arr(elem_bt, arr_type)) {
+  bool mismatched_ms = from_ms->get_con() && !is_mask && arr_type != nullptr && arr_type->elem()->array_element_basic_type() != elem_bt;
+  BasicType mem_elem_bt = mismatched_ms ? arr_type->elem()->array_element_basic_type() : elem_bt;
+  int mem_num_elem = mismatched_ms ? (num_elem * type2aelembytes(elem_bt)) / type2aelembytes(mem_elem_bt) : num_elem;
+  if (arr_type != nullptr && !is_mask && !elem_consistent_with_arr(elem_bt, arr_type, mismatched_ms)) {
     if (C->print_intrinsics()) {
       tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s atype=%s ismask=no",
                     is_store, is_store ? "store" : "load",
@@ -1047,19 +1049,33 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
     set_sp(old_sp);
     return false;
   }
-  // Since we are using byte array, we need to double check that the byte operations are supported by backend.
-  if (using_byte_array) {
-    int byte_num_elem = num_elem * type2aelembytes(elem_bt);
-    if (!arch_supports_vector(is_store ? Op_StoreVector : Op_LoadVector, byte_num_elem, T_BYTE, VecMaskNotUsed)
-        || !arch_supports_vector(Op_VectorReinterpret, byte_num_elem, T_BYTE, VecMaskNotUsed)) {
-      if (C->print_intrinsics()) {
-        tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d*8 etype=%s/8 ismask=no",
-                      is_store, is_store ? "store" : "load",
-                      byte_num_elem, type2name(elem_bt));
+
+  // In case of mismatched memory segment accesses, we need to double check that the source type memory operations are supported by backend.
+  if (mismatched_ms) {
+    if (is_store) {
+      if (!arch_supports_vector(Op_StoreVector, num_elem, elem_bt, VecMaskNotUsed)
+          || !arch_supports_vector(Op_VectorReinterpret, mem_num_elem, mem_elem_bt, VecMaskNotUsed)) {
+        if (C->print_intrinsics()) {
+          tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d*8 etype=%s/8 ismask=no",
+                        is_store, "store",
+                        num_elem, type2name(elem_bt));
+        }
+        set_map(old_map);
+        set_sp(old_sp);
+        return false; // not supported
+      }
+    } else {
+      if (!arch_supports_vector(Op_LoadVector, mem_num_elem, mem_elem_bt, VecMaskNotUsed)
+          || !arch_supports_vector(Op_VectorReinterpret, num_elem, elem_bt, VecMaskNotUsed)) {
+        if (C->print_intrinsics()) {
+          tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d*8 etype=%s/8 ismask=no",
+                        is_store, "load",
+                        mem_num_elem, type2name(mem_elem_bt));
+        }
+        set_map(old_map);
+        set_sp(old_sp);
+        return false; // not supported
       }
-      set_map(old_map);
-      set_sp(old_sp);
-      return false; // not supported
     }
   }
   if (is_mask) {
@@ -1085,7 +1101,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   }
 
   if (is_store) {
-    Node* val = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
+    Node* val = unbox_vector(argument(7), vbox_type, elem_bt, num_elem);
     if (val == nullptr) {
       set_map(old_map);
       set_sp(old_sp);
@@ -1095,9 +1111,9 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
 
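-    // In case the store needs to happen to byte array, reinterpret the incoming vector to byte vector.
+    // In case of a mismatched memory segment store, reinterpret the incoming vector to the memory element type.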
     int store_num_elem = num_elem;
-    if (using_byte_array) {
-      store_num_elem = num_elem * type2aelembytes(elem_bt);
-      const TypeVect* to_vect_type = TypeVect::make(T_BYTE, store_num_elem);
+    if (mismatched_ms) {
+      store_num_elem = mem_num_elem;
+      const TypeVect* to_vect_type = TypeVect::make(mem_elem_bt, store_num_elem);
       val = gvn().transform(new VectorReinterpretNode(val, val->bottom_type()->is_vect(), to_vect_type));
     }
     if (is_mask) {
@@ -1108,9 +1124,8 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   } else {
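-    // When using byte array, we need to load as byte then reinterpret the value. Otherwise, do a simple vector load.
+    // For a mismatched memory segment access, load using the memory element type and then reinterpret the value. Otherwise, do a simple vector load.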
     Node* vload = nullptr;
-    if (using_byte_array) {
-      int load_num_elem = num_elem * type2aelembytes(elem_bt);
-      vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, load_num_elem, T_BYTE));
+    if (mismatched_ms) {
+      vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, mem_num_elem, mem_elem_bt));
       const TypeVect* to_vect_type = TypeVect::make(elem_bt, num_elem);
       vload = gvn().transform(new VectorReinterpretNode(vload, vload->bottom_type()->is_vect(), to_vect_type));
     } else {
@@ -1136,43 +1151,47 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
   return true;
 }
 
-// public static
-// <C,
-//  V extends Vector<?>,
-//  E,
-//  S extends VectorSpecies<E>,
-//  M extends VectorMask<E>>
-// V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
-//              int length, Object base, long offset, M m, int offsetInRange,
-//              C container, long index, S s,  // Arguments for default implementation
-//              LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
-//
-// public static
-// <C,
-//  V extends Vector<E>,
-//  M extends VectorMask<E>,
-//  E>
-// void storeMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
-//                  int length, Object base, long offset,
-//                  V v, M m,
-//                  C container, long index,  // Arguments for default implementation
-//                  StoreVectorMaskedOperation<C, V, M, E> defaultImpl) {
-//
+//  public static
+//  <C,
+//   V extends Vector<?>,
+//   E,
+//   S extends VectorSpecies<E>,
+//   M extends VectorMask<E>>
+//  V loadMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
+//               int length, Object base, long offset,          // Unsafe addressing
+//               boolean fromSegment,
+//               M m, int offsetInRange,
+//               C container, long index, S s,                  // Arguments for default implementation
+//               LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
+//  public static
+//  <C,
+//   V extends Vector<E>,
+//   M extends VectorMask<E>,
+//   E>
+//  void storeMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
+//                   int length,
+//                   Object base, long offset,                  // Unsafe addressing
+//                   boolean fromSegment,
+//                   V v, M m, C container, long index,         // Arguments for default implementation
+//                   StoreVectorMaskedOperation<C, V, M> defaultImpl) {
+
 bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
   const TypeInstPtr* mask_klass   = gvn().type(argument(1))->isa_instptr();
   const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
   const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();
+  const TypeInt*     from_ms      = gvn().type(argument(7))->isa_int();
 
   if (vector_klass == nullptr || mask_klass == nullptr || elem_klass == nullptr || vlen == nullptr ||
-      vector_klass->const_oop() == nullptr || mask_klass->const_oop() == nullptr ||
-      elem_klass->const_oop() == nullptr || !vlen->is_con()) {
+      vector_klass->const_oop() == nullptr || mask_klass->const_oop() == nullptr || from_ms == nullptr ||
+      elem_klass->const_oop() == nullptr || !vlen->is_con() || !from_ms->is_con()) {
     if (C->print_intrinsics()) {
-      tty->print_cr("  ** missing constant: vclass=%s mclass=%s etype=%s vlen=%s",
+      tty->print_cr("  ** missing constant: vclass=%s mclass=%s etype=%s vlen=%s from_ms=%s",
                     NodeClassNames[argument(0)->Opcode()],
                     NodeClassNames[argument(1)->Opcode()],
                     NodeClassNames[argument(2)->Opcode()],
-                    NodeClassNames[argument(3)->Opcode()]);
+                    NodeClassNames[argument(3)->Opcode()],
+                    NodeClassNames[argument(7)->Opcode()]);
     }
     return false; // not enough info for intrinsification
   }
@@ -1212,10 +1231,10 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
   const TypePtr *addr_type = gvn().type(addr)->isa_ptr();
   const TypeAryPtr* arr_type = addr_type->isa_aryptr();
 
-  // Now handle special case where load/store happens from/to byte array but element type is not byte.
-  bool using_byte_array = arr_type != nullptr && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
+  bool mismatched_ms = from_ms->get_con() && arr_type != nullptr && arr_type->elem()->array_element_basic_type() != elem_bt;
+  BIG_ENDIAN_ONLY(if (mismatched_ms) return false;)
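-  // If there is no consistency between array and vector element types, it must be special byte array case
+  // If there is no consistency between array and vector element types, it must be a mismatched memory segment access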
-  if (arr_type != nullptr && !using_byte_array && !elem_consistent_with_arr(elem_bt, arr_type)) {
+  if (arr_type != nullptr && !elem_consistent_with_arr(elem_bt, arr_type, mismatched_ms)) {
     if (C->print_intrinsics()) {
       tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s atype=%s",
                     is_store, is_store ? "storeMasked" : "loadMasked",
@@ -1226,8 +1245,8 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
     return false;
   }
 
-  int mem_num_elem = using_byte_array ? num_elem * type2aelembytes(elem_bt) : num_elem;
-  BasicType mem_elem_bt = using_byte_array ? T_BYTE : elem_bt;
+  int mem_num_elem = mismatched_ms ? num_elem * type2aelembytes(elem_bt) : num_elem;
+  BasicType mem_elem_bt = mismatched_ms ? T_BYTE : elem_bt;
   bool supports_predicate = arch_supports_vector(is_store ? Op_StoreVectorMasked : Op_LoadVectorMasked,
                                                 mem_num_elem, mem_elem_bt, VecMaskUseLoad);
 
@@ -1240,7 +1259,7 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
       needs_predicate = true;
     } else {
       // Masked vector load with IOOBE always uses the predicated load.
-      const TypeInt* offset_in_range = gvn().type(argument(8))->isa_int();
+      const TypeInt* offset_in_range = gvn().type(argument(9))->isa_int();
       if (!offset_in_range->is_con()) {
         if (C->print_intrinsics()) {
           tty->print_cr("  ** missing constant: offsetInRange=%s",
@@ -1255,9 +1274,9 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
 
     if (needs_predicate) {
       if (C->print_intrinsics()) {
-        tty->print_cr("  ** not supported: op=%s vlen=%d etype=%s using_byte_array=%d",
+        tty->print_cr("  ** not supported: op=%s vlen=%d etype=%s mismatched_ms=%d",
                       is_store ? "storeMasked" : "loadMasked",
-                      num_elem, type2name(elem_bt), using_byte_array ? 1 : 0);
+                      num_elem, type2name(elem_bt), mismatched_ms ? 1 : 0);
       }
       set_map(old_map);
       set_sp(old_sp);
@@ -1270,8 +1289,8 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
   if (!supports_predicate && (!arch_supports_vector(Op_LoadVector, mem_num_elem, mem_elem_bt, VecMaskNotUsed) ||
       !arch_supports_vector(Op_VectorBlend, mem_num_elem, mem_elem_bt, VecMaskUseLoad))) {
     if (C->print_intrinsics()) {
-      tty->print_cr("  ** not supported: op=loadMasked vlen=%d etype=%s using_byte_array=%d",
-                    num_elem, type2name(elem_bt), using_byte_array ? 1 : 0);
+      tty->print_cr("  ** not supported: op=loadMasked vlen=%d etype=%s mismatched_ms=%d",
+                    num_elem, type2name(elem_bt), mismatched_ms ? 1 : 0);
     }
     set_map(old_map);
     set_sp(old_sp);
@@ -1280,10 +1299,10 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
 
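-  // Since we are using byte array, we need to double check that the vector reinterpret operation
-  // with byte type is supported by backend.
+  // Since a mismatched memory segment access is performed with byte vectors, we need to double check
+  // that the vector reinterpret operation with byte type is supported by backend.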
-  if (using_byte_array) {
+  if (mismatched_ms) {
     if (!arch_supports_vector(Op_VectorReinterpret, mem_num_elem, T_BYTE, VecMaskNotUsed)) {
       if (C->print_intrinsics()) {
-        tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s using_byte_array=1",
+        tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s mismatched_ms=1",
                       is_store, is_store ? "storeMasked" : "loadMasked",
                       num_elem, type2name(elem_bt));
       }
@@ -1318,12 +1337,12 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
   const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
 
-  Node* mask = unbox_vector(is_store ? argument(8) : argument(7), mbox_type, elem_bt, num_elem);
+  Node* mask = unbox_vector(is_store ? argument(9) : argument(8), mbox_type, elem_bt, num_elem);
   if (mask == nullptr) {
     if (C->print_intrinsics()) {
       tty->print_cr("  ** unbox failed mask=%s",
-                    is_store ? NodeClassNames[argument(8)->Opcode()]
-                             : NodeClassNames[argument(7)->Opcode()]);
+                    is_store ? NodeClassNames[argument(9)->Opcode()]
+                             : NodeClassNames[argument(8)->Opcode()]);
     }
     set_map(old_map);
     set_sp(old_sp);
@@ -1331,11 +1350,11 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
   }
 
   if (is_store) {
-    Node* val = unbox_vector(argument(7), vbox_type, elem_bt, num_elem);
+    Node* val = unbox_vector(argument(8), vbox_type, elem_bt, num_elem);
     if (val == nullptr) {
       if (C->print_intrinsics()) {
         tty->print_cr("  ** unbox failed vector=%s",
-                      NodeClassNames[argument(7)->Opcode()]);
+                      NodeClassNames[argument(8)->Opcode()]);
       }
       set_map(old_map);
       set_sp(old_sp);
@@ -1343,7 +1362,7 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
     }
     set_all_memory(reset_memory());
 
-    if (using_byte_array) {
+    if (mismatched_ms) {
       // Reinterpret the incoming vector to byte vector.
       const TypeVect* to_vect_type = TypeVect::make(mem_elem_bt, mem_num_elem);
       val = gvn().transform(new VectorReinterpretNode(val, val->bottom_type()->is_vect(), to_vect_type));
@@ -1357,7 +1376,7 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
   } else {
     Node* vload = nullptr;
 
-    if (using_byte_array) {
+    if (mismatched_ms) {
       // Reinterpret the vector mask to byte type.
       const TypeVect* from_mask_type = TypeVect::makemask(elem_bt, num_elem);
       const TypeVect* to_mask_type = TypeVect::makemask(mem_elem_bt, mem_num_elem);
@@ -1376,7 +1395,7 @@ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
       vload = gvn().transform(new VectorBlendNode(zero, vload, mask));
     }
 
-    if (using_byte_array) {
+    if (mismatched_ms) {
       const TypeVect* to_vect_type = TypeVect::make(elem_bt, num_elem);
       vload = gvn().transform(new VectorReinterpretNode(vload, vload->bottom_type()->is_vect(), to_vect_type));
     }
@@ -1525,7 +1544,7 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
   const TypeAryPtr* arr_type = addr_type->isa_aryptr();
 
   // The array must be consistent with vector type
-  if (arr_type == nullptr || (arr_type != nullptr && !elem_consistent_with_arr(elem_bt, arr_type))) {
+  if (arr_type == nullptr || (arr_type != nullptr && !elem_consistent_with_arr(elem_bt, arr_type, false))) {
     if (C->print_intrinsics()) {
       tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s atype=%s ismask=no",
                     is_scatter, is_scatter ? "scatter" : "gather",
diff --git a/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template b/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template
index 0147b3bacd86e..e3f8787e9e6f4 100644
--- a/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template
+++ b/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template
@@ -355,7 +355,7 @@ public class ScopedMemoryAccess {
             session.checkValidStateRaw();
 
             return VectorSupport.load(vmClass, e, length,
-                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
+                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, true,
                     msp, offset, s,
                     defaultImpl);
         } finally {
@@ -397,7 +397,7 @@ public class ScopedMemoryAccess {
             session.checkValidStateRaw();
 
             return VectorSupport.loadMasked(vmClass, maskClass, e, length,
-                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, m, offsetInRange,
+                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, true, m, offsetInRange,
                     msp, offset, s,
                     defaultImpl);
         } finally {
@@ -438,7 +438,7 @@ public class ScopedMemoryAccess {
             session.checkValidStateRaw();
 
             VectorSupport.store(vmClass, e, length,
-                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
+                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, true,
                     v,
                     msp, offset,
                     defaultImpl);
@@ -480,7 +480,7 @@ public class ScopedMemoryAccess {
             session.checkValidStateRaw();
 
             VectorSupport.storeMasked(vmClass, maskClass, e, length,
-                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset,
+                    msp.unsafeGetBase(), msp.unsafeGetOffset() + offset, true,
                     v, m,
                     msp, offset,
                     defaultImpl);
diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
index 21415ee0484d2..ccfa006b102c2 100644
--- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
+++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
@@ -421,7 +421,7 @@ public interface LoadOperation<C,
      S extends VectorSpecies<E>>
     VM load(Class<? extends VM> vmClass, Class<E> eClass,
             int length,
-            Object base, long offset,
+            Object base, long offset, boolean fromSegment,
             C container, long index, S s,
             LoadOperation<C, VM, S> defaultImpl) {
         assert isNonCapturingLambda(defaultImpl) : defaultImpl;
@@ -445,7 +445,7 @@ public interface LoadVectorMaskedOperation<C,
      S extends VectorSpecies<E>,
      M extends VectorMask<E>>
     V loadMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
-                 int length, Object base, long offset,
+                 int length, Object base, long offset, boolean fromSegment,
                  M m, int offsetInRange,
                  C container, long index, S s,
                  LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
@@ -494,7 +494,7 @@ public interface StoreVectorOperation<C,
      V extends VectorPayload>
     void store(Class<?> vClass, Class<?> eClass,
                int length,
-               Object base, long offset,
+               Object base, long offset, boolean fromSegment,
                V v, C container, long index,
                StoreVectorOperation<C, V> defaultImpl) {
         assert isNonCapturingLambda(defaultImpl) : defaultImpl;
@@ -515,7 +515,7 @@ public interface StoreVectorMaskedOperation<C,
      E>
     void storeMasked(Class<? extends V> vClass, Class<M> mClass, Class<E> eClass,
                      int length,
-                     Object base, long offset,
+                     Object base, long offset, boolean fromSegment,
                      V v, M m, C container, long index,
                      StoreVectorMaskedOperation<C, V, M> defaultImpl) {
         assert isNonCapturingLambda(defaultImpl) : defaultImpl;
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractMask.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractMask.java
index 7adbd3a18524b..30297b24db06c 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractMask.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractMask.java
@@ -74,7 +74,7 @@ public void intoArray(boolean[] bits, int i) {
         i = VectorIntrinsics.checkFromIndexSize(i, laneCount, bits.length);
         VectorSupport.store(
             vsp.maskType(), vsp.elementType(), laneCount,
-            bits, (long) i + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
+            bits, (long) i + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET, false,
             this, bits, i,
             (c, idx, s) -> System.arraycopy(s.getBits(), 0, c, (int) idx, s.length()));
 
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
index 74b9461a5cb3a..ecd162bbdb8d2 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java
@@ -3373,7 +3373,7 @@ void intoArray(byte[] a, int offset) {
         ByteSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3526,7 +3526,7 @@ void intoBooleanArray(boolean[] a, int offset) {
         ByteVector normalized = this.and((byte) 1);
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset),
+            a, booleanArrayAddress(a, offset), false,
             normalized,
             a, offset,
             (arr, off, v)
@@ -3737,7 +3737,7 @@ ByteVector fromArray0Template(byte[] a, int offset) {
         ByteSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -3754,7 +3754,7 @@ ByteVector fromArray0Template(Class<M> maskClass, byte[] a, int offset, M m, int
         ByteSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -3771,7 +3771,7 @@ ByteVector fromBooleanArray0Template(boolean[] a, int offset) {
         ByteSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset),
+            a, booleanArrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
@@ -3788,7 +3788,7 @@ ByteVector fromBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset
         ByteSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset), m, offsetInRange,
+            a, booleanArrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
@@ -3836,7 +3836,7 @@ void intoArray0Template(byte[] a, int offset) {
         ByteSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -3853,7 +3853,7 @@ void intoArray0Template(Class<M> maskClass, byte[] a, int offset, M m) {
         ByteSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
@@ -3872,7 +3872,7 @@ void intoBooleanArray0Template(Class<M> maskClass, boolean[] a, int offset, M m)
         ByteVector normalized = this.and((byte) 1);
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset),
+            a, booleanArrayAddress(a, offset), false,
             normalized, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
index f6e9b7b01ea3e..9b3d683520eb8 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java
@@ -3061,7 +3061,7 @@ void intoArray(double[] a, int offset) {
         DoubleSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3294,7 +3294,7 @@ DoubleVector fromArray0Template(double[] a, int offset) {
         DoubleSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -3311,7 +3311,7 @@ DoubleVector fromArray0Template(Class<M> maskClass, double[] a, int offset, M m,
         DoubleSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -3413,7 +3413,7 @@ void intoArray0Template(double[] a, int offset) {
         DoubleSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -3430,7 +3430,7 @@ void intoArray0Template(Class<M> maskClass, double[] a, int offset, M m) {
         DoubleSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
index 7265243b6b93f..f9bb4a9c6300a 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java
@@ -3067,7 +3067,7 @@ void intoArray(float[] a, int offset) {
         FloatSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3281,7 +3281,7 @@ FloatVector fromArray0Template(float[] a, int offset) {
         FloatSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -3298,7 +3298,7 @@ FloatVector fromArray0Template(Class<M> maskClass, float[] a, int offset, M m, i
         FloatSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -3382,7 +3382,7 @@ void intoArray0Template(float[] a, int offset) {
         FloatSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -3399,7 +3399,7 @@ void intoArray0Template(Class<M> maskClass, float[] a, int offset, M m) {
         FloatSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
index f9c913f823caf..b6e9cba081617 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java
@@ -3223,7 +3223,7 @@ void intoArray(int[] a, int offset) {
         IntSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3437,7 +3437,7 @@ IntVector fromArray0Template(int[] a, int offset) {
         IntSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -3454,7 +3454,7 @@ IntVector fromArray0Template(Class<M> maskClass, int[] a, int offset, M m, int o
         IntSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -3538,7 +3538,7 @@ void intoArray0Template(int[] a, int offset) {
         IntSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -3555,7 +3555,7 @@ void intoArray0Template(Class<M> maskClass, int[] a, int offset, M m) {
         IntSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
index 9339dc8616fcd..98cf154e4a7e1 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java
@@ -3102,7 +3102,7 @@ void intoArray(long[] a, int offset) {
         LongSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3335,7 +3335,7 @@ LongVector fromArray0Template(long[] a, int offset) {
         LongSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -3352,7 +3352,7 @@ LongVector fromArray0Template(Class<M> maskClass, long[] a, int offset, M m, int
         LongSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -3454,7 +3454,7 @@ void intoArray0Template(long[] a, int offset) {
         LongSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -3471,7 +3471,7 @@ void intoArray0Template(Class<M> maskClass, long[] a, int offset, M m) {
         LongSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
index cfe91f6e14e65..0a0f359a40bd4 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
@@ -3372,7 +3372,7 @@ void intoArray(short[] a, int offset) {
         ShortSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3520,7 +3520,7 @@ void intoCharArray(char[] a, int offset) {
         ShortSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, charArrayAddress(a, offset),
+            a, charArrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -3723,7 +3723,7 @@ ShortVector fromArray0Template(short[] a, int offset) {
         ShortSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -3740,7 +3740,7 @@ ShortVector fromArray0Template(Class<M> maskClass, short[] a, int offset, M m, i
         ShortSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -3756,7 +3756,7 @@ ShortVector fromCharArray0Template(char[] a, int offset) {
         ShortSpecies vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, charArrayAddress(a, offset),
+            a, charArrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> (short) arr_[off_ + i]));
@@ -3773,7 +3773,7 @@ ShortVector fromCharArray0Template(Class<M> maskClass, char[] a, int offset, M m
         ShortSpecies vsp = vspecies();
         return VectorSupport.loadMasked(
                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-                a, charArrayAddress(a, offset), m, offsetInRange,
+                a, charArrayAddress(a, offset), false, m, offsetInRange,
                 a, offset, vsp,
                 (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                             (arr_, off_, i) -> (short) arr_[off_ + i]));
@@ -3822,7 +3822,7 @@ void intoArray0Template(short[] a, int offset) {
         ShortSpecies vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -3839,7 +3839,7 @@ void intoArray0Template(Class<M> maskClass, short[] a, int offset, M m) {
         ShortSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
@@ -3889,7 +3889,7 @@ void intoCharArray0Template(Class<M> maskClass, char[] a, int offset, M m) {
         ShortSpecies vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, charArrayAddress(a, offset),
+            a, charArrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java
index 40d20835534e6..f5ce894b13fc0 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorMask.java
@@ -207,7 +207,7 @@ public static <E> VectorMask<E> fromArray(VectorSpecies<E> species, boolean[] bi
         offset = VectorIntrinsics.checkFromIndexSize(offset, laneCount, bits.length);
         return VectorSupport.load(
                 vsp.maskType(), vsp.elementType(), laneCount,
-                bits, (long) offset + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
+                bits, (long) offset + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET, false,
                 bits, offset, vsp,
                 (c, idx, s)
                   -> s.opm(n -> c[((int )idx) + n]));
diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
index 48fc5628f2523..31441207a9e8c 100644
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
@@ -4178,7 +4178,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -4397,7 +4397,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, charArrayAddress(a, offset),
+            a, charArrayAddress(a, offset), false,
             this,
             a, offset,
             (arr, off, v)
@@ -4558,7 +4558,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         ByteVector normalized = this.and((byte) 1);
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset),
+            a, booleanArrayAddress(a, offset), false,
             normalized,
             a, offset,
             (arr, off, v)
@@ -4770,7 +4770,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> arr_[off_ + i]));
@@ -4787,7 +4787,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset), m, offsetInRange,
+            a, arrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> arr_[off_ + i]));
@@ -4864,7 +4864,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, charArrayAddress(a, offset),
+            a, charArrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> (short) arr_[off_ + i]));
@@ -4881,7 +4881,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         return VectorSupport.loadMasked(
                 vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-                a, charArrayAddress(a, offset), m, offsetInRange,
+                a, charArrayAddress(a, offset), false, m, offsetInRange,
                 a, offset, vsp,
                 (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                             (arr_, off_, i) -> (short) arr_[off_ + i]));
@@ -4898,7 +4898,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         return VectorSupport.load(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset),
+            a, booleanArrayAddress(a, offset), false,
             a, offset, vsp,
             (arr, off, s) -> s.ldOp(arr, (int) off,
                                     (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
@@ -4915,7 +4915,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         return VectorSupport.loadMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset), m, offsetInRange,
+            a, booleanArrayAddress(a, offset), false, m, offsetInRange,
             a, offset, vsp,
             (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                         (arr_, off_, i) -> (byte) (arr_[off_ + i] ? 1 : 0)));
@@ -4964,7 +4964,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         VectorSupport.store(
             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, a, offset,
             (arr, off, v)
             -> v.stOp(arr, (int) off,
@@ -4981,7 +4981,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, arrayAddress(a, offset),
+            a, arrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
@@ -5062,7 +5062,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         ByteVector normalized = this.and((byte) 1);
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, booleanArrayAddress(a, offset),
+            a, booleanArrayAddress(a, offset), false,
             normalized, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
@@ -5112,7 +5112,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
         $Type$Species vsp = vspecies();
         VectorSupport.storeMasked(
             vsp.vectorType(), maskClass, vsp.elementType(), vsp.laneCount(),
-            a, charArrayAddress(a, offset),
+            a, charArrayAddress(a, offset), false,
             this, m, a, offset,
             (arr, off, v, vm)
             -> v.stOp(arr, (int) off, vm,
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/TestLoadSegmentVarious.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/TestLoadSegmentVarious.java
index 58cd93f3c5166..5ec17c26ca5dd 100644
--- a/test/micro/org/openjdk/bench/jdk/incubator/vector/TestLoadSegmentVarious.java
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/TestLoadSegmentVarious.java
@@ -55,6 +55,9 @@ public class TestLoadSegmentVarious {
     private static final VectorSpecies<Integer> INTEGER_SPECIES = VectorSpecies.ofLargestShape(int.class);
     private static final VectorSpecies<Double> DOUBLE_SPECIES = VectorSpecies.ofLargestShape(double.class);
 
+    private static final VectorMask<Integer> INTEGER_MASK = VectorMask.fromLong(INTEGER_SPECIES, (1L << (INTEGER_SPECIES.length() / 2)) - 1); // low half of the lanes set; long shift so a lane count of 64 does not wrap
+    private static final VectorMask<Double> DOUBLE_MASK = VectorMask.fromLong(DOUBLE_SPECIES, (1L << (DOUBLE_SPECIES.length() / 2)) - 1); // low half of the lanes set
+
     // Must be evenly dividable by Double.BYTES
     @Param("1024")
     private int size;
@@ -253,4 +256,27 @@ public void doubleVectorFromDoubleBackedSegment(Blackhole bh) {
         }
     }
 
+    @Benchmark // masked IntVector load from intSegment; INTEGER_MASK selects the lanes
+    public void intVectorFromIntBackedSegmentMasked(Blackhole bh) { // NOTE(review): loop bound is an element count, step is a byte count - confirm intended
+        for (int i = 0; i < INTEGER_SPECIES.loopBound(intSrcArray.length); i += INTEGER_SPECIES.vectorByteSize()) {
+            var v = IntVector.fromMemorySegment(INTEGER_SPECIES, intSegment, i, ByteOrder.nativeOrder(), INTEGER_MASK);
+            bh.consume(v); // pass the loaded vector to the Blackhole
+        }
+    }
+
+    @Benchmark // masked IntVector load from doubleSegment; INTEGER_MASK selects the lanes
+    public void intVectorFromDoubleBackedSegmentMasked(Blackhole bh) { // NOTE(review): loop bound is an element count, step is a byte count - confirm intended
+        for (int i = 0; i < INTEGER_SPECIES.loopBound(intSrcArray.length); i += INTEGER_SPECIES.vectorByteSize()) {
+            var v = IntVector.fromMemorySegment(INTEGER_SPECIES, doubleSegment, i, ByteOrder.nativeOrder(), INTEGER_MASK);
+            bh.consume(v); // pass the loaded vector to the Blackhole
+        }
+    }
+
+    @Benchmark // masked DoubleVector load from intSegment; DOUBLE_MASK selects the lanes
+    public void doubleVectorFromIntBackedSegmentMasked(Blackhole bh) { // NOTE(review): loop bound is an element count, step is a byte count - confirm intended
+        for (int i = 0; i < DOUBLE_SPECIES.loopBound(doubleSrcArray.length); i += DOUBLE_SPECIES.vectorByteSize()) {
+            var v = DoubleVector.fromMemorySegment(DOUBLE_SPECIES, intSegment, i, ByteOrder.nativeOrder(), DOUBLE_MASK);
+            bh.consume(v); // pass the loaded vector to the Blackhole
+        }
+    }
 }