Skip to content

Commit 251347b

Browse files
committedMar 12, 2024
8326139: C2 SuperWord: split packs (match use/def packs, implemented, mutual independence)
Reviewed-by: kvn, vlivanov, chagedorn
1 parent e21da4c commit 251347b

File tree

5 files changed

+1242
-62
lines changed

5 files changed

+1242
-62
lines changed
 

‎src/hotspot/share/opto/superword.cpp

+305-45
Original file line numberDiff line numberDiff line change
@@ -468,10 +468,14 @@ bool SuperWord::SLP_extract() {
468468

469469
combine_pairs_to_longer_packs();
470470

471-
split_packs_longer_than_max_vector_size();
471+
construct_my_pack_map();
472+
473+
split_packs_at_use_def_boundaries(); // a first time: create natural boundaries
474+
split_packs_only_implemented_with_smaller_size();
475+
split_packs_to_break_mutual_dependence();
476+
split_packs_at_use_def_boundaries(); // again: propagate split of other packs
472477

473478
// Now we only remove packs:
474-
construct_my_pack_map();
475479
filter_packs_for_power_of_2_size();
476480
filter_packs_for_mutual_independence();
477481
filter_packs_for_alignment();
@@ -835,7 +839,9 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
835839
return false; // No vectors for this type
836840
}
837841

838-
if (isomorphic(s1, s2)) {
842+
// Forbid anything that looks like a PopulateIndex to be packed. It does not need to be packed,
843+
// and will still be vectorized by SuperWord::vector_opd.
844+
if (isomorphic(s1, s2) && !is_populate_index(s1, s2)) {
839845
if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) {
840846
if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
841847
if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
@@ -914,6 +920,18 @@ bool SuperWord::isomorphic(Node* s1, Node* s2) {
914920
}
915921
}
916922

923+
// Look for pattern n1 = (iv + c) and n2 = (iv + c + 1), which may lead to PopulateIndex vector node.
924+
// We skip the pack creation of these nodes. They will be vectorized by SuperWord::vector_opd.
// Returns true only if both nodes are Add nodes with iv() as in(1) and a constant as in(2),
// and the constant of n2 minus the constant of n1 equals one.
// NOTE(review): the difference of the two constants is computed with a plain subtraction;
// presumably it cannot overflow for the constants that occur here - confirm.
925+
bool SuperWord::is_populate_index(const Node* n1, const Node* n2) const {
926+
return n1->is_Add() &&
927+
n2->is_Add() &&
928+
n1->in(1) == iv() &&
929+
n2->in(1) == iv() &&
930+
n1->in(2)->is_Con() &&
931+
n2->in(2)->is_Con() &&
932+
n2->in(2)->get_int() - n1->in(2)->get_int() == 1;
933+
}
934+
917935
// Is there no data path from s1 to s2 or s2 to s1?
918936
bool VLoopDependencyGraph::independent(Node* s1, Node* s2) const {
919937
int d1 = depth(s1);
@@ -1384,60 +1402,199 @@ void SuperWord::combine_pairs_to_longer_packs() {
13841402
#endif
13851403
}
13861404

1387-
void SuperWord::split_packs_longer_than_max_vector_size() {
1388-
assert(!_packset.is_empty(), "packset not empty");
1389-
DEBUG_ONLY( int old_packset_length = _packset.length(); )
1405+
SuperWord::SplitStatus SuperWord::split_pack(const char* split_name,
1406+
Node_List* pack,
1407+
SplitTask task)
1408+
{
1409+
uint pack_size = pack->size();
13901410

1391-
for (int i = 0; i < _packset.length(); i++) {
1392-
Node_List* pack = _packset.at(i);
1393-
assert(pack != nullptr, "no nullptr in packset");
1394-
uint max_vlen = max_vector_size_in_def_use_chain(pack->at(0));
1395-
assert(is_power_of_2(max_vlen), "sanity");
1396-
uint pack_size = pack->size();
1397-
if (pack_size <= max_vlen) {
1398-
continue;
1399-
}
1400-
// Split off the "upper" nodes into new packs
1401-
Node_List* new_pack = new Node_List();
1402-
for (uint j = max_vlen; j < pack_size; j++) {
1403-
Node* n = pack->at(j);
1404-
// is new_pack full?
1405-
if (new_pack->size() >= max_vlen) {
1406-
assert(is_power_of_2(new_pack->size()), "sanity %d", new_pack->size());
1407-
_packset.append(new_pack);
1408-
new_pack = new Node_List();
1409-
}
1410-
new_pack->push(n);
1411-
}
1412-
// remaining new_pack
1413-
if (new_pack->size() > 1) {
1414-
_packset.append(new_pack);
1415-
} else {
1411+
if (task.is_unchanged()) {
1412+
return SplitStatus::make_unchanged(pack);
1413+
}
1414+
1415+
if (task.is_rejected()) {
14161416
#ifndef PRODUCT
14171417
if (is_trace_superword_rejections()) {
14181418
tty->cr();
1419-
tty->print_cr("WARNING: Node dropped out of odd size pack:");
1420-
new_pack->at(0)->dump();
1419+
tty->print_cr("WARNING: Removed pack during split: %s:", task.message());
14211420
print_pack(pack);
14221421
}
14231422
#endif
1423+
for (uint i = 0; i < pack_size; i++) {
1424+
Node* n = pack->at(i);
1425+
set_my_pack(n, nullptr);
14241426
}
1425-
// truncate
1426-
while (pack->size() > max_vlen) {
1427-
pack->pop();
1427+
return SplitStatus::make_rejected();
1428+
}
1429+
1430+
uint split_size = task.split_size();
1431+
assert(0 < split_size && split_size < pack_size, "split_size must be in range");
1432+
1433+
// Split the size
1434+
uint new_size = split_size;
1435+
uint old_size = pack_size - new_size;
1436+
1437+
#ifndef PRODUCT
1438+
if (is_trace_superword_packset()) {
1439+
tty->cr();
1440+
tty->print_cr("INFO: splitting pack (sizes: %d %d): %s:",
1441+
old_size, new_size, task.message());
1442+
print_pack(pack);
1443+
}
1444+
#endif
1445+
1446+
// Are both sizes too small to be a pack?
1447+
if (old_size < 2 && new_size < 2) {
1448+
assert(old_size == 1 && new_size == 1, "implied");
1449+
#ifndef PRODUCT
1450+
if (is_trace_superword_rejections()) {
1451+
tty->cr();
1452+
tty->print_cr("WARNING: Removed size 2 pack, cannot be split: %s:", task.message());
1453+
print_pack(pack);
1454+
}
1455+
#endif
1456+
for (uint i = 0; i < pack_size; i++) {
1457+
Node* n = pack->at(i);
1458+
set_my_pack(n, nullptr);
14281459
}
1460+
return SplitStatus::make_rejected();
14291461
}
14301462

1431-
assert(old_packset_length <= _packset.length(), "we only increased the number of packs");
1463+
// Just pop off a single node?
1464+
if (new_size < 2) {
1465+
assert(new_size == 1 && old_size >= 2, "implied");
1466+
Node* n = pack->pop();
1467+
set_my_pack(n, nullptr);
1468+
#ifndef PRODUCT
1469+
if (is_trace_superword_rejections()) {
1470+
tty->cr();
1471+
tty->print_cr("WARNING: Removed node from pack, because of split: %s:", task.message());
1472+
n->dump();
1473+
}
1474+
#endif
1475+
return SplitStatus::make_modified(pack);
1476+
}
1477+
1478+
// Just remove a single node at front?
1479+
if (old_size < 2) {
1480+
assert(old_size == 1 && new_size >= 2, "implied");
1481+
Node* n = pack->at(0);
1482+
pack->remove(0);
1483+
set_my_pack(n, nullptr);
1484+
#ifndef PRODUCT
1485+
if (is_trace_superword_rejections()) {
1486+
tty->cr();
1487+
tty->print_cr("WARNING: Removed node from pack, because of split: %s:", task.message());
1488+
n->dump();
1489+
}
1490+
#endif
1491+
return SplitStatus::make_modified(pack);
1492+
}
1493+
1494+
// We will have two packs
1495+
assert(old_size >= 2 && new_size >= 2, "implied");
1496+
Node_List* new_pack = new Node_List(new_size);
1497+
1498+
for (uint i = 0; i < new_size; i++) {
1499+
Node* n = pack->at(old_size + i);
1500+
new_pack->push(n);
1501+
set_my_pack(n, new_pack);
1502+
}
1503+
1504+
for (uint i = 0; i < new_size; i++) {
1505+
pack->pop();
1506+
}
1507+
1508+
// We assume that new_pack is more "stable" (i.e. will have to be split less than pack).
1509+
// Put "pack" second, so that we insert it later in the list, and iterate over it again sooner.
1510+
return SplitStatus::make_split(new_pack, pack);
1511+
}
1512+
1513+
template <typename SplitStrategy>
1514+
void SuperWord::split_packs(const char* split_name,
1515+
SplitStrategy strategy) {
1516+
bool changed;
1517+
do {
1518+
changed = false;
1519+
int new_packset_length = 0;
1520+
for (int i = 0; i < _packset.length(); i++) {
1521+
Node_List* pack = _packset.at(i);
1522+
assert(pack != nullptr && pack->size() >= 2, "no nullptr, at least size 2");
1523+
SplitTask task = strategy(pack);
1524+
SplitStatus status = split_pack(split_name, pack, task);
1525+
changed |= !status.is_unchanged();
1526+
Node_List* first_pack = status.first_pack();
1527+
Node_List* second_pack = status.second_pack();
1528+
_packset.at_put(i, nullptr); // take out pack
1529+
if (first_pack != nullptr) {
1530+
// The first pack can be put at the current position
1531+
assert(i >= new_packset_length, "only move packs down");
1532+
_packset.at_put(new_packset_length++, first_pack);
1533+
}
1534+
if (second_pack != nullptr) {
1535+
// The second pack has to be appended at the end
1536+
_packset.append(second_pack);
1537+
}
1538+
}
1539+
_packset.trunc_to(new_packset_length);
1540+
} while (changed);
14321541

14331542
#ifndef PRODUCT
14341543
if (is_trace_superword_packset()) {
1435-
tty->print_cr("\nAfter Superword::split_packs_longer_than_max_vector_size");
1544+
tty->print_cr("\nAfter %s", split_name);
14361545
print_packset();
14371546
}
14381547
#endif
14391548
}
14401549

1550+
// Split packs at boundaries where left and right have different use or def packs.
// find_use_def_boundary returns the index of the first node right of the boundary (or zero
// if there is none), so the pack_size - boundary nodes from that index onwards are the ones
// requested to be split away from the end of the pack.
1551+
void SuperWord::split_packs_at_use_def_boundaries() {
1552+
split_packs("SuperWord::split_packs_at_use_def_boundaries",
1553+
[&](const Node_List* pack) {
1554+
uint pack_size = pack->size();
1555+
uint boundary = find_use_def_boundary(pack);
1556+
assert(boundary < pack_size, "valid boundary %d", boundary);
1557+
if (boundary != 0) {
1558+
return SplitTask::make_split(pack_size - boundary, "found a use/def boundary");
1559+
}
1560+
return SplitTask::make_unchanged();
1561+
});
1562+
}
1563+
1564+
// Split packs that are only implemented with a smaller pack size. Also splits packs
1565+
// such that they eventually have power of 2 size.
// A pack for which max_implemented_size returns zero is rejected. Otherwise, if the pack
// size differs from the implemented size, implemented_size nodes are requested to be split
// away; a pack that already has the implemented size is left unchanged.
1566+
void SuperWord::split_packs_only_implemented_with_smaller_size() {
1567+
split_packs("SuperWord::split_packs_only_implemented_with_smaller_size",
1568+
[&](const Node_List* pack) {
1569+
uint pack_size = pack->size();
1570+
uint implemented_size = max_implemented_size(pack);
1571+
if (implemented_size == 0) {
1572+
return SplitTask::make_rejected("not implemented at any smaller size");
1573+
}
1574+
assert(is_power_of_2(implemented_size), "power of 2 size or zero: %d", implemented_size);
1575+
if (implemented_size != pack_size) {
1576+
return SplitTask::make_split(implemented_size, "only implemented at smaller size");
1577+
}
1578+
return SplitTask::make_unchanged();
1579+
});
1580+
}
1581+
1582+
// Split packs that have a mutual dependency, until all packs are mutually_independent.
// Packs whose first node is a marked reduction are never split here. The pack size is
// asserted to be a power of 2, so halving it yields two equally sized parts.
1583+
void SuperWord::split_packs_to_break_mutual_dependence() {
1584+
split_packs("SuperWord::split_packs_to_break_mutual_dependence",
1585+
[&](const Node_List* pack) {
1586+
uint pack_size = pack->size();
1587+
assert(is_power_of_2(pack_size), "ensured by earlier splits %d", pack_size);
1588+
if (!is_marked_reduction(pack->at(0)) &&
1589+
!mutually_independent(pack)) {
1590+
// As a best guess, we split the pack in half. This way, we iteratively make the
1591+
// packs smaller, until there is no dependency.
1592+
return SplitTask::make_split(pack_size >> 1, "was not mutually independent");
1593+
}
1594+
return SplitTask::make_unchanged();
1595+
});
1596+
}
1597+
14411598
template <typename FilterPredicate>
14421599
void SuperWord::filter_packs(const char* filter_name,
14431600
const char* error_message,
@@ -1642,7 +1799,7 @@ void SuperWord::filter_packs_for_implemented() {
16421799
filter_packs("SuperWord::filter_packs_for_implemented",
16431800
"Unimplemented",
16441801
[&](const Node_List* pack) {
1645-
return implemented(pack);
1802+
return implemented(pack, pack->size());
16461803
});
16471804
}
16481805

@@ -1664,7 +1821,7 @@ void SuperWord::filter_packs_for_profitable() {
16641821
while (true) {
16651822
int old_packset_length = _packset.length();
16661823
filter_packs(nullptr, // don't dump each time
1667-
"size is not a power of 2",
1824+
"not profitable",
16681825
[&](const Node_List* pack) {
16691826
return profitable(pack);
16701827
});
@@ -1683,14 +1840,13 @@ void SuperWord::filter_packs_for_profitable() {
16831840
#endif
16841841
}
16851842

1686-
//------------------------------implemented---------------------------
1687-
// Can code be generated for pack p?
1688-
bool SuperWord::implemented(const Node_List* p) {
1843+
// Can code be generated for the pack, restricted to size nodes?
1844+
bool SuperWord::implemented(const Node_List* pack, uint size) {
1845+
assert(size >= 2 && size <= pack->size() && is_power_of_2(size), "valid size");
16891846
bool retValue = false;
1690-
Node* p0 = p->at(0);
1847+
Node* p0 = pack->at(0);
16911848
if (p0 != nullptr) {
16921849
int opc = p0->Opcode();
1693-
uint size = p->size();
16941850
if (is_marked_reduction(p0)) {
16951851
const Type *arith_type = p0->bottom_type();
16961852
// Length 2 reductions of INT/LONG do not offer performance benefits
@@ -1732,6 +1888,22 @@ bool SuperWord::implemented(const Node_List* p) {
17321888
return retValue;
17331889
}
17341890

1891+
// Find the maximal implemented size smaller or equal to the pack's size.
// Candidates are powers of 2: first the pack size rounded down to a power of 2, then each
// halving of it down to 2. Returns zero if implemented() accepts none of them.
1892+
uint SuperWord::max_implemented_size(const Node_List* pack) {
1893+
uint size = round_down_power_of_2(pack->size());
1894+
if (implemented(pack, size)) {
1895+
return size;
1896+
} else {
1897+
// Iteratively divide size by 2, and check.
1898+
for (uint s = size >> 1; s >= 2; s >>= 1) {
1899+
if (implemented(pack, s)) {
1900+
return s;
1901+
}
1902+
}
1903+
return 0; // not implementable at all
1904+
}
1905+
}
1906+
17351907
bool SuperWord::requires_long_to_int_conversion(int opc) {
17361908
switch(opc) {
17371909
case Op_PopCountL:
@@ -2763,6 +2935,94 @@ void SuperWord::verify_no_extract() {
27632935
}
27642936
#endif
27652937

2938+
// Check if n_super's pack uses are a superset of n_sub's pack uses.
// Both nodes must be in the same pack (asserted below). Uses of n_sub that are not in any
// pack are ignored, and only the input edges in the range reported by
// VectorNode::vector_operands for the use are compared.
2939+
bool SuperWord::has_use_pack_superset(const Node* n_super, const Node* n_sub) const {
2940+
Node_List* pack = my_pack(n_super);
2941+
assert(pack != nullptr && pack == my_pack(n_sub), "must have the same pack");
2942+
2943+
// For all uses of n_sub that are in a pack (use_sub) ...
2944+
for (DUIterator_Fast jmax, j = n_sub->fast_outs(jmax); j < jmax; j++) {
2945+
Node* use_sub = n_sub->fast_out(j);
2946+
Node_List* pack_use_sub = my_pack(use_sub);
2947+
if (pack_use_sub == nullptr) { continue; }
2948+
2949+
// ... and all input edges: use_sub->in(i) == n_sub.
2950+
uint start, end;
2951+
VectorNode::vector_operands(use_sub, &start, &end);
2952+
for (uint i = start; i < end; i++) {
2953+
if (use_sub->in(i) != n_sub) { continue; }
2954+
2955+
// Check if n_super has any use use_super in the same pack ...
2956+
bool found = false;
2957+
for (DUIterator_Fast kmax, k = n_super->fast_outs(kmax); k < kmax; k++) {
2958+
Node* use_super = n_super->fast_out(k);
2959+
Node_List* pack_use_super = my_pack(use_super);
2960+
if (pack_use_sub != pack_use_super) { continue; }
2961+
2962+
// ... and where there is an edge use_super->in(i) == n_super.
2963+
// For MulAddS2I it is expected to have defs over different input edges.
2964+
if (use_super->in(i) != n_super && !VectorNode::is_muladds2i(use_super)) { continue; }
2965+
2966+
found = true;
2967+
break;
2968+
}
2969+
if (!found) {
2970+
// n_sub has a use-edge (use_sub->in(i) == n_sub) with use_sub in a packset,
2971+
// but n_super does not have any edge (use_super->in(i) == n_super) with
2972+
// use_super in the same packset. Hence, n_super does not have a use pack
2973+
// superset of n_sub.
2974+
return false;
2975+
}
2976+
}
2977+
}
2978+
// n_super has all edges that n_sub has.
2979+
return true;
2980+
}
2981+
2982+
// Find a boundary in the pack, where left and right have different pack uses and defs.
2983+
// This is a natural boundary to split a pack, to ensure that use and def packs match.
2984+
// If no boundary is found, return zero.
// Neighbouring nodes are compared from the end of the pack towards the front. The first
// mismatch found is returned as the index of the right neighbour, which is always >= 1.
// The input range is taken from the first node of the pack and applied to all nodes.
2985+
uint SuperWord::find_use_def_boundary(const Node_List* pack) const {
2986+
Node* p0 = pack->at(0);
2987+
Node* p1 = pack->at(1);
2988+
2989+
const bool is_reduction_pack = reduction(p0, p1);
2990+
2991+
// Inputs range
2992+
uint start, end;
2993+
VectorNode::vector_operands(p0, &start, &end);
2994+
2995+
for (int i = pack->size() - 2; i >= 0; i--) {
2996+
// For all neighbours
2997+
Node* n0 = pack->at(i + 0);
2998+
Node* n1 = pack->at(i + 1);
2999+
3000+
3001+
// 1. Check for matching defs
3002+
for (uint j = start; j < end; j++) {
3003+
Node* n0_in = n0->in(j);
3004+
Node* n1_in = n1->in(j);
3005+
// No boundary if:
3006+
// 1) the same packs OR
3007+
// 2) reduction edge n0->n1 or n1->n0
3008+
if (my_pack(n0_in) != my_pack(n1_in) &&
3009+
!((n0 == n1_in || n1 == n0_in) && is_reduction_pack)) {
3010+
return i + 1;
3011+
}
3012+
}
3013+
3014+
// 2. Check for matching uses: equal if both are superset of the other.
3015+
// Reductions have no pack uses, so they match trivially on the use packs.
3016+
if (!is_reduction_pack &&
3017+
!(has_use_pack_superset(n0, n1) &&
3018+
has_use_pack_superset(n1, n0))) {
3019+
return i + 1;
3020+
}
3021+
}
3022+
3023+
return 0;
3024+
}
3025+
27663026
//------------------------------is_vector_use---------------------------
27673027
// Is use->in(u_idx) a vector use?
27683028
bool SuperWord::is_vector_use(Node* use, int u_idx) {

‎src/hotspot/share/opto/superword.hpp

+111-4
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ class SuperWord : public ResourceObj {
245245

246246
// my_pack
247247
public:
248-
Node_List* my_pack(Node* n) { return !in_bb(n) ? nullptr : _node_info.adr_at(bb_idx(n))->_my_pack; }
248+
Node_List* my_pack(const Node* n) const { return !in_bb(n) ? nullptr : _node_info.adr_at(bb_idx(n))->_my_pack; }
249249
private:
250250
void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
251251
// is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes.
@@ -273,6 +273,8 @@ class SuperWord : public ResourceObj {
273273
bool are_adjacent_refs(Node* s1, Node* s2);
274274
// Are s1 and s2 similar?
275275
bool isomorphic(Node* s1, Node* s2);
276+
// Do we have pattern n1 = (iv + c) and n2 = (iv + c + 1)?
277+
bool is_populate_index(const Node* n1, const Node* n2) const;
276278
// For a node pair (s1, s2) which is isomorphic and independent,
277279
// do s1 and s2 have similar input edges?
278280
bool have_similar_inputs(Node* s1, Node* s2);
@@ -295,7 +297,102 @@ class SuperWord : public ResourceObj {
295297
// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
296298
void combine_pairs_to_longer_packs();
297299

298-
void split_packs_longer_than_max_vector_size();
300+
class SplitTask {
301+
private:
302+
enum Kind {
303+
// The lambda method for split_packs can return one of these tasks:
304+
Unchanged, // The pack is left in the packset, unchanged.
305+
Rejected, // The pack is removed from the packset.
306+
Split, // Split away split_size nodes from the end of the pack.
307+
};
308+
const Kind _kind;
309+
const uint _split_size;
310+
const char* _message;
311+
312+
SplitTask(const Kind kind, const uint split_size, const char* message) :
313+
_kind(kind), _split_size(split_size), _message(message)
314+
{
315+
assert(message != nullptr, "must have message");
316+
assert(_kind != Unchanged || split_size == 0, "unchanged task conditions");
317+
assert(_kind != Rejected || split_size == 0, "reject task conditions");
318+
assert(_kind != Split || split_size != 0, "split task conditions");
319+
}
320+
321+
public:
322+
static SplitTask make_split(const uint split_size, const char* message) {
323+
return SplitTask(Split, split_size, message);
324+
}
325+
326+
static SplitTask make_unchanged() {
327+
return SplitTask(Unchanged, 0, "unchanged");
328+
}
329+
330+
static SplitTask make_rejected(const char* message) {
331+
return SplitTask(Rejected, 0, message);
332+
}
333+
334+
bool is_unchanged() const { return _kind == Unchanged; }
335+
bool is_rejected() const { return _kind == Rejected; }
336+
bool is_split() const { return _kind == Split; }
337+
const char* message() const { return _message; }
338+
339+
uint split_size() const {
340+
assert(is_split(), "only split tasks have split_size");
341+
return _split_size;
342+
}
343+
};
344+
345+
class SplitStatus {
346+
private:
347+
enum Kind {
348+
// After split_pack, we have: first_pack second_pack
349+
Unchanged, // The pack is left in the packset, unchanged. old_pack nullptr
350+
Rejected, // The pack is removed from the packset. nullptr nullptr
351+
Modified, // The pack had some nodes removed. old_pack nullptr
352+
Split, // The pack was split into two packs. pack1 pack2
353+
};
354+
Kind _kind;
355+
Node_List* _first_pack;
356+
Node_List* _second_pack;
357+
358+
SplitStatus(Kind kind, Node_List* first_pack, Node_List* second_pack) :
359+
_kind(kind), _first_pack(first_pack), _second_pack(second_pack)
360+
{
361+
assert(_kind != Unchanged || (first_pack != nullptr && second_pack == nullptr), "unchanged status conditions");
362+
assert(_kind != Rejected || (first_pack == nullptr && second_pack == nullptr), "rejected status conditions");
363+
assert(_kind != Modified || (first_pack != nullptr && second_pack == nullptr), "modified status conditions");
364+
assert(_kind != Split || (first_pack != nullptr && second_pack != nullptr), "split status conditions");
365+
}
366+
367+
public:
368+
static SplitStatus make_unchanged(Node_List* old_pack) {
369+
return SplitStatus(Unchanged, old_pack, nullptr);
370+
}
371+
372+
static SplitStatus make_rejected() {
373+
return SplitStatus(Rejected, nullptr, nullptr);
374+
}
375+
376+
static SplitStatus make_modified(Node_List* first_pack) {
377+
return SplitStatus(Modified, first_pack, nullptr);
378+
}
379+
380+
static SplitStatus make_split(Node_List* first_pack, Node_List* second_pack) {
381+
return SplitStatus(Split, first_pack, second_pack);
382+
}
383+
384+
bool is_unchanged() const { return _kind == Unchanged; }
385+
Node_List* first_pack() const { return _first_pack; }
386+
Node_List* second_pack() const { return _second_pack; }
387+
};
388+
389+
SplitStatus split_pack(const char* split_name, Node_List* pack, SplitTask task);
390+
template <typename SplitStrategy>
391+
void split_packs(const char* split_name, SplitStrategy strategy);
392+
393+
void split_packs_at_use_def_boundaries();
394+
void split_packs_only_implemented_with_smaller_size();
395+
void split_packs_to_break_mutual_dependence();
299396

300397
// Filter out packs with various filter predicates
301398
template <typename FilterPredicate>
@@ -328,14 +425,24 @@ class SuperWord : public ResourceObj {
328425
bool output();
329426
// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
330427
Node* vector_opd(Node_List* p, int opd_idx);
331-
// Can code be generated for pack p?
332-
bool implemented(const Node_List* p);
428+
429+
// Can code be generated for the pack, restricted to size nodes?
430+
bool implemented(const Node_List* pack, uint size);
431+
// Find the maximal implemented size smaller or equal to the pack's size
432+
uint max_implemented_size(const Node_List* pack);
433+
333434
// For pack p, are all operands and all uses (with in the block) vector?
334435
bool profitable(const Node_List* p);
335436
// Verify that all uses of packs are also packs, i.e. we do not need extract operations.
336437
DEBUG_ONLY(void verify_no_extract();)
438+
439+
// Check if n_super's pack uses are a superset of n_sub's pack uses.
// Both nodes must belong to the same pack.
440+
bool has_use_pack_superset(const Node* n_super, const Node* n_sub) const;
441+
// Find a boundary in the pack, where left and right have different pack uses and defs.
442+
uint find_use_def_boundary(const Node_List* pack) const;
337443
// Is use->in(u_idx) a vector use?
338444
bool is_vector_use(Node* use, int u_idx);
445+
339446
// Initialize per node info
340447
void initialize_node_info();
341448
// Compute max depth for expressions from beginning of block

‎test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java

+49-7
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,18 @@ public class TestMulAddS2I {
4141

4242
static short[] sArr1 = new short[RANGE];
4343
static short[] sArr2 = new short[RANGE];
44-
static final int[] GOLDEN;
44+
static final int[] GOLDEN_A;
45+
static final int[] GOLDEN_B;
46+
static final int[] GOLDEN_C;
4547

4648
static {
4749
for (int i = 0; i < RANGE; i++) {
4850
sArr1[i] = (short)(AbstractInfo.getRandom().nextInt());
4951
sArr2[i] = (short)(AbstractInfo.getRandom().nextInt());
5052
}
51-
GOLDEN = test();
53+
GOLDEN_A = testa();
54+
GOLDEN_B = testb();
55+
GOLDEN_C = testc();
5256
}
5357

5458

@@ -61,15 +65,17 @@ public static void main(String[] args) {
6165
}
6266
}
6367

64-
@Run(test = "test")
68+
@Run(test = {"testa", "testb", "testc"})
6569
@Warmup(0)
6670
public static void run() {
67-
compare(test());
71+
compare(testa(), GOLDEN_A, "testa");
72+
compare(testb(), GOLDEN_B, "testb");
// Invoke testc itself, so that its own result is the one checked against GOLDEN_C.
73+
compare(testc(), GOLDEN_C, "testc");
6874
}
6975

70-
public static void compare(int[] out) {
76+
public static void compare(int[] out, int[] golden, String name) {
7177
for (int i = 0; i < ITER; i++) {
72-
Asserts.assertEQ(out[i], GOLDEN[i], "wrong result for out[" + i + "]");
78+
Asserts.assertEQ(out[i], golden[i], "wrong result for '" + name + "' out[" + i + "]");
7379
}
7480
}
7581

@@ -82,7 +88,7 @@ public static void compare(int[] out) {
8288
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
8389
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
8490
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
85-
public static int[] test() {
91+
public static int[] testa() {
8692
int[] out = new int[ITER];
8793
int[] out2 = new int[ITER];
8894
for (int i = 0; i < ITER; i++) {
@@ -91,4 +97,40 @@ public static int[] test() {
9197
}
9298
return out;
9399
}
100+
101+
@Test
102+
@IR(applyIfCPUFeature = {"sse2", "true"},
103+
applyIfPlatform = {"64-bit", "true"},
104+
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
105+
@IR(applyIfCPUFeature = {"asimd", "true"},
106+
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
107+
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
108+
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
109+
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
110+
public static int[] testb() {
111+
int[] out = new int[ITER];
112+
int[] out2 = new int[ITER];
113+
for (int i = 0; i < ITER; i++) {
114+
out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
115+
out2[i] += out[i];
116+
}
117+
return out;
118+
}
119+
120+
@Test
121+
@IR(applyIfCPUFeature = {"sse2", "true"},
122+
applyIfPlatform = {"64-bit", "true"},
123+
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
124+
@IR(applyIfCPUFeature = {"asimd", "true"},
125+
applyIf = {"MaxVectorSize", "16"}, // AD file requires vector_length = 16
126+
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI, "> 0"})
127+
@IR(applyIfCPUFeature = {"avx512_vnni", "true"},
128+
counts = {IRNode.MUL_ADD_S2I, "> 0", IRNode.MUL_ADD_VS2VI_VNNI, "> 0"})
129+
public static int[] testc() {
130+
int[] out = new int[ITER];
131+
for (int i = 0; i < ITER; i++) {
132+
out[i] += ((sArr1[2*i] * sArr2[2*i]) + (sArr1[2*i+1] * sArr2[2*i+1]));
133+
}
134+
return out;
135+
}
94136
}

‎test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java

+766
Large diffs are not rendered by default.

‎test/hotspot/jtreg/compiler/vectorization/runner/LoopArrayIndexComputeTest.java

+11-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
3-
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
66
* This code is free software; you can redistribute it and/or modify it
@@ -290,7 +290,8 @@ public short[] shortArrayWithDependenceNeg() {
290290
// No true dependency in read-forward case.
291291
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
292292
applyIf = {"AlignVector", "false"},
293-
counts = {IRNode.STORE_VECTOR, ">0"})
293+
counts = {IRNode.STORE_VECTOR, ">0",
294+
IRNode.MUL_VS, ">0"}) // expect maximum size
294295
public char[] charArrayWithDependencePos() {
295296
char[] res = new char[SIZE];
296297
System.arraycopy(chars, 0, res, 0, SIZE);
@@ -301,8 +302,10 @@ public char[] charArrayWithDependencePos() {
301302
}
302303

303304
@Test
304-
// Note that this case cannot be vectorized due to data dependence.
305-
@IR(failOn = {IRNode.STORE_VECTOR})
305+
// Data dependency at distance 2: restrict vector size to 2
306+
@IR(applyIfCPUFeatureOr = {"sse2", "true"},
307+
counts = {IRNode.STORE_VECTOR, ">0",
308+
IRNode.MUL_VS, IRNode.VECTOR_SIZE_2, ">0"}) // size 2 only
306309
public char[] charArrayWithDependenceNeg() {
307310
char[] res = new char[SIZE];
308311
System.arraycopy(chars, 0, res, 0, SIZE);
@@ -354,8 +357,10 @@ public boolean[] booleanArrayWithDependencePos() {
354357
}
355358

356359
@Test
357-
// Note that this case cannot be vectorized due to data dependence.
358-
@IR(failOn = {IRNode.STORE_VECTOR})
360+
// Data dependency at distance 4: restrict vector size to 4
361+
@IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
362+
counts = {IRNode.STORE_VECTOR, ">0",
363+
IRNode.OR_VB, IRNode.VECTOR_SIZE_4, ">0"}) // size 4 only
359364
public boolean[] booleanArrayWithDependenceNeg() {
360365
boolean[] res = new boolean[SIZE];
361366
System.arraycopy(booleans, 0, res, 0, SIZE);

0 commit comments

Comments
 (0)
Please sign in to comment.