@@ -468,10 +468,14 @@ bool SuperWord::SLP_extract() {
468
468
469
469
combine_pairs_to_longer_packs ();
470
470
471
- split_packs_longer_than_max_vector_size ();
471
+ construct_my_pack_map ();
472
+
473
+ split_packs_at_use_def_boundaries (); // a first time: create natural boundaries
474
+ split_packs_only_implemented_with_smaller_size ();
475
+ split_packs_to_break_mutual_dependence ();
476
+ split_packs_at_use_def_boundaries (); // again: propagate split of other packs
472
477
473
478
// Now we only remove packs:
474
- construct_my_pack_map ();
475
479
filter_packs_for_power_of_2_size ();
476
480
filter_packs_for_mutual_independence ();
477
481
filter_packs_for_alignment ();
@@ -835,7 +839,9 @@ bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
835
839
return false ; // No vectors for this type
836
840
}
837
841
838
- if (isomorphic (s1, s2)) {
842
+ // Forbid anything that looks like a PopulateIndex to be packed. It does not need to be packed,
843
+ // and will still be vectorized by SuperWord::vector_opd.
844
+ if (isomorphic (s1, s2) && !is_populate_index (s1, s2)) {
839
845
if ((independent (s1, s2) && have_similar_inputs (s1, s2)) || reduction (s1, s2)) {
840
846
if (!exists_at (s1, 0 ) && !exists_at (s2, 1 )) {
841
847
if (!s1->is_Mem () || are_adjacent_refs (s1, s2)) {
@@ -914,6 +920,18 @@ bool SuperWord::isomorphic(Node* s1, Node* s2) {
914
920
}
915
921
}
916
922
923
+ // Look for pattern n1 = (iv + c) and n2 = (iv + c + 1), which may lead to PopulateIndex vector node.
924
+ // We skip the pack creation of these nodes. They will be vectorized by SuperWord::vector_opd.
925
+ bool SuperWord::is_populate_index (const Node* n1, const Node* n2) const {
926
+ return n1->is_Add () &&
927
+ n2->is_Add () &&
928
+ n1->in (1 ) == iv () &&
929
+ n2->in (1 ) == iv () &&
930
+ n1->in (2 )->is_Con () &&
931
+ n2->in (2 )->is_Con () &&
932
+ n2->in (2 )->get_int () - n1->in (2 )->get_int () == 1 ;
933
+ }
934
+
917
935
// Is there no data path from s1 to s2 or s2 to s1?
918
936
bool VLoopDependencyGraph::independent (Node* s1, Node* s2) const {
919
937
int d1 = depth (s1);
@@ -1384,60 +1402,199 @@ void SuperWord::combine_pairs_to_longer_packs() {
1384
1402
#endif
1385
1403
}
1386
1404
1387
- void SuperWord::split_packs_longer_than_max_vector_size () {
1388
- assert (!_packset.is_empty (), " packset not empty" );
1389
- DEBUG_ONLY ( int old_packset_length = _packset.length (); )
1405
// Apply a SplitTask to a single pack and report the outcome as a SplitStatus.
//  - Unchanged task: the pack is handed back untouched (make_unchanged).
//  - Rejected task:  set_my_pack(n, nullptr) is called for every node of the pack,
//                    and make_rejected() is returned.
//  - Split task:     the last task.split_size() nodes are separated from the pack.
//                    A side that would hold a single node cannot be a pack: that node
//                    gets set_my_pack(n, nullptr). If both sides would hold a single
//                    node, the whole pack is rejected. Otherwise either the modified
//                    pack (make_modified) or both packs (make_split) are returned.
// Note: split_name is not referenced in the body of this function.
+ SuperWord::SplitStatus SuperWord::split_pack (const char * split_name,
1406
+ Node_List* pack,
1407
+ SplitTask task)
1408
+ {
1409
+ uint pack_size = pack->size ();
1390
1410
1391
- for (int i = 0 ; i < _packset.length (); i++) {
1392
- Node_List* pack = _packset.at (i);
1393
- assert (pack != nullptr , " no nullptr in packset" );
1394
- uint max_vlen = max_vector_size_in_def_use_chain (pack->at (0 ));
1395
- assert (is_power_of_2 (max_vlen), " sanity" );
1396
- uint pack_size = pack->size ();
1397
- if (pack_size <= max_vlen) {
1398
- continue ;
1399
- }
1400
- // Split off the "upper" nodes into new packs
1401
- Node_List* new_pack = new Node_List ();
1402
- for (uint j = max_vlen; j < pack_size; j++) {
1403
- Node* n = pack->at (j);
1404
- // is new_pack full?
1405
- if (new_pack->size () >= max_vlen) {
1406
- assert (is_power_of_2 (new_pack->size ()), " sanity %d" , new_pack->size ());
1407
- _packset.append (new_pack);
1408
- new_pack = new Node_List ();
1409
- }
1410
- new_pack->push (n);
1411
- }
1412
- // remaining new_pack
1413
- if (new_pack->size () > 1 ) {
1414
- _packset.append (new_pack);
1415
- } else {
1411
+ if (task.is_unchanged ()) {
1412
+ return SplitStatus::make_unchanged (pack);
1413
+ }
1414
+
1415
+ if (task.is_rejected ()) {
1416
1416
#ifndef PRODUCT
1417
1417
if (is_trace_superword_rejections ()) {
1418
1418
tty->cr ();
1419
- tty->print_cr (" WARNING: Node dropped out of odd size pack:" );
1420
- new_pack->at (0 )->dump ();
1419
+ tty->print_cr (" WARNING: Removed pack during split: %s:" , task.message ());
1421
1420
print_pack (pack);
1422
1421
}
1423
1422
#endif
1423
+ for (uint i = 0 ; i < pack_size; i++) {
1424
+ Node* n = pack->at (i);
1425
+ set_my_pack (n, nullptr );
1424
1426
}
1425
- // truncate
1426
- while (pack->size () > max_vlen) {
1427
- pack->pop ();
1427
+ return SplitStatus::make_rejected ();
1428
+ }
1429
+
1430
+ uint split_size = task.split_size ();
1431
+ assert (0 < split_size && split_size < pack_size, " split_size must be in range" );
1432
+
1433
+ // Split the size: the last new_size nodes are split off, the first old_size nodes stay in pack.
1434
+ uint new_size = split_size;
1435
+ uint old_size = pack_size - new_size;
1436
+
1437
+ #ifndef PRODUCT
1438
+ if (is_trace_superword_packset ()) {
1439
+ tty->cr ();
1440
+ tty->print_cr (" INFO: splitting pack (sizes: %d %d): %s:" ,
1441
+ old_size, new_size, task.message ());
1442
+ print_pack (pack);
1443
+ }
1444
+ #endif
1445
+
1446
+ // Are both sizes too small to be a pack?
1447
+ if (old_size < 2 && new_size < 2 ) {
1448
+ assert (old_size == 1 && new_size == 1 , " implied" );
1449
+ #ifndef PRODUCT
1450
+ if (is_trace_superword_rejections ()) {
1451
+ tty->cr ();
1452
+ tty->print_cr (" WARNING: Removed size 2 pack, cannot be split: %s:" , task.message ());
1453
+ print_pack (pack);
1454
+ }
1455
+ #endif
1456
+ for (uint i = 0 ; i < pack_size; i++) {
1457
+ Node* n = pack->at (i);
1458
+ set_my_pack (n, nullptr );
1428
1459
}
1460
+ return SplitStatus::make_rejected ();
1429
1461
}
1430
1462
1431
- assert (old_packset_length <= _packset.length (), " we only increased the number of packs" );
1463
+ // Just pop off a single node?
1464
+ if (new_size < 2 ) {
1465
+ assert (new_size == 1 && old_size >= 2 , " implied" );
1466
+ Node* n = pack->pop ();
1467
+ set_my_pack (n, nullptr );
1468
+ #ifndef PRODUCT
1469
+ if (is_trace_superword_rejections ()) {
1470
+ tty->cr ();
1471
+ tty->print_cr (" WARNING: Removed node from pack, because of split: %s:" , task.message ());
1472
+ n->dump ();
1473
+ }
1474
+ #endif
1475
+ return SplitStatus::make_modified (pack);
1476
+ }
1477
+
1478
+ // Just remove a single node at front?
1479
+ if (old_size < 2 ) {
1480
+ assert (old_size == 1 && new_size >= 2 , " implied" );
1481
+ Node* n = pack->at (0 );
1482
+ pack->remove (0 );
1483
+ set_my_pack (n, nullptr );
1484
+ #ifndef PRODUCT
1485
+ if (is_trace_superword_rejections ()) {
1486
+ tty->cr ();
1487
+ tty->print_cr (" WARNING: Removed node from pack, because of split: %s:" , task.message ());
1488
+ n->dump ();
1489
+ }
1490
+ #endif
1491
+ return SplitStatus::make_modified (pack);
1492
+ }
1493
+
1494
+ // We will have two packs
1495
+ assert (old_size >= 2 && new_size >= 2 , " implied" );
1496
+ Node_List* new_pack = new Node_List (new_size);
1497
+
1498
+ for (uint i = 0 ; i < new_size; i++) {
1499
+ Node* n = pack->at (old_size + i);
1500
+ new_pack->push (n);
1501
+ set_my_pack (n, new_pack);
1502
+ }
1503
+
1504
+ for (uint i = 0 ; i < new_size; i++) {
1505
+ pack->pop ();
1506
+ }
1507
+
1508
+ // We assume that new_pack is more "stable" (i.e. will have to be split less than pack).
1509
+ // Put "pack" second, so that we insert it later in the list, and iterate over it again sooner.
1510
+ return SplitStatus::make_split (new_pack, pack);
1511
+ }
1512
+
1513
+ template <typename SplitStrategy>
1514
+ void SuperWord::split_packs (const char * split_name,
1515
+ SplitStrategy strategy) {
1516
+ bool changed;
1517
+ do {
1518
+ changed = false ;
1519
+ int new_packset_length = 0 ;
1520
+ for (int i = 0 ; i < _packset.length (); i++) {
1521
+ Node_List* pack = _packset.at (i);
1522
+ assert (pack != nullptr && pack->size () >= 2 , " no nullptr, at least size 2" );
1523
+ SplitTask task = strategy (pack);
1524
+ SplitStatus status = split_pack (split_name, pack, task);
1525
+ changed |= !status.is_unchanged ();
1526
+ Node_List* first_pack = status.first_pack ();
1527
+ Node_List* second_pack = status.second_pack ();
1528
+ _packset.at_put (i, nullptr ); // take out pack
1529
+ if (first_pack != nullptr ) {
1530
+ // The first pack can be put at the current position
1531
+ assert (i >= new_packset_length, " only move packs down" );
1532
+ _packset.at_put (new_packset_length++, first_pack);
1533
+ }
1534
+ if (second_pack != nullptr ) {
1535
+ // The second node has to be appended at the end
1536
+ _packset.append (second_pack);
1537
+ }
1538
+ }
1539
+ _packset.trunc_to (new_packset_length);
1540
+ } while (changed);
1432
1541
1433
1542
#ifndef PRODUCT
1434
1543
if (is_trace_superword_packset ()) {
1435
- tty->print_cr (" \n After Superword::split_packs_longer_than_max_vector_size " );
1544
+ tty->print_cr (" \n After %s " , split_name );
1436
1545
print_packset ();
1437
1546
}
1438
1547
#endif
1439
1548
}
1440
1549
1550
+ // Split packs at boundaries where left and right have different use or def packs.
1551
+ void SuperWord::split_packs_at_use_def_boundaries () {
1552
+ split_packs (" SuperWord::split_packs_at_use_def_boundaries" ,
1553
+ [&](const Node_List* pack) {
1554
+ uint pack_size = pack->size ();
1555
+ uint boundary = find_use_def_boundary (pack);
1556
+ assert (boundary < pack_size, " valid boundary %d" , boundary);
1557
+ if (boundary != 0 ) {
1558
+ return SplitTask::make_split (pack_size - boundary, " found a use/def boundary" );
1559
+ }
1560
+ return SplitTask::make_unchanged ();
1561
+ });
1562
+ }
1563
+
1564
+ // Split packs that are only implemented with a smaller pack size. Also splits packs
1565
+ // such that they eventually have power of 2 size.
1566
+ void SuperWord::split_packs_only_implemented_with_smaller_size () {
1567
+ split_packs (" SuperWord::split_packs_only_implemented_with_smaller_size" ,
1568
+ [&](const Node_List* pack) {
1569
+ uint pack_size = pack->size ();
1570
+ uint implemented_size = max_implemented_size (pack);
1571
+ if (implemented_size == 0 ) {
1572
+ return SplitTask::make_rejected (" not implemented at any smaller size" );
1573
+ }
1574
+ assert (is_power_of_2 (implemented_size), " power of 2 size or zero: %d" , implemented_size);
1575
+ if (implemented_size != pack_size) {
1576
+ return SplitTask::make_split (implemented_size, " only implemented at smaller size" );
1577
+ }
1578
+ return SplitTask::make_unchanged ();
1579
+ });
1580
+ }
1581
+
1582
+ // Split packs that have a mutual dependency, until all packs are mutually_independent.
1583
+ void SuperWord::split_packs_to_break_mutual_dependence () {
1584
+ split_packs (" SuperWord::split_packs_to_break_mutual_dependence" ,
1585
+ [&](const Node_List* pack) {
1586
+ uint pack_size = pack->size ();
1587
+ assert (is_power_of_2 (pack_size), " ensured by earlier splits %d" , pack_size);
1588
+ if (!is_marked_reduction (pack->at (0 )) &&
1589
+ !mutually_independent (pack)) {
1590
+ // As a best guess, we split the pack in half. This way, we iteratively make the
1591
+ // packs smaller, until there is no dependency.
1592
+ return SplitTask::make_split (pack_size >> 1 , " was not mutually independent" );
1593
+ }
1594
+ return SplitTask::make_unchanged ();
1595
+ });
1596
+ }
1597
+
1441
1598
template <typename FilterPredicate>
1442
1599
void SuperWord::filter_packs (const char * filter_name,
1443
1600
const char * error_message,
@@ -1642,7 +1799,7 @@ void SuperWord::filter_packs_for_implemented() {
1642
1799
filter_packs (" SuperWord::filter_packs_for_implemented" ,
1643
1800
" Unimplemented" ,
1644
1801
[&](const Node_List* pack) {
1645
- return implemented (pack);
1802
+ return implemented (pack, pack-> size () );
1646
1803
});
1647
1804
}
1648
1805
@@ -1664,7 +1821,7 @@ void SuperWord::filter_packs_for_profitable() {
1664
1821
while (true ) {
1665
1822
int old_packset_length = _packset.length ();
1666
1823
filter_packs (nullptr , // don't dump each time
1667
- " size is not a power of 2 " ,
1824
+ " not profitable " ,
1668
1825
[&](const Node_List* pack) {
1669
1826
return profitable (pack);
1670
1827
});
@@ -1683,14 +1840,13 @@ void SuperWord::filter_packs_for_profitable() {
1683
1840
#endif
1684
1841
}
1685
1842
1686
- // ------------------------------implemented---------------------------
1687
- // Can code be generated for pack p?
1688
- bool SuperWord::implemented ( const Node_List* p) {
1843
+ // Can code be generated for the pack, restricted to size nodes?
1844
+ bool SuperWord::implemented ( const Node_List* pack, uint size) {
1845
+ assert (size >= 2 && size <= pack-> size () && is_power_of_2 (size), " valid size " );
1689
1846
bool retValue = false ;
1690
- Node* p0 = p ->at (0 );
1847
+ Node* p0 = pack ->at (0 );
1691
1848
if (p0 != nullptr ) {
1692
1849
int opc = p0->Opcode ();
1693
- uint size = p->size ();
1694
1850
if (is_marked_reduction (p0)) {
1695
1851
const Type *arith_type = p0->bottom_type ();
1696
1852
// Length 2 reductions of INT/LONG do not offer performance benefits
@@ -1732,6 +1888,22 @@ bool SuperWord::implemented(const Node_List* p) {
1732
1888
return retValue;
1733
1889
}
1734
1890
1891
+ // Find the maximal implemented size smaller or equal to the packs size
1892
+ uint SuperWord::max_implemented_size (const Node_List* pack) {
1893
+ uint size = round_down_power_of_2 (pack->size ());
1894
+ if (implemented (pack, size)) {
1895
+ return size;
1896
+ } else {
1897
+ // Iteratively divide size by 2, and check.
1898
+ for (uint s = size >> 1 ; s >= 2 ; s >>= 1 ) {
1899
+ if (implemented (pack, s)) {
1900
+ return s;
1901
+ }
1902
+ }
1903
+ return 0 ; // not implementable at all
1904
+ }
1905
+ }
1906
+
1735
1907
bool SuperWord::requires_long_to_int_conversion (int opc) {
1736
1908
switch (opc) {
1737
1909
case Op_PopCountL:
@@ -2763,6 +2935,94 @@ void SuperWord::verify_no_extract() {
2763
2935
}
2764
2936
#endif
2765
2937
2938
+ // Check if n_super's pack uses are a superset of n_sub's pack uses.
2939
+ bool SuperWord::has_use_pack_superset (const Node* n_super, const Node* n_sub) const {
2940
+ Node_List* pack = my_pack (n_super);
2941
+ assert (pack != nullptr && pack == my_pack (n_sub), " must have the same pack" );
2942
+
2943
+ // For all uses of n_sub that are in a pack (use_sub) ...
2944
+ for (DUIterator_Fast jmax, j = n_sub->fast_outs (jmax); j < jmax; j++) {
2945
+ Node* use_sub = n_sub->fast_out (j);
2946
+ Node_List* pack_use_sub = my_pack (use_sub);
2947
+ if (pack_use_sub == nullptr ) { continue ; }
2948
+
2949
+ // ... and all input edges: use_sub->in(i) == n_sub.
2950
+ uint start, end;
2951
+ VectorNode::vector_operands (use_sub, &start, &end);
2952
+ for (uint i = start; i < end; i++) {
2953
+ if (use_sub->in (i) != n_sub) { continue ; }
2954
+
2955
+ // Check if n_super has any use use_super in the same pack ...
2956
+ bool found = false ;
2957
+ for (DUIterator_Fast kmax, k = n_super->fast_outs (kmax); k < kmax; k++) {
2958
+ Node* use_super = n_super->fast_out (k);
2959
+ Node_List* pack_use_super = my_pack (use_super);
2960
+ if (pack_use_sub != pack_use_super) { continue ; }
2961
+
2962
+ // ... and where there is an edge use_super->in(i) == n_super.
2963
+ // For MulAddS2I it is expected to have defs over different input edges.
2964
+ if (use_super->in (i) != n_super && !VectorNode::is_muladds2i (use_super)) { continue ; }
2965
+
2966
+ found = true ;
2967
+ break ;
2968
+ }
2969
+ if (!found) {
2970
+ // n_sub has a use-edge (use_sub->in(i) == n_sub) with use_sub in a packset,
2971
+ // but n_super does not have any edge (use_super->in(i) == n_super) with
2972
+ // use_super in the same packset. Hence, n_super does not have a use pack
2973
+ // superset of n_sub.
2974
+ return false ;
2975
+ }
2976
+ }
2977
+ }
2978
+ // n_super has all edges that n_sub has.
2979
+ return true ;
2980
+ }
2981
+
2982
+ // Find a boundary in the pack, where left and right have different pack uses and defs.
2983
+ // This is a natural boundary to split a pack, to ensure that use and def packs match.
2984
+ // If no boundary is found, return zero.
2985
+ uint SuperWord::find_use_def_boundary (const Node_List* pack) const {
2986
+ Node* p0 = pack->at (0 );
2987
+ Node* p1 = pack->at (1 );
2988
+
2989
+ const bool is_reduction_pack = reduction (p0, p1);
2990
+
2991
+ // Inputs range
2992
+ uint start, end;
2993
+ VectorNode::vector_operands (p0, &start, &end);
2994
+
2995
+ for (int i = pack->size () - 2 ; i >= 0 ; i--) {
2996
+ // For all neighbours
2997
+ Node* n0 = pack->at (i + 0 );
2998
+ Node* n1 = pack->at (i + 1 );
2999
+
3000
+
3001
+ // 1. Check for matching defs
3002
+ for (uint j = start; j < end; j++) {
3003
+ Node* n0_in = n0->in (j);
3004
+ Node* n1_in = n1->in (j);
3005
+ // No boundary if:
3006
+ // 1) the same packs OR
3007
+ // 2) reduction edge n0->n1 or n1->n0
3008
+ if (my_pack (n0_in) != my_pack (n1_in) &&
3009
+ !((n0 == n1_in || n1 == n0_in) && is_reduction_pack)) {
3010
+ return i + 1 ;
3011
+ }
3012
+ }
3013
+
3014
+ // 2. Check for matching uses: equal if both are superset of the other.
3015
+ // Reductions have no pack uses, so they match trivially on the use packs.
3016
+ if (!is_reduction_pack &&
3017
+ !(has_use_pack_superset (n0, n1) &&
3018
+ has_use_pack_superset (n1, n0))) {
3019
+ return i + 1 ;
3020
+ }
3021
+ }
3022
+
3023
+ return 0 ;
3024
+ }
3025
+
2766
3026
// ------------------------------is_vector_use---------------------------
2767
3027
// Is use->in(u_idx) a vector use?
2768
3028
bool SuperWord::is_vector_use (Node* use, int u_idx) {
0 commit comments