Skip to content

Commit 129c470

Browse files
committed Nov 17, 2023
8311932: Suboptimal compiled code of nested loop over memory segment
Reviewed-by: thartmann, chagedorn
1 parent 369bbec commit 129c470

File tree

2 files changed

+80
-4
lines changed

2 files changed

+80
-4
lines changed
 

‎src/hotspot/share/opto/loopnode.cpp

+22
Original file line number | Diff line number | Diff line change
@@ -855,6 +855,28 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
855855
// not a loop after all
856856
return false;
857857
}
858+
859+
if (range_checks.size() > 0) {
860+
// This transformation requires peeling one iteration. Also, if it has range checks and they are eliminated by Loop
861+
// Predication, then 2 Hoisted Check Predicates are added for one range check. Finally, transforming a long range
862+
// check requires extra logic to be executed before the loop is entered and for the outer loop. As a result, the
863+
// transformations can't pay off for a small number of iterations: roughly, if the loop runs for 3 iterations, it's
864+
// going to execute as many range checks once transformed with range checks eliminated (1 peeled iteration with
865+
// range checks + 2 predicates per range check) as it would have if not transformed. It also has to pay for the extra
866+
// logic on loop entry and for the outer loop.
867+
loop->compute_trip_count(this);
868+
if (head->is_CountedLoop() && head->as_CountedLoop()->has_exact_trip_count()) {
869+
if (head->as_CountedLoop()->trip_count() <= 3) {
870+
return false;
871+
}
872+
} else {
873+
loop->compute_profile_trip_cnt(this);
874+
if (!head->is_profile_trip_failed() && head->profile_trip_cnt() <= 3) {
875+
return false;
876+
}
877+
}
878+
}
879+
858880
julong orig_iters = (julong)hi->hi_as_long() - lo->lo_as_long();
859881
iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters));
860882

‎test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java

+58-4
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929

3030
/*
3131
* @test
32-
* @bug 8259609 8276116
32+
* @bug 8259609 8276116 8311932
3333
* @summary C2: optimize long range checks in long counted loops
3434
* @library /test/lib /
3535
* @requires vm.compiler2.enabled
@@ -38,9 +38,9 @@
3838

3939
public class TestLongRangeChecks {
4040
public static void main(String[] args) {
41-
TestFramework.runWithFlags("-XX:-UseCountedLoopSafepoints");
42-
TestFramework.runWithFlags("-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1");
43-
TestFramework.runWithFlags("-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000");
41+
TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0");
42+
TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0");
43+
TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0");
4444
}
4545

4646

@@ -246,4 +246,58 @@ public static void testStridePosScaleNegInIntLoop2(int start, int stop, long len
246246
private void testStridePosScaleNegInIntLoop2_runner() {
247247
testStridePosScaleNegInIntLoop2(0, 100, 200, 198);
248248
}
249+
250+
@Test
251+
@IR(counts = { IRNode.LONG_COUNTED_LOOP, "1" })
252+
@IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP })
253+
public static void testStridePosScalePosShortLoop(long start, long stop, long length, long offset) {
254+
final long scale = 1;
255+
final long stride = 1;
256+
257+
// Loop runs for too few iterations. Transforming it wouldn't pay off.
258+
for (long i = start; i < stop; i += stride) {
259+
Objects.checkIndex(scale * i + offset, length);
260+
}
261+
}
262+
263+
@Run(test = "testStridePosScalePosShortLoop")
264+
private void testStridePosScalePosShortLoop_runner() {
265+
testStridePosScalePosShortLoop(0, 2, 2, 0);
266+
}
267+
268+
@Test
269+
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
270+
@IR(failOn = { IRNode.LOOP })
271+
public static void testStridePosScalePosInIntLoopShortLoop1(int start, int stop, long length, long offset) {
272+
final long scale = 2;
273+
final int stride = 1;
274+
275+
// Same but with int loop
276+
for (int i = start; i < stop; i += stride) {
277+
Objects.checkIndex(scale * i + offset, length);
278+
}
279+
}
280+
281+
@Run(test = "testStridePosScalePosInIntLoopShortLoop1")
282+
private void testStridePosScalePosInIntLoopShortLoop1_runner() {
283+
testStridePosScalePosInIntLoopShortLoop1(0, 2, 4, 0);
284+
}
285+
286+
@Test
287+
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
288+
@IR(failOn = { IRNode.LOOP })
289+
public static void testStridePosScalePosInIntLoopShortLoop2(long length, long offset) {
290+
final long scale = 2;
291+
final int stride = 1;
292+
293+
// Same but with int loop and constant bounds (exact trip count of 3)
294+
for (int i = 0; i < 3; i += stride) {
295+
Objects.checkIndex(scale * i + offset, length);
296+
}
297+
}
298+
299+
@Run(test = "testStridePosScalePosInIntLoopShortLoop2")
300+
private void testStridePosScalePosInIntLoopShortLoop2_runner() {
301+
testStridePosScalePosInIntLoopShortLoop2(6, 0);
302+
}
249303
}

0 commit comments

Comments
 (0)
Please sign in to comment.