Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8312116 GenShen: make instantaneous allocation rate triggers more timely #327

Closed
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
59c524b
Add a new form of allocation spike trigger for accelerating alloc rates
kdnilsen Sep 18, 2023
b26b744
Merge remote-tracking branch 'origin' into make-instantaneous-alloc-r…
kdnilsen Sep 18, 2023
bddb7b5
Fixup errors in computation of accelerated memory consumption
kdnilsen Sep 18, 2023
3ae5b65
Improve log message and fix white space
kdnilsen Sep 18, 2023
61d3e03
Fix white space
kdnilsen Sep 18, 2023
2c2cf70
Change _last_trigger to OTHER for accelerated-rate trigger
kdnilsen Sep 18, 2023
5050e39
Add every sample to history but only trigger if spiking
kdnilsen Sep 19, 2023
734ca37
Fine tune and fix the heuristics
kdnilsen Sep 25, 2023
d6bc97e
Checkpoint this code so we can pursue suspected deadlock JBS issue
kdnilsen Sep 27, 2023
7315c76
Give higher priority to young over old and disable instrumentation
kdnilsen Sep 27, 2023
61fd45f
Misc improvements
kdnilsen Oct 2, 2023
3fb62ea
Merge remote-tracking branch 'origin/master' into make-instantaneous-…
kdnilsen Oct 2, 2023
6c4b757
Multiple improvements mostly redundanant with humongous allocation patch
kdnilsen Oct 6, 2023
e41b5c0
Use int index so loop will terminate
kdnilsen Oct 6, 2023
1775c7b
Merge remote-tracking branch 'gitfarm/make-instantaneous-alloc-rate-t…
kdnilsen Oct 6, 2023
a1eaae8
Fix compare_by_index to deal with equality
kdnilsen Oct 7, 2023
d113124
Fix accounting of mixed-evac candidates selected to defragment old-gen
kdnilsen Oct 10, 2023
8906d8a
Merge remote-tracking branch 'origin/master' into make-instantaneous-…
kdnilsen Oct 25, 2023
33237d2
Fix whitespace
kdnilsen Oct 25, 2023
03bdbf8
Experiment with alternative defaults
kdnilsen Oct 26, 2023
3f9b089
Use FreeSet data to calculate instantaneous allocation spikes
kdnilsen Oct 27, 2023
9a53cce
Debug and Tune for performance
kdnilsen Oct 28, 2023
d63a80c
Some refinements for better performance
kdnilsen Oct 30, 2023
926cd20
More tuning
kdnilsen Oct 31, 2023
a47ad83
Some experiments with penalties
kdnilsen Oct 31, 2023
b68af4e
Fix up handling of penalties
kdnilsen Oct 31, 2023
df9a229
Fix errors in free set implementation
kdnilsen Nov 1, 2023
1156ae4
Only start_idle_span after adjusting penalties for YOUNG gen
kdnilsen Nov 3, 2023
28c8604
Use configuration parameters for certain constants
kdnilsen Nov 4, 2023
5dc835c
Fix error introduced in previous commit to this branch
kdnilsen Nov 6, 2023
8061b63
Improve support for throttling and recalibrate after old marking
kdnilsen Nov 6, 2023
9a1ee5b
Revert ShenandoahFullGCThreshold
kdnilsen Nov 6, 2023
69273c8
Make goodness for acceleration detection adaptive
kdnilsen Nov 14, 2023
ae2bc68
Adjust defaults and report goodness ratio in acceleration trigger
kdnilsen Nov 14, 2023
5524ca5
Merge remote-tracking branch 'origin/master' into make-instantaneous-…
kdnilsen Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -30,7 +30,9 @@
#include "memory/allocation.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahPhaseTimings.hpp"
#include "gc/shenandoah/shenandoahRegulatorThread.hpp"
#include "gc/shenandoah/shenandoahSharedVariables.hpp"
#include "utilities/numberSeq.hpp"

@@ -43,6 +45,12 @@ class ShenandoahAllocationRate : public CHeapObj<mtGC> {

double upper_bound(double sds) const;
bool is_spiking(double rate, double threshold) const;
// Accessor for the stored sampling interval (_interval_sec; presumably seconds, per the field name).
double interval() const { return _interval_sec; }
// Accessor for the stored timestamp of the last sample (_last_sample_time).
double last_sample_time() const { return _last_sample_time; }
private:

double instantaneous_rate(double time, size_t allocated) const;
@@ -71,17 +79,34 @@ class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {

virtual ~ShenandoahAdaptiveHeuristics();

virtual void initialize();

virtual void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
RegionData* data, size_t size,
size_t actual_free);

virtual void adjust_penalty(intx step);

// In case we rebuild free set during idle span, such as when we finish OLD GC marking and add immediate garbage to
// free set, invoke this to recalibrate the triggering heuristic.
void resume_idle_span(size_t mutator_available);

void start_evac_span(size_t mutator_available);

// Memory still available for mutator allocations: the allocation cliff (mutator free at the
// last rebuild) less the mutator allocations reported by the free set since that rebuild.
// Yields zero once allocations have passed the cliff, so the unsigned subtraction cannot wrap.
inline size_t allocatable() const {
  const size_t allocated = _freeset->get_mutator_allocations();
  if (allocated > _allocation_cliff) {
    return 0;
  }
  return _allocation_cliff - allocated;
}

void record_cycle_start();
void record_degenerated_cycle_start(bool out_of_cycle);
void record_success_concurrent(bool abbreviated);
void record_success_degenerated();
void record_success_full();

virtual bool should_start_gc();

// Name reported for this heuristic: the fixed string "Adaptive".
virtual const char* name() { return "Adaptive"; }
// This heuristic reports itself as neither diagnostic nor experimental.
virtual bool is_diagnostic() { return false; }
virtual bool is_experimental() { return false; }
@@ -99,6 +124,9 @@ class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {
const static double LOWEST_EXPECTED_AVAILABLE_AT_END;
const static double HIGHEST_EXPECTED_AVAILABLE_AT_END;

const static size_t GC_TIME_SAMPLE_SIZE;
const static size_t HISTORICAL_PERIOD_SAMPLE_SIZE;

friend class ShenandoahAllocationRate;

// Used to record the last trigger that signaled to start a GC.
@@ -113,6 +141,11 @@ class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {
void adjust_margin_of_error(double amount);
void adjust_spike_threshold(double amount);

void add_rate_to_acceleration_history(double timestamp, double rate);
size_t accelerated_consumption(double& acceleration, double& current_rate, double predicted_cycle_time) const;

void start_idle_span();

protected:
ShenandoahAllocationRate _allocation_rate;

@@ -128,7 +161,7 @@ class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {
// rate exceeds this threshold, a GC cycle is started. As this value
// decreases the sensitivity to allocation spikes increases. In other
// words, lowering the spike threshold will tend to increase the number
// of concurrent GCs.
// of concurrent GCs because more scenarios will be seen as spiking.
double _spike_threshold_sd;

// Remember which trigger is responsible for the last GC cycle. When the
@@ -142,6 +175,47 @@ class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics {
// source of feedback to adjust trigger parameters.
TruncatedSeq _available;

ShenandoahFreeSet* _freeset;
ShenandoahRegulatorThread* _regulator_thread;

size_t _previous_total_allocations;
double _previous_allocation_timestamp;
size_t _total_allocations_at_start_of_idle;
size_t _allocation_cliff;

// Keep track of GC_TIME_SAMPLE_SIZE most recent concurrent GC cycle times
uint _gc_time_first_sample_index;
uint _gc_time_num_samples;
double* const _gc_time_timestamps;
double* const _gc_time_samples;
double* const _gc_time_xy; // timestamp * sample
double* const _gc_time_xx; // timestamp squared
double _gc_time_sum_of_timestamps;
double _gc_time_sum_of_samples;
double _gc_time_sum_of_xy;
double _gc_time_sum_of_xx;

double _gc_time_m; // slope
double _gc_time_b; // y-intercept
double _gc_time_sd; // sd on deviance from prediction

void add_gc_time(double timestamp_at_start, double duration);
void add_degenerated_gc_time(double timestamp_at_start, double duration);
double predict_gc_time(double timestamp_at_start);

// Keep track of SPIKE_ACCELERATION_SAMPLE_SIZE most recent spike allocation rate measurements. Note that it is
// typical to experience a small spike following end of GC cycle, as mutator threads refresh their TLABs. But
// there is generally an abundance of memory at this time as well, so this will not generally trigger GC.
uint _spike_acceleration_first_sample_index;
uint _spike_acceleration_num_samples;
double* const _spike_acceleration_rate_samples;
double* const _spike_acceleration_rate_timestamps;

size_t _most_recent_headroom_at_start_of_idle;

double _acceleration_goodness_ratio;
size_t _consecutive_goodness;

size_t min_free_threshold();
};

Original file line number Diff line number Diff line change
@@ -47,7 +47,8 @@ ShenandoahHeuristics::ShenandoahHeuristics(ShenandoahSpaceInfo* space_info) :
_space_info(space_info),
_region_data(nullptr),
_guaranteed_gc_interval(0),
_cycle_start(os::elapsedTime()),
_precursor_cycle_start(os::elapsedTime()),
_cycle_start(_precursor_cycle_start),
_last_cycle_end(0),
_gc_times_learned(0),
_gc_time_penalties(0),
@@ -180,6 +181,15 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec
collection_set->count());
}

// Records the start of a degenerated cycle.
//
// _cycle_start is always set to the current elapsed time. _precursor_cycle_start is set to that
// same time when out_of_cycle is true; otherwise it takes the previous value of _cycle_start, so
// the start of the cycle that was in progress is retained.
void ShenandoahHeuristics::record_degenerated_cycle_start(bool out_of_cycle) {
  const double now = os::elapsedTime();
  // Read the old _cycle_start before it is overwritten below.
  _precursor_cycle_start = out_of_cycle ? now : _cycle_start;
  _cycle_start = now;
}

// Stamps the current elapsed time as the start of the cycle (_cycle_start).
void ShenandoahHeuristics::record_cycle_start() {
  const double now = os::elapsedTime();
  _cycle_start = now;
}
@@ -288,3 +298,8 @@ void ShenandoahHeuristics::initialize() {
// Time elapsed since the most recently recorded cycle start (_cycle_start).
double ShenandoahHeuristics::elapsed_cycle_time() const {
  const double now = os::elapsedTime();
  return now - _cycle_start;
}

// Time elapsed since _precursor_cycle_start. When the degenerated cycle took over from a cycle
// already in progress, this includes the time spent in the abandoned concurrent GC cycle that
// preceded the degenerated cycle.
double ShenandoahHeuristics::elapsed_degenerated_cycle_time() const {
return os::elapsedTime() - _precursor_cycle_start;
}
Original file line number Diff line number Diff line change
@@ -99,6 +99,7 @@ class ShenandoahHeuristics : public CHeapObj<mtGC> {

size_t _guaranteed_gc_interval;

double _precursor_cycle_start;
double _cycle_start;
double _last_cycle_end;

@@ -119,7 +120,7 @@ class ShenandoahHeuristics : public CHeapObj<mtGC> {
RegionData* data, size_t data_size,
size_t free) = 0;

void adjust_penalty(intx step);
virtual void adjust_penalty(intx step);

public:
ShenandoahHeuristics(ShenandoahSpaceInfo* space_info);
@@ -135,6 +136,8 @@ class ShenandoahHeuristics : public CHeapObj<mtGC> {

virtual void record_cycle_start();

virtual void record_degenerated_cycle_start(bool out_of_cycle);

virtual void record_cycle_end();

virtual bool should_start_gc();
@@ -163,6 +166,7 @@ class ShenandoahHeuristics : public CHeapObj<mtGC> {
virtual void initialize();

double elapsed_cycle_time() const;
double elapsed_degenerated_cycle_time() const;
};

#endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHHEURISTICS_HPP
1 change: 1 addition & 0 deletions src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp
Original file line number Diff line number Diff line change
@@ -201,6 +201,7 @@ bool ShenandoahConcurrentGC::collect(GCCause::Cause cause) {
}
}

size_t muator_free;
if (heap->has_forwarded_objects()) {
// Perform update-refs phase.
vmop_entry_init_updaterefs();
19 changes: 8 additions & 11 deletions src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp
Original file line number Diff line number Diff line change
@@ -110,7 +110,7 @@ void ShenandoahControlThread::run_service() {
ShenandoahCollectorPolicy* policy = heap->shenandoah_policy();

// Heuristics are notified of allocation failures here and other outcomes
// of the cycle. They're also used here to control whether the Nth consecutive
// of the cycle. They are also used here to control whether the Nth consecutive
// degenerated cycle should be 'promoted' to a full cycle. The decision to
// trigger a cycle or not is evaluated on the regulator thread.
ShenandoahHeuristics* global_heuristics = heap->global_generation()->heuristics();
@@ -163,12 +163,11 @@ void ShenandoahControlThread::run_service() {
set_gc_mode(stw_degenerated);
} else {
// TODO: if humongous_alloc_failure_pending, there might be value in trying a "compacting" degen before
// going all the way to full. But it's a lot of work to implement this, and it may not provide value.
// going all the way to full. But it is a lot of work to implement this, and it may not provide value.
// A compacting degen can move young regions around without doing full old-gen mark (relying upon the
// remembered set scan), so it might be faster than a full gc.
//
// Longer term, think about how to defragment humongous memory concurrently.

heuristics->record_allocation_failure_gc();
policy->record_alloc_failure_to_full();
generation = select_global_generation();
@@ -213,7 +212,7 @@ void ShenandoahControlThread::run_service() {
if (_requested_gc_cause == GCCause::_shenandoah_concurrent_gc) {
if (_requested_generation == OLD && heap->doing_mixed_evacuations()) {
// If a request to start an old cycle arrived while an old cycle was running, but _before_
// it chose any regions for evacuation we don't want to start a new old cycle. Rather, we want
// it chose any regions for evacuation we do not want to start a new old cycle. Rather, we want
// the heuristic to run a young collection so that we can evacuate some old regions.
assert(!heap->is_concurrent_old_mark_in_progress(), "Should not be running mixed collections and concurrent marking");
generation = YOUNG;
@@ -224,7 +223,7 @@ void ShenandoahControlThread::run_service() {
cause = GCCause::_shenandoah_concurrent_gc;
set_gc_mode(default_mode);

// Don't start a new old marking if there is one already in progress
// Do not start a new old marking if there is one already in progress
if (generation == OLD && heap->is_concurrent_old_mark_in_progress()) {
set_gc_mode(servicing_old);
}
@@ -235,7 +234,7 @@ void ShenandoahControlThread::run_service() {
heap->set_unload_classes(false);
}

// Don't want to spin in this loop and start a cycle every time, so
// Do not want to spin in this loop and start a cycle every time, so
// clear requested gc cause. This creates a race with callers of the
// blocking 'request_gc' method, but there it loops and resets the
// '_requested_gc_cause' until a full cycle is completed.
@@ -399,7 +398,7 @@ void ShenandoahControlThread::run_service() {
last_shrink_time = current;
}

// Don't wait around if there was an allocation failure - start the next cycle immediately.
// Do not wait around if there was an allocation failure - start the next cycle immediately.
if (!is_alloc_failure_gc()) {
// The timed wait is necessary because this thread has a responsibility to send
// 'alloc_words' to the pacer when it does not perform a GC.
@@ -408,7 +407,7 @@ void ShenandoahControlThread::run_service() {
}
}

// Wait for the actual stop(), can't leave run_service() earlier.
// Wait for the actual stop(), cannot leave run_service() earlier.
while (!should_terminate()) {
os::naked_short_sleep(ShenandoahControlIntervalMin);
}
@@ -799,7 +798,7 @@ bool ShenandoahControlThread::service_stw_degenerated_cycle(GCCause::Cause cause
ShenandoahHeap* const heap = ShenandoahHeap::heap();

GCIdMark gc_id_mark;
ShenandoahGCSession session(cause, _degen_generation);
ShenandoahDegeneratedGCSession session(cause, _degen_generation, point == ShenandoahGC::_degenerated_outside_cycle);

ShenandoahDegenGC gc(point, _degen_generation);
gc.collect(cause);
@@ -900,8 +899,6 @@ bool ShenandoahControlThread::request_concurrent_gc(ShenandoahGenerationType gen
}

if (preempt_old_marking(generation)) {
log_info(gc)("Preempting old generation mark to allow %s GC", shenandoah_generation_name(generation));
assert(gc_mode() == servicing_old, "Expected to be servicing old, but was: %s.", gc_mode_name(gc_mode()));
_requested_gc_cause = GCCause::_shenandoah_concurrent_gc;
_requested_generation = generation;
_preemption_requested.set();
Loading