Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8342382: Implementation of JEP G1: Improve Application Throughput with a More Efficient Write-Barrier #23739

Open
wants to merge 29 commits into
base: master
Choose a base branch
from
Open
Changes from 2 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0372026
Card table as DCQ
tschatzl Feb 11, 2025
7782295
* remove some commented out debug code
tschatzl Feb 24, 2025
9e26abb
* remove mention of "enqueue" or "enqueuing" for actions related to p…
tschatzl Feb 24, 2025
3004a96
* fix crash when writing dirty cards for memory regions during card t…
tschatzl Feb 24, 2025
b8100b9
* mdoerr review: fix comments in ppc code
tschatzl Feb 24, 2025
0100d8e
* only provide byte map base for JavaThreads
tschatzl Feb 25, 2025
9ef9c5f
* remove unnecessarily added logging
tschatzl Feb 25, 2025
e51eec8
* ayang review 1
tschatzl Feb 28, 2025
7d361fc
* ayang review 1 (ctd)
tschatzl Feb 28, 2025
d87935a
* fix assert
tschatzl Feb 28, 2025
810bf2d
* fix comment (trailing whitespace)
tschatzl Mar 3, 2025
b3dd008
ayang review 2
tschatzl Mar 3, 2025
8f46dc9
* iwalulya initial comments
tschatzl Mar 4, 2025
9e2ee54
* do not change card table base for gc threads during swapping
tschatzl Mar 4, 2025
442d9ea
* iwalulya review 2
tschatzl Mar 4, 2025
fc674f0
* ayang review - fix comment
tschatzl Mar 4, 2025
b4d19d9
iwalulya review
tschatzl Mar 4, 2025
4a97811
ayang review
tschatzl Mar 4, 2025
a457e6e
* fix whitespace
tschatzl Mar 5, 2025
350a4fa
* iwalulya review
tschatzl Mar 6, 2025
c994000
* ayang review 3
tschatzl Mar 7, 2025
93b884f
* fix card table verification crashes: in the first refinement phase,…
tschatzl Mar 8, 2025
758fac0
* optimized RISCV gen_write_ref_array_post_barrier() implementation c…
tschatzl Mar 11, 2025
aec9505
Merge branch 'master' into 8342382-card-table-instead-of-dcq
tschatzl Mar 12, 2025
3766b76
* ayang review
tschatzl Mar 12, 2025
7861117
* when aborting refinement during full collection, the global card ta…
tschatzl Mar 13, 2025
51a9eed
* ayang review
tschatzl Mar 14, 2025
b073017
Merge branch 'master' into 8342381-card-table-instead-of-dcq
tschatzl Mar 14, 2025
447fe39
* more documentation on why we need to rendezvous the gc threads
tschatzl Mar 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/g1/g1BarrierSet.hpp
Original file line number Diff line number Diff line change
@@ -51,7 +51,7 @@ class G1CardTable;
// removes the need for any fine-grained (per mutator write) synchronization between
// them, keeping the write barrier simple.
//
// The refinement threads mark cards in the the current collection set specially on the
// The refinement threads mark cards in the current collection set specially on the
// card table - this is fine wrt synchronization with the mutator, because at
// most the mutator will overwrite it again if there is a race, as G1 will scan the
// entire card either way during the GC pause.
6 changes: 6 additions & 0 deletions src/hotspot/share/gc/g1/g1CardTable.inline.hpp
Original file line number Diff line number Diff line change
@@ -44,6 +44,12 @@ inline bool G1CardTable::mark_clean_as_from_remset(CardValue* card) {
}

// Bitwise select between two words: for every bit position the result takes
// the bit of a where the corresponding mask bit is 0, and the bit of b where
// the corresponding mask bit is 1.
//
// Example:
//   a      = 0xAAAAAAAA
//   b      = 0xBBBBBBBB
//   mask   = 0xFF00FF00
//   result = 0xBBAABBAA
inline size_t blend(size_t a, size_t b, size_t mask) {
  // Bits of a that survive (mask bit clear).
  const size_t kept_from_a = a & ~mask;
  // Bits of b that are selected (mask bit set).
  const size_t taken_from_b = b & mask;
  // The two parts cover disjoint bit positions, so or-ing them combines them.
  return kept_from_a | taken_from_b;
}
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/g1/g1CardTableClaimTable.cpp
Original file line number Diff line number Diff line change
@@ -42,7 +42,7 @@ G1CardTableClaimTable::~G1CardTableClaimTable() {
FREE_C_HEAP_ARRAY(uint, _card_claims);
}

void G1CardTableClaimTable::initialize(size_t max_reserved_regions) {
void G1CardTableClaimTable::initialize(uint max_reserved_regions) {
assert(_card_claims == nullptr, "Must not be initialized twice");
_card_claims = NEW_C_HEAP_ARRAY(uint, max_reserved_regions, mtGC);
_max_reserved_regions = max_reserved_regions;
4 changes: 2 additions & 2 deletions src/hotspot/share/gc/g1/g1CardTableClaimTable.hpp
Original file line number Diff line number Diff line change
@@ -41,7 +41,7 @@ class G1HeapRegionClosure;
// Claiming works on full region (all cards in region) or a range of contiguous cards
// (chunk). Chunk size is given at construction time.
class G1CardTableClaimTable : public CHeapObj<mtGC> {
size_t _max_reserved_regions;
uint _max_reserved_regions;

// Card table iteration claim values for every heap region, from 0 (completely unclaimed)
// to (>=) G1HeapRegion::CardsPerRegion (completely claimed).
@@ -57,7 +57,7 @@ class G1CardTableClaimTable : public CHeapObj<mtGC> {
~G1CardTableClaimTable();

// Allocates the data structure and initializes the claims to unclaimed.
void initialize(size_t max_reserved_regions);
void initialize(uint max_reserved_regions);

void reset_all_claims_to_unclaimed();
void reset_all_claims_to_claimed();
69 changes: 47 additions & 22 deletions src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp
Original file line number Diff line number Diff line change
@@ -125,14 +125,14 @@
G1ConcurrentRefineWorkState::G1ConcurrentRefineWorkState(uint max_reserved_regions) :
_state(State::Idle),
_refine_work_epoch(0),
_sweep_state(new G1CardTableClaimTable(G1CollectedHeap::get_chunks_per_region_for_merge())),
_sweep_table(new G1CardTableClaimTable(G1CollectedHeap::get_chunks_per_region_for_merge())),
_stats()
{
_sweep_state->initialize(max_reserved_regions);
_sweep_table->initialize(max_reserved_regions);
}

G1ConcurrentRefineWorkState::~G1ConcurrentRefineWorkState() {
delete _sweep_state;
delete _sweep_table;
}

void G1ConcurrentRefineWorkState::set_state_start_time() {
@@ -264,24 +264,34 @@
return advance_state(State::SnapshotHeap);
}

void G1ConcurrentRefineWorkState::snapshot_heap() {
assert_state(State::SnapshotHeap);
void G1ConcurrentRefineWorkState::snapshot_heap(bool concurrent) {
if (concurrent) {
assert_state(State::SnapshotHeap);

set_state_start_time();
set_state_start_time();
} else {
assert(is_in_progress() && _state < State::SnapshotHeap, "Must be before %s but is %s", state_name(State::SnapshotHeap), state_name(_state));
}

snapshot_heap_into(_sweep_state);
snapshot_heap_into(_sweep_table);

advance_state(State::SweepRT);
if (concurrent) {
advance_state(State::SweepRT);
}
}

bool G1ConcurrentRefineWorkState::sweep_rt_step() {
void G1ConcurrentRefineWorkState::sweep_rt_start() {
assert_state(State::SweepRT);

set_state_start_time();
}

bool G1ConcurrentRefineWorkState::sweep_rt_step() {
assert_state(State::SweepRT);

G1ConcurrentRefine* cr = G1CollectedHeap::heap()->concurrent_refine();

G1ConcurrentRefineWorkTask task(_sweep_state, &_stats, cr->num_threads_wanted());
G1ConcurrentRefineWorkTask task(_sweep_table, &_stats, cr->num_threads_wanted());
cr->run_with_refinement_workers(&task);

if (task.sweep_completed()) {
@@ -332,18 +342,16 @@
return has_sweep_rt_work;
}

void G1ConcurrentRefineWorkState::snapshot_heap_into(G1CardTableClaimTable* sweep_state) {
sweep_state->reset_all_claims_to_claimed();
void G1ConcurrentRefineWorkState::snapshot_heap_into(G1CardTableClaimTable* sweep_table) {
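// G1CollectedHeap::heap_region_iterate() will only visit committed regions. In the
// state table, first mark all regions as claimed so that regions that are not
// visited stay claimed; only visited, non-free regions are reset to unclaimed below.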

Check failure on line 347 in src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp

openjdk / jcheck-openjdk/jdk-23739

Whitespace error

Column 16: trailing whitespace
sweep_table->reset_all_claims_to_claimed();

class SnapshotRegionsClosure : public G1HeapRegionClosure {
G1CardTableClaimTable* _sweep_state;
G1CardTableClaimTable* _sweep_table;

public:
size_t _num_clean;
size_t _num_dirty;
size_t _num_to_cset;

SnapshotRegionsClosure(G1CardTableClaimTable* sweep_state) : G1HeapRegionClosure(), _sweep_state(sweep_state), _num_clean(0), _num_dirty(0), _num_to_cset(0) { }
SnapshotRegionsClosure(G1CardTableClaimTable* sweep_table) : G1HeapRegionClosure(), _sweep_table(sweep_table) { }

bool do_heap_region(G1HeapRegion* r) override {
if (!r->is_free()) {
@@ -352,11 +360,11 @@
// that were allocated before the handshake; the handshake makes such
// regions' metadata visible to all threads, and we do not care about
// humongous regions that were allocated afterwards.
_sweep_state->reset_to_unclaimed(r->hrm_index());
_sweep_table->reset_to_unclaimed(r->hrm_index());
}
return false;
}
} cl(sweep_state);
} cl(sweep_table);
G1CollectedHeap::heap()->heap_region_iterate(&cl);
}

@@ -391,14 +399,31 @@
return _thread_control.initialize(this);
}

// Returns the refinement work state prepared for merging: the in-progress
// refinement work is completed non-concurrently, and if that does not leave
// usable sweep claims, a heap snapshot is taken on the spot.
G1ConcurrentRefineWorkState& G1ConcurrentRefine::refine_state_for_merge() {
  G1ConcurrentRefineWorkState& state = refine_state();

  // complete() reports whether sweep claims from the interrupted refinement
  // round are available for reuse.
  if (state.complete(false)) {
    log_debug(gc, refine)("Continue existing work");
    return state;
  }

  // Refinement was interrupted before a snapshot existed. Threads may hold a
  // mix of already swapped and not yet swapped card tables, so the swapped
  // card tables may already contain dirty cards. Be conservative: take the
  // snapshot right now so that the card tables of all regions it covers
  // get scanned.
  log_debug(gc, refine)("Create work from scratch");
  state.snapshot_heap(false /* concurrent */);
  return state;
}

// Hands the given task to the refinement thread control, to be run with the
// currently wanted number of refinement threads.
void G1ConcurrentRefine::run_with_refinement_workers(WorkerTask* task) {
  const auto num_workers = num_threads_wanted();
  _thread_control.run_task(task, num_workers);
}

void G1ConcurrentRefine::notify_region_reclaimed(G1HeapRegion* r) {
assert_at_safepoint();
if (_refine_state.is_in_progress()) {
_refine_state.sweep_state()->claim_all_cards(r->hrm_index());
_refine_state.sweep_table()->claim_all_cards(r->hrm_index());
}
}

@@ -517,7 +542,7 @@
size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
};

// Adjust the target length (in regions) of the young gen, based on the the
// Adjust the target length (in regions) of the young gen, based on the
// current length of the remembered sets.
//
// At the end of the GC G1 determines the length of the young gen based on
11 changes: 7 additions & 4 deletions src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp
Original file line number Diff line number Diff line change
@@ -113,7 +113,7 @@ class G1ConcurrentRefineWorkState {
size_t _refine_work_epoch;

// Current heap snapshot.
G1CardTableClaimTable* _sweep_state;
G1CardTableClaimTable* _sweep_table;

// Start times for all states.
Ticks _state_start[static_cast<uint>(State::Last)];
@@ -140,14 +140,15 @@ class G1ConcurrentRefineWorkState {
bool swap_global_card_table();
bool swap_java_threads_ct();
bool swap_gc_threads_ct();
void snapshot_heap();
void snapshot_heap(bool concurrent = true);
void sweep_rt_start();
bool sweep_rt_step();

bool complete(bool concurrent, bool print_log = true);

static void snapshot_heap_into(G1CardTableClaimTable* sweep_state);
static void snapshot_heap_into(G1CardTableClaimTable* sweep_table);

G1CardTableClaimTable* sweep_state() { return _sweep_state; }
G1CardTableClaimTable* sweep_table() { return _sweep_table; }
G1ConcurrentRefineStats* stats() { return &_stats; }
void reset_stats();

@@ -231,6 +232,8 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {

G1ConcurrentRefineWorkState& refine_state() { return _refine_state; }

G1ConcurrentRefineWorkState& refine_state_for_merge();

void run_with_refinement_workers(WorkerTask* task);

void notify_region_reclaimed(G1HeapRegion* r);
2 changes: 2 additions & 0 deletions src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp
Original file line number Diff line number Diff line change
@@ -177,6 +177,8 @@ void G1ConcurrentRefineThread::do_refinement() {

// 5. Sweep refinement table until done
bool interrupted_by_gc = false;

state.sweep_rt_start();
while (true) {
bool completed = state.sweep_rt_step();

56 changes: 18 additions & 38 deletions src/hotspot/share/gc/g1/g1RemSet.cpp
Original file line number Diff line number Diff line change
@@ -90,7 +90,7 @@
class G1RemSetScanState : public CHeapObj<mtGC> {
class G1DirtyRegions;

G1CardTableClaimTable _card_state;
G1CardTableClaimTable _card_claim_table;
// The complete set of regions whose card table needs to be cleared at the end
// of GC because we scribbled over these card table entries.
//
@@ -142,11 +142,11 @@ class G1RemSetScanState : public CHeapObj<mtGC> {
}

void add_dirty_region(uint region) {
if (_contains[region]) {
return;
}
if (_contains[region]) {
return;
}

bool marked_as_dirty = Atomic::cmpxchg(&_contains[region], false, true) == false;
bool marked_as_dirty = Atomic::cmpxchg(&_contains[region], false, true) == false;
if (marked_as_dirty) {
uint allocated = Atomic::fetch_then_add(&_cur_idx, 1u);
_buffer[allocated] = region;
@@ -234,7 +234,7 @@ class G1ClearCardTableTask : public G1AbstractSubTask {

public:
G1RemSetScanState() :
_card_state(G1CollectedHeap::get_chunks_per_region_for_scan()),
_card_claim_table(G1CollectedHeap::get_chunks_per_region_for_scan()),
_all_dirty_regions(nullptr),
_next_dirty_regions(nullptr),
_scan_top(nullptr) { }
@@ -243,8 +243,8 @@ class G1ClearCardTableTask : public G1AbstractSubTask {
FREE_C_HEAP_ARRAY(HeapWord*, _scan_top);
}

void initialize(size_t max_reserved_regions) {
_card_state.initialize(max_reserved_regions);
void initialize(uint max_reserved_regions) {
_card_claim_table.initialize(max_reserved_regions);
_scan_top = NEW_C_HEAP_ARRAY(HeapWord*, max_reserved_regions, mtGC);
}

@@ -253,7 +253,7 @@ class G1ClearCardTableTask : public G1AbstractSubTask {
// become used during the collection these values must be valid
// for those regions as well.
void prepare() {
size_t max_reserved_regions = _card_state.max_reserved_regions();
size_t max_reserved_regions = _card_claim_table.max_reserved_regions();

for (size_t i = 0; i < max_reserved_regions; i++) {
clear_scan_top((uint)i);
@@ -268,7 +268,7 @@ class G1ClearCardTableTask : public G1AbstractSubTask {
// regions.
//assert(_next_dirty_regions->size() == 0, "next dirty regions must be empty");

_card_state.reset_all_claims_to_unclaimed();
_card_claim_table.reset_all_claims_to_unclaimed();
}

void complete_evac_phase(bool merge_dirty_regions) {
@@ -331,7 +331,7 @@ class G1ClearCardTableTask : public G1AbstractSubTask {
}

bool has_cards_to_scan(uint region) {
return _card_state.has_unclaimed_cards(region);
return _card_claim_table.has_unclaimed_cards(region);
}

void add_dirty_region(uint const region) {
@@ -367,7 +367,7 @@ class G1ClearCardTableTask : public G1AbstractSubTask {
}

G1CardTableChunkClaimer claimer(uint region_idx) {
return G1CardTableChunkClaimer(&_card_state, region_idx);
return G1CardTableChunkClaimer(&_card_claim_table, region_idx);
}
};

@@ -827,20 +827,20 @@ class MergeRefinementTableTask : public WorkerTask {
while (claim.has_next()) {
size_t const start_idx = region_card_base_idx + claim.value();

size_t* card_cur_card = (size_t*)card_table->byte_for_index(start_idx);
size_t* card_cur_word = (size_t*)card_table->byte_for_index(start_idx);

size_t* refinement_cur_card = (size_t*)refinement_table->byte_for_index(start_idx);
size_t* const refinement_end_card = refinement_cur_card + claim.size() / (sizeof(size_t) / sizeof(G1CardTable::CardValue));

for (; refinement_cur_card < refinement_end_card; ++refinement_cur_card, ++card_cur_card) {
for (; refinement_cur_card < refinement_end_card; ++refinement_cur_card, ++card_cur_word) {
size_t value = *refinement_cur_card;
*refinement_cur_card = G1CardTable::WordAllClean;
// Dirty is "0", so we need to logically-and here. This is also safe
// for all other possible values in the card table; at this point this
// can be either g1_dirty_card or g1_to_cset_card which will both be
// scanned.
size_t new_value = *card_cur_card & value;
*card_cur_card = new_value;
size_t new_value = *card_cur_word & value;
*card_cur_word = new_value;
}
}

@@ -1230,30 +1230,10 @@ class G1MergeHeapRootsTask : public WorkerTask {
static void merge_refinement_table() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();

G1CardTableClaimTable* claim;
G1CardTableClaimTable constructed(G1CollectedHeap::get_chunks_per_region_for_merge());

G1ConcurrentRefineWorkState& state = g1h->concurrent_refine()->refine_state();
bool has_sweep_claims = state.complete(false);
if (has_sweep_claims) {
log_debug(gc, refine)("Continue existing work");
claim = state.sweep_state();
} else {
// Refinement has been interrupted without having a snapshot. There may
// be a mix of already swapped and not-swapped card tables assigned to threads,
// so they might have already dirtied the swapped card tables.
// Conservatively scan all (non-free, non-committed) region's card tables,
// creating the snapshot right now.
log_debug(gc, refine)("Create work from scratch");

constructed.initialize(g1h->max_reserved_regions());
G1ConcurrentRefineWorkState::snapshot_heap_into(&constructed);
claim = &constructed;
}

G1ConcurrentRefineWorkState& state = g1h->concurrent_refine()->refine_state_for_merge();
WorkerThreads* workers = g1h->workers();

MergeRefinementTableTask cl(claim, workers->active_workers());
MergeRefinementTableTask cl(state.sweep_table(), workers->active_workers());
log_debug(gc, ergo)("Running %s using %u workers", cl.name(), workers->active_workers());
workers->run_task(&cl);
}