Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8310031: Parallel: Implement better work distribution for large object arrays in old gen #14846

Closed
Changes from 2 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
53c7b66
8310031: Parallel: Implement better work distribution for large objec…
reinrich Jun 20, 2023
0eb924e
Make sure to skip stripes where no object starts
reinrich Jul 26, 2023
5b802ed
Limit effect of previous commit to large array handling
reinrich Jul 27, 2023
d7ab2b0
Apply Thomas' suggestions
reinrich Sep 11, 2023
67edf28
Merge branch 'master'
reinrich Sep 11, 2023
d535a10
objArrayOopDesc::oop_oop_iterate_bounded must be defined in objArrayO…
reinrich Sep 12, 2023
9a2b230
100 stripes per active worker thread
reinrich Sep 14, 2023
3e6c1b7
Scan large array stripe from first dirty card to stripe end
reinrich Sep 18, 2023
71a3c44
Revert back to precise scanning of large object arrays
reinrich Sep 19, 2023
bba1d2a
find_first_clean_card: avoid expensive start array queries on long ar…
reinrich Sep 20, 2023
ac6bddb
Avoid expensive start array queries on long arrays
reinrich Sep 20, 2023
86747ff
Feedback Thomas
reinrich Sep 22, 2023
268e208
Small clean-ups
reinrich Sep 25, 2023
6033358
Limit stripe size to 1m with at least 8 threads
reinrich Sep 25, 2023
d4b5fd4
Do all large array scanning in separate method
reinrich Sep 22, 2023
a4d6af9
First card of large array should be cleared if dirty
reinrich Sep 25, 2023
d75bd60
Eliminate special case for scanning the large array end
reinrich Sep 26, 2023
50737dd
Remove stripe size adaptations and cache potentially expensive start …
reinrich Sep 27, 2023
780a03d
Reset to master
reinrich Sep 28, 2023
817b164
Split work strictly at stripe boundaries
reinrich Oct 5, 2023
22fe849
Parallel copying of imprecise marks to stripes
reinrich Oct 5, 2023
78a08cf
Overlap scavenge with pre-scavenge
reinrich Oct 6, 2023
d845e65
Missed acquire semantics
reinrich Oct 6, 2023
8b13c83
Cleanup
reinrich Oct 9, 2023
272ab97
find_first_clean_card: return end_card if final object extends beyond…
reinrich Oct 6, 2023
8b544d8
Shadow table per stripe
reinrich Oct 9, 2023
8b22c28
Cleanup
reinrich Oct 11, 2023
e212413
Don't overlap card table processing with scavenging for simplicity
reinrich Oct 11, 2023
1462bbf
Simplification suggested by Albert
reinrich Oct 11, 2023
c9e040f
Make sure to scan obj reaching in just once
reinrich Oct 12, 2023
443f482
Re-cleanup (was accidentally reverted)
reinrich Oct 12, 2023
381e001
Merge branch 'master'
reinrich Oct 12, 2023
d12e96e
Feedback Albert
reinrich Oct 12, 2023
607f0c2
Remove obsolete comment
reinrich Oct 13, 2023
bbb128e
Merge branch 'master'
reinrich Oct 19, 2023
f796551
Use better name: _preprocessing_active_workers
reinrich Oct 19, 2023
26f0636
preprocess_card_table_parallel should be private
reinrich Oct 19, 2023
7843a02
Small cleanup changes suggested by Thomas.
reinrich Oct 19, 2023
bd853c4
More small changes Thomas suggested (line-breaks needed)
reinrich Oct 19, 2023
fd5d072
Review Thomas
reinrich Oct 19, 2023
7c20c9f
Cleanup/improve comments
reinrich Oct 20, 2023
67416b2
Accepting Thomas' (smaller) suggestions
reinrich Oct 20, 2023
a443042
Review Thomas (PSStripeShadowCardTable)
reinrich Oct 20, 2023
71b0848
Forgot to move comment to PSStripeShadowCardTable.
reinrich Oct 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 63 additions & 30 deletions src/hotspot/share/gc/parallel/psCardTable.cpp
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "utilities/spinYield.hpp"
#include "utilities/align.hpp"

// Checks an individual oop for missing precise marks. Mark
@@ -212,8 +213,11 @@ class ObjStartCache : public StackObj {
}
};

void PSCardTable::pre_scavenge(uint active_workers) {
_scavenge_phase1_active_workers = active_workers;
// Initializes the pre-scavenge synchronization state before the parallel
// pre-scavenge workers run:
//  - both worker counters start at active_workers,
//  - the first synchronization goal is placed _pre_scavenge_sync_interval
//    past old_gen_bottom (pointer arithmetic on HeapWord*),
//  - no completed top has been published yet (nullptr).
void PSCardTable::pre_scavenge(HeapWord* old_gen_bottom, uint active_workers) {
// Number of workers that have not yet finished pre_scavenge_parallel().
_pre_scavenge_active_workers = active_workers;
// Number of workers that still have to signal the current goal.
_pre_scavenge_current_goal_active_workers = active_workers;
_pre_scavenge_current_goal = old_gen_bottom + _pre_scavenge_sync_interval;
_pre_scavenge_completed_top = nullptr;
}

void PSCardTable::clear_cards(CardValue* const start, CardValue* const end) {
@@ -327,6 +331,50 @@ void PSCardTable::process_range(T& start_cache,
}
}

// Propagate imprecise card marks from object start to the stripes an object extends to.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Propagate imprecise card marks from object start to the stripes an object extends to.
// Propagate imprecise card marks from object start to all stripes assigned to this thread that the object extends into.

(I saw that this is actually duplicated from the .hpp file. Better to improve the one in the .hpp file and remove this one)

// Pre-scavenging and scavenging can overlap.
//
// Visits every n_stripes-th stripe of [old_gen_bottom, old_gen_top), starting
// with stripe number stripe_index. If the first card of a visited stripe is
// clean, but a non-array object that starts before the stripe has a dirty card
// at its start, the first card of the stripe is dirtied as well.
//
// Progress is published in steps of _pre_scavenge_sync_interval. Each worker
// signals the current goal exactly once, when the stripe it is looking at is
// at or beyond the goal. The last worker to signal publishes the goal as
// _pre_scavenge_completed_top and sets up the next goal. Before returning the
// worker decrements _pre_scavenge_active_workers.
void PSCardTable::pre_scavenge_parallel(ObjectStartArray* start_array,
                                        HeapWord* old_gen_bottom,
                                        HeapWord* old_gen_top,
                                        uint stripe_index,
                                        uint n_stripes) {
  const size_t num_cards_in_slice = num_cards_in_stripe * n_stripes;
  CardValue* cur_card = byte_for(old_gen_bottom) + stripe_index * num_cards_in_stripe;
  CardValue* const end_card = byte_for(old_gen_top - 1) + 1;
  // Last goal this worker has signaled; prevents signaling a goal twice.
  HeapWord* signaled_goal = nullptr;
  ObjStartCache start_cache(start_array);

  for ( /* empty */ ; cur_card < end_card; cur_card += num_cards_in_slice) {
    HeapWord* stripe_addr = addr_for(cur_card);
    if (!is_dirty(cur_card)) {
      HeapWord* first_obj_addr = start_cache.object_start(stripe_addr);
      if (first_obj_addr < stripe_addr) {
        oop first_obj = cast_to_oop(first_obj_addr);
        if (!first_obj->is_array() && is_dirty(byte_for(first_obj_addr))) {
          // Potentially imprecisely marked dirty.
          // Mark first card of stripe dirty too.
          *cur_card = dirty_card_val();
        }
      }
    }
    // Synchronization with already scavenging threads.
    if (signaled_goal < _pre_scavenge_current_goal && _pre_scavenge_current_goal <= stripe_addr) {
      signaled_goal = (HeapWord*) _pre_scavenge_current_goal;
      // Decide on the value returned by the atomic decrement. Decrementing and
      // then re-reading the counter would let two workers both observe 0, both
      // reset the counter and both advance the goal, so that a worker could be
      // counted twice for one goal and the completed top published too early.
      if (Atomic::sub(&_pre_scavenge_current_goal_active_workers, 1) == 0) {
        // We're the last one to reach the current goal.
        // Publish completed top; pairs with the load_acquire of the scavenging threads.
        Atomic::release_store(&_pre_scavenge_completed_top, _pre_scavenge_current_goal);
        // Set next goal. The counter is reset before the new goal becomes
        // visible, because workers only decrement it after seeing the new goal.
        _pre_scavenge_current_goal_active_workers = n_stripes;
        Atomic::add(&_pre_scavenge_current_goal, _pre_scavenge_sync_interval);
      }
    }
  }
  Atomic::dec(&_pre_scavenge_active_workers);
}

// We get passed the space_top value to prevent us from traversing into
// the old_gen promotion labs, which cannot be safely parsed.

@@ -374,43 +422,28 @@ void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array,
const size_t stripe_size_in_words = num_cards_in_stripe * _card_size_in_words;
const size_t slice_size_in_words = stripe_size_in_words * n_stripes;

// Propagate imprecise marks from object start to the stripes the object extends to.
{
const size_t num_cards_in_slice = num_cards_in_stripe * n_stripes;
CardValue* cur_card = byte_for(old_gen_bottom) + stripe_index * num_cards_in_stripe;
CardValue* const space_top_card = byte_for(old_gen_top);

ObjStartCache start_cache(start_array);
for ( /* empty */ ; cur_card < space_top_card; cur_card += num_cards_in_slice) {
if (!is_dirty(cur_card)) {
HeapWord* stripe_addr = addr_for(cur_card);
HeapWord* first_obj_addr = start_cache.object_start(stripe_addr);
if (first_obj_addr < stripe_addr) {
oop first_obj = cast_to_oop(first_obj_addr);
if (!first_obj->is_array() && is_dirty(byte_for(first_obj_addr))) {
// Potentially imprecisely marked dirty.
// Mark first card of stripe dirty too.
*cur_card = dirty_card_val();
}
}
}
}

// Synchronize with co-worker threads.
Atomic::dec(&_scavenge_phase1_active_workers);
while(_scavenge_phase1_active_workers > 0) {
os::naked_short_sleep(0);
}
}
// Prepare scavenge
pre_scavenge_parallel(start_array, old_gen_bottom, old_gen_top, stripe_index, n_stripes);

// Scavenge
HeapWord* cur_stripe_addr = old_gen_bottom + stripe_index * stripe_size_in_words;
ObjStartCache start_cache(start_array);
bool pre_scavenge_complete = false;
for (/* empty */; cur_stripe_addr < old_gen_top; cur_stripe_addr += slice_size_in_words) {
HeapWord* const stripe_l = cur_stripe_addr;
HeapWord* const stripe_r = MIN2(cur_stripe_addr + stripe_size_in_words,
old_gen_top);

// Sync with concurrent pre-scavenge.
if (!pre_scavenge_complete) {
SpinYield spin;
while (Atomic::load_acquire(&_pre_scavenge_active_workers) != 0 &&
cur_stripe_addr > Atomic::load_acquire(&_pre_scavenge_completed_top)) {
spin.wait();
}
pre_scavenge_complete = Atomic::load_acquire(&_pre_scavenge_active_workers) == 0;
}

process_range(start_cache, pm, stripe_l, stripe_r);
}
}
27 changes: 24 additions & 3 deletions src/hotspot/share/gc/parallel/psCardTable.hpp
Original file line number Diff line number Diff line change
@@ -37,7 +37,15 @@ class PSCardTable: public CardTable {
static constexpr size_t num_cards_in_stripe = 128;
static_assert(num_cards_in_stripe >= 1, "progress");

volatile int _scavenge_phase1_active_workers;
// Pre-scavenge support.
// The pre-scavenge phase can overlap with scavenging.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this obsolete?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sure. It's obsolete now. I removed it.

static size_t constexpr _pre_scavenge_sync_interval = 1*G;
volatile HeapWord* _pre_scavenge_current_goal;
volatile int _pre_scavenge_current_goal_active_workers;
// A stripe is ready for scavenge if its start is not higher than this.
volatile HeapWord* _pre_scavenge_completed_top;
// All stripes are ready for scavenge if all threads have completed pre-scavenge.
volatile int _pre_scavenge_active_workers;

bool is_dirty(CardValue* card) {
return !is_clean(card);
@@ -79,14 +87,27 @@ class PSCardTable: public CardTable {
void clear_cards(CardValue* const start, CardValue* const end);

public:
PSCardTable(MemRegion whole_heap) : CardTable(whole_heap), _scavenge_phase1_active_workers(0) {}
PSCardTable(MemRegion whole_heap) : CardTable(whole_heap),
_pre_scavenge_current_goal(nullptr),
_pre_scavenge_current_goal_active_workers(0),
_pre_scavenge_completed_top(nullptr),
_pre_scavenge_active_workers(0) {}

static CardValue youngergen_card_val() { return youngergen_card; }
static CardValue verify_card_val() { return verify_card; }

void pre_scavenge(uint active_workers);
void pre_scavenge(HeapWord* old_gen_bottom, uint active_workers);

// Scavenge support

// Propagate imprecise card marks from object start to the stripes an object extends to.
// Pre-scavenging and scavenging can overlap.
void pre_scavenge_parallel(ObjectStartArray* start_array,
HeapWord* old_gen_bottom,
HeapWord* old_gen_top,
uint stripe_index,
uint n_stripes);

void scavenge_contents_parallel(ObjectStartArray* start_array,
HeapWord* old_gen_bottom,
HeapWord* old_gen_top,
2 changes: 1 addition & 1 deletion src/hotspot/share/gc/parallel/psScavenge.cpp
Original file line number Diff line number Diff line change
@@ -303,7 +303,7 @@ class ScavengeRootsTask : public WorkerTask {

if (!_is_old_gen_empty) {
PSCardTable* card_table = ParallelScavengeHeap::heap()->card_table();
card_table->pre_scavenge(active_workers);
card_table->pre_scavenge(_old_gen->object_space()->bottom(), active_workers);
}
}