Skip to content

Commit 1fb9dad

Browse files
author
Kim Barrett
committed Nov 25, 2022
8296419: [REDO] JDK-8295319: pending_cards_at_gc_start doesn't include cards in thread buffers
Reviewed-by: tschatzl, sjohanss
1 parent 2f47f83 commit 1fb9dad

9 files changed

+112
-109
lines changed
 

‎src/hotspot/share/gc/g1/g1CollectedHeap.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,7 @@ void G1CollectedHeap::abort_refinement() {
10631063
}
10641064

10651065
// Discard all remembered set updates and reset refinement statistics.
1066-
G1BarrierSet::dirty_card_queue_set().abandon_logs();
1066+
G1BarrierSet::dirty_card_queue_set().abandon_logs_and_stats();
10671067
assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
10681068
"DCQS should be empty");
10691069
concurrent_refine()->get_and_reset_refinement_stats();

‎src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp

+26-30
Original file line numberDiff line numberDiff line change
@@ -530,16 +530,13 @@ bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
530530
return true;
531531
}
532532

533-
void G1DirtyCardQueueSet::abandon_logs() {
533+
void G1DirtyCardQueueSet::abandon_logs_and_stats() {
534534
assert_at_safepoint();
535-
abandon_completed_buffers();
536-
_detached_refinement_stats.reset();
537535

538536
// Disable mutator refinement until concurrent refinement decides otherwise.
539537
set_mutator_refinement_threshold(SIZE_MAX);
540538

541-
// Since abandon is done only at safepoints, we can safely manipulate
542-
// these queues.
539+
// Iterate over all the threads, resetting per-thread queues and stats.
543540
struct AbandonThreadLogClosure : public ThreadClosure {
544541
G1DirtyCardQueueSet& _qset;
545542
AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
@@ -550,9 +547,16 @@ void G1DirtyCardQueueSet::abandon_logs() {
550547
}
551548
} closure(*this);
552549
Threads::threads_do(&closure);
550+
551+
enqueue_all_paused_buffers();
552+
abandon_completed_buffers();
553+
554+
// Reset stats from detached threads.
555+
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
556+
_detached_refinement_stats.reset();
553557
}
554558

555-
void G1DirtyCardQueueSet::concatenate_logs() {
559+
void G1DirtyCardQueueSet::concatenate_logs_and_stats() {
556560
assert_at_safepoint();
557561

558562
// Disable mutator refinement until concurrent refinement decides otherwise.
@@ -562,47 +566,39 @@ void G1DirtyCardQueueSet::concatenate_logs() {
562566
// the global list of logs.
563567
struct ConcatenateThreadLogClosure : public ThreadClosure {
564568
G1DirtyCardQueueSet& _qset;
565-
ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
569+
G1ConcurrentRefineStats _total_stats;
570+
571+
ConcatenateThreadLogClosure(G1DirtyCardQueueSet& qset) :
572+
_qset{qset}, _total_stats{} {}
573+
566574
virtual void do_thread(Thread* t) {
567575
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
576+
// Flush the buffer if non-empty. Flush before accumulating and
577+
// resetting stats, since flushing may modify the stats.
568578
if ((queue.buffer() != nullptr) &&
569579
(queue.index() != _qset.buffer_size())) {
570580
_qset.flush_queue(queue);
571581
}
582+
G1ConcurrentRefineStats& qstats = *queue.refinement_stats();
583+
_total_stats += qstats;
584+
qstats.reset();
572585
}
573586
} closure(*this);
574587
Threads::threads_do(&closure);
588+
_concatenated_refinement_stats = closure._total_stats;
575589

576590
enqueue_all_paused_buffers();
577591
verify_num_cards();
578-
}
579-
580-
G1ConcurrentRefineStats G1DirtyCardQueueSet::get_and_reset_refinement_stats() {
581-
assert_at_safepoint();
582-
583-
// Since we're at a safepoint, there aren't any races with recording of
584-
// detached refinement stats. In particular, there's no risk of double
585-
// counting a thread that detaches after we've examined it but before
586-
// we've processed the detached stats.
587-
588-
// Collect and reset stats for attached threads.
589-
struct CollectStats : public ThreadClosure {
590-
G1ConcurrentRefineStats _total_stats;
591-
virtual void do_thread(Thread* t) {
592-
G1DirtyCardQueue& dcq = G1ThreadLocalData::dirty_card_queue(t);
593-
G1ConcurrentRefineStats& stats = *dcq.refinement_stats();
594-
_total_stats += stats;
595-
stats.reset();
596-
}
597-
} closure;
598-
Threads::threads_do(&closure);
599592

600593
// Collect and reset stats from detached threads.
601594
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
602-
closure._total_stats += _detached_refinement_stats;
595+
_concatenated_refinement_stats += _detached_refinement_stats;
603596
_detached_refinement_stats.reset();
597+
}
604598

605-
return closure._total_stats;
599+
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
600+
assert_at_safepoint();
601+
return _concatenated_refinement_stats;
606602
}
607603

608604
void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) {

‎src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp

+16-11
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,10 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
9595
//
9696
// The paused buffers are conceptually an extension of the completed buffers
9797
// queue, and operations which need to deal with all of the queued buffers
98-
// (such as concatenate_logs) also need to deal with any paused buffers. In
99-
// general, if a safepoint performs a GC then the paused buffers will be
100-
// processed as part of it, and there won't be any paused buffers after a
101-
// GC safepoint.
98+
// (such as concatenating or abandoning logs) also need to deal with any
99+
// paused buffers. In general, if a safepoint performs a GC then the paused
100+
// buffers will be processed as part of it, and there won't be any paused
101+
// buffers after a GC safepoint.
102102
class PausedBuffers {
103103
class PausedList : public CHeapObj<mtGC> {
104104
BufferNode* volatile _head;
@@ -175,6 +175,7 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
175175

176176
G1FreeIdSet _free_ids;
177177

178+
G1ConcurrentRefineStats _concatenated_refinement_stats;
178179
G1ConcurrentRefineStats _detached_refinement_stats;
179180

180181
// Verify _num_cards == sum of cards in the completed queue.
@@ -267,17 +268,21 @@ class G1DirtyCardQueueSet: public PtrQueueSet {
267268
size_t stop_at,
268269
G1ConcurrentRefineStats* stats);
269270

270-
// If a full collection is happening, reset partial logs, and release
271-
// completed ones: the full collection will make them all irrelevant.
272-
void abandon_logs();
271+
// If a full collection is happening, reset per-thread refinement stats and
272+
// partial logs, and release completed logs. The full collection will make
273+
// them all irrelevant.
274+
// precondition: at safepoint.
275+
void abandon_logs_and_stats();
273276

274-
// If any threads have partial logs, add them to the global list of logs.
275-
void concatenate_logs();
277+
// Collect and reset all the per-thread refinement stats. If any threads
278+
// have partial logs then add them to the global list.
279+
// precondition: at safepoint.
280+
void concatenate_logs_and_stats();
276281

277282
// Return the total of mutator refinement stats for all threads.
278-
// Also resets the stats for the threads.
279283
// precondition: at safepoint.
280-
G1ConcurrentRefineStats get_and_reset_refinement_stats();
284+
// precondition: only call after concatenate_logs_and_stats.
285+
G1ConcurrentRefineStats concatenated_refinement_stats() const;
281286

282287
// Accumulate refinement stats from threads that are detaching.
283288
void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);

‎src/hotspot/share/gc/g1/g1ParScanThreadState.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
566566
if (_states[worker_id] == NULL) {
567567
_states[worker_id] =
568568
new G1ParScanThreadState(_g1h, rdcqs(),
569-
_preserved_marks_set->get(worker_id),
569+
_preserved_marks_set.get(worker_id),
570570
worker_id, _n_workers,
571571
_young_cset_length, _optional_cset_length,
572572
_evac_failure_regions);
@@ -687,22 +687,21 @@ void G1ParScanThreadState::update_numa_stats(uint node_index) {
687687
}
688688

689689
G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
690-
G1RedirtyCardsQueueSet* rdcqs,
691-
PreservedMarksSet* preserved_marks_set,
692690
uint n_workers,
693691
size_t young_cset_length,
694692
size_t optional_cset_length,
695693
G1EvacFailureRegions* evac_failure_regions) :
696694
_g1h(g1h),
697-
_rdcqs(rdcqs),
698-
_preserved_marks_set(preserved_marks_set),
695+
_rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
696+
_preserved_marks_set(true /* in_c_heap */),
699697
_states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)),
700698
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length + 1, mtGC)),
701699
_young_cset_length(young_cset_length),
702700
_optional_cset_length(optional_cset_length),
703701
_n_workers(n_workers),
704702
_flushed(false),
705703
_evac_failure_regions(evac_failure_regions) {
704+
_preserved_marks_set.init(n_workers);
706705
for (uint i = 0; i < n_workers; ++i) {
707706
_states[i] = NULL;
708707
}
@@ -713,4 +712,6 @@ G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
713712
assert(_flushed, "thread local state from the per thread states should have been flushed");
714713
FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
715714
FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
715+
_preserved_marks_set.assert_empty();
716+
_preserved_marks_set.reclaim();
716717
}

‎src/hotspot/share/gc/g1/g1ParScanThreadState.hpp

+5-6
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "gc/shared/ageTable.hpp"
3434
#include "gc/shared/copyFailedInfo.hpp"
3535
#include "gc/shared/partialArrayTaskStepper.hpp"
36+
#include "gc/shared/preservedMarks.hpp"
3637
#include "gc/shared/stringdedup/stringDedup.hpp"
3738
#include "gc/shared/taskqueue.hpp"
3839
#include "memory/allocation.hpp"
@@ -229,8 +230,8 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
229230

230231
class G1ParScanThreadStateSet : public StackObj {
231232
G1CollectedHeap* _g1h;
232-
G1RedirtyCardsQueueSet* _rdcqs;
233-
PreservedMarksSet* _preserved_marks_set;
233+
G1RedirtyCardsQueueSet _rdcqs;
234+
PreservedMarksSet _preserved_marks_set;
234235
G1ParScanThreadState** _states;
235236
size_t* _surviving_young_words_total;
236237
size_t _young_cset_length;
@@ -241,16 +242,14 @@ class G1ParScanThreadStateSet : public StackObj {
241242

242243
public:
243244
G1ParScanThreadStateSet(G1CollectedHeap* g1h,
244-
G1RedirtyCardsQueueSet* rdcqs,
245-
PreservedMarksSet* preserved_marks_set,
246245
uint n_workers,
247246
size_t young_cset_length,
248247
size_t optional_cset_length,
249248
G1EvacFailureRegions* evac_failure_regions);
250249
~G1ParScanThreadStateSet();
251250

252-
G1RedirtyCardsQueueSet* rdcqs() { return _rdcqs; }
253-
PreservedMarksSet* preserved_marks_set() { return _preserved_marks_set; }
251+
G1RedirtyCardsQueueSet* rdcqs() { return &_rdcqs; }
252+
PreservedMarksSet* preserved_marks_set() { return &_preserved_marks_set; }
254253

255254
void flush_stats();
256255
void record_unused_optional_region(HeapRegion* hr);

‎src/hotspot/share/gc/g1/g1Policy.cpp

+6-11
Original file line numberDiff line numberDiff line change
@@ -586,12 +586,14 @@ static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats
586586
stats.dirtied_cards());
587587
}
588588

589-
void G1Policy::record_concurrent_refinement_stats() {
590-
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
591-
_pending_cards_at_gc_start = dcqs.num_cards();
589+
void G1Policy::record_concurrent_refinement_stats(size_t pending_cards,
590+
size_t thread_buffer_cards) {
591+
_pending_cards_at_gc_start = pending_cards;
592+
_analytics->report_dirtied_cards_in_thread_buffers(thread_buffer_cards);
592593

593594
// Collect per-thread stats, mostly from mutator activity.
594-
G1ConcurrentRefineStats mut_stats = dcqs.get_and_reset_refinement_stats();
595+
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
596+
G1ConcurrentRefineStats mut_stats = dcqs.concatenated_refinement_stats();
595597

596598
// Collect specialized concurrent refinement thread stats.
597599
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
@@ -628,11 +630,6 @@ void G1Policy::record_concurrent_refinement_stats() {
628630
}
629631
}
630632

631-
void G1Policy::record_concatenate_dirty_card_logs(Tickspan concat_time, size_t num_cards) {
632-
_analytics->report_dirtied_cards_in_thread_buffers(num_cards);
633-
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time.seconds() * MILLIUNITS);
634-
}
635-
636633
void G1Policy::record_young_collection_start() {
637634
Ticks now = Ticks::now();
638635
// We only need to do this here as the policy will only be applied
@@ -647,8 +644,6 @@ void G1Policy::record_young_collection_start() {
647644

648645
phase_times()->record_cur_collection_start_sec(now.seconds());
649646

650-
record_concurrent_refinement_stats();
651-
652647
_collection_set->reset_bytes_used_before();
653648

654649
// do that for any other surv rate groups

‎src/hotspot/share/gc/g1/g1Policy.hpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -291,9 +291,6 @@ class G1Policy: public CHeapObj<mtGC> {
291291
// Indicate that we aborted marking before doing any mixed GCs.
292292
void abort_time_to_mixed_tracking();
293293

294-
// Record and log stats before not-full collection.
295-
void record_concurrent_refinement_stats();
296-
297294
public:
298295

299296
G1Policy(STWGCTimer* gc_timer);
@@ -312,8 +309,6 @@ class G1Policy: public CHeapObj<mtGC> {
312309
// This should be called after the heap is resized.
313310
void record_new_heap_size(uint new_number_of_regions);
314311

315-
void record_concatenate_dirty_card_logs(Tickspan concat_time, size_t num_cards);
316-
317312
void init(G1CollectedHeap* g1h, G1CollectionSet* collection_set);
318313

319314
// Record the start and end of the young gc pause.
@@ -411,6 +406,12 @@ class G1Policy: public CHeapObj<mtGC> {
411406

412407
void transfer_survivors_to_cset(const G1SurvivorRegions* survivors);
413408

409+
// Record and log stats and pending cards before not-full collection.
410+
// thread_buffer_cards is the number of cards that were in per-thread
411+
// buffers. pending_cards includes thread_buffer_cards.
412+
void record_concurrent_refinement_stats(size_t pending_cards,
413+
size_t thread_buffer_cards);
414+
414415
private:
415416
//
416417
// Survivor regions policy.

‎src/hotspot/share/gc/g1/g1YoungCollector.cpp

+42-38
Original file line numberDiff line numberDiff line change
@@ -467,36 +467,59 @@ void G1YoungCollector::set_young_collection_default_active_worker_threads(){
467467
log_info(gc,task)("Using %u workers of %u for evacuation", active_workers, workers()->max_workers());
468468
}
469469

470-
void G1YoungCollector::flush_dirty_card_queues() {
470+
void G1YoungCollector::retire_tlabs() {
471+
Ticks start = Ticks::now();
472+
_g1h->retire_tlabs();
473+
double retire_time = (Ticks::now() - start).seconds() * MILLIUNITS;
474+
phase_times()->record_prepare_tlab_time_ms(retire_time);
475+
}
476+
477+
void G1YoungCollector::concatenate_dirty_card_logs_and_stats() {
471478
Ticks start = Ticks::now();
472479
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
473480
size_t old_cards = qset.num_cards();
474-
qset.concatenate_logs();
475-
size_t added_cards = qset.num_cards() - old_cards;
476-
Tickspan concat_time = Ticks::now() - start;
477-
policy()->record_concatenate_dirty_card_logs(concat_time, added_cards);
481+
qset.concatenate_logs_and_stats();
482+
size_t pending_cards = qset.num_cards();
483+
size_t thread_buffer_cards = pending_cards - old_cards;
484+
policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
485+
double concat_time = (Ticks::now() - start).seconds() * MILLIUNITS;
486+
phase_times()->record_concatenate_dirty_card_logs_time_ms(concat_time);
487+
}
488+
489+
#ifdef ASSERT
490+
void G1YoungCollector::verify_empty_dirty_card_logs() const {
491+
struct Verifier : public ThreadClosure {
492+
size_t _buffer_size;
493+
Verifier() : _buffer_size(G1BarrierSet::dirty_card_queue_set().buffer_size()) {}
494+
void do_thread(Thread* t) override {
495+
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
496+
assert((queue.buffer() == nullptr) || (queue.index() == _buffer_size),
497+
"non-empty dirty card queue for thread");
498+
}
499+
} verifier;
500+
Threads::threads_do(&verifier);
478501
}
502+
#endif // ASSERT
503+
504+
void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info) {
505+
// Flush early, so later phases don't need to account for per-thread stuff.
506+
// Flushes deferred card marks, so must precede concatenating logs.
507+
retire_tlabs();
508+
509+
// Flush early, so later phases don't need to account for per-thread stuff.
510+
concatenate_dirty_card_logs_and_stats();
511+
512+
calculate_collection_set(evacuation_info, policy()->max_pause_time_ms());
479513

480-
void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
481514
// Please see comment in g1CollectedHeap.hpp and
482515
// G1CollectedHeap::ref_processing_init() to see how
483516
// reference processing currently works in G1.
484517
ref_processor_stw()->start_discovery(false /* always_clear */);
485518

486-
_evac_failure_regions.pre_collection(_g1h->max_reserved_regions());
519+
_evac_failure_regions.pre_collection(_g1h->max_reserved_regions());
487520

488521
_g1h->gc_prologue(false);
489522

490-
{
491-
Ticks start = Ticks::now();
492-
_g1h->retire_tlabs();
493-
phase_times()->record_prepare_tlab_time_ms((Ticks::now() - start).seconds() * 1000.0);
494-
}
495-
496-
// Flush dirty card queues to qset, so later phases don't need to account
497-
// for partially filled per-thread queues and such.
498-
flush_dirty_card_queues();
499-
500523
hot_card_cache()->reset_hot_cache_claimed_index();
501524

502525
// Initialize the GC alloc regions.
@@ -519,7 +542,7 @@ void G1YoungCollector::pre_evacuate_collection_set(G1EvacInfo* evacuation_info,
519542
}
520543

521544
assert(_g1h->verifier()->check_region_attr_table(), "Inconsistency in the region attributes table.");
522-
per_thread_states->preserved_marks_set()->assert_empty();
545+
verify_empty_dirty_card_logs();
523546

524547
#if COMPILER2_OR_JVMCI
525548
DerivedPointerTable::clear();
@@ -1013,19 +1036,6 @@ bool G1YoungCollector::evacuation_failed() const {
10131036
return _evac_failure_regions.evacuation_failed();
10141037
}
10151038

1016-
class G1PreservedMarksSet : public PreservedMarksSet {
1017-
public:
1018-
1019-
G1PreservedMarksSet(uint num_workers) : PreservedMarksSet(true /* in_c_heap */) {
1020-
init(num_workers);
1021-
}
1022-
1023-
virtual ~G1PreservedMarksSet() {
1024-
assert_empty();
1025-
reclaim();
1026-
}
1027-
};
1028-
10291039
G1YoungCollector::G1YoungCollector(GCCause::Cause gc_cause) :
10301040
_g1h(G1CollectedHeap::heap()),
10311041
_gc_cause(gc_cause),
@@ -1073,20 +1083,14 @@ void G1YoungCollector::collect() {
10731083
// other trivial setup above).
10741084
policy()->record_young_collection_start();
10751085

1076-
calculate_collection_set(jtm.evacuation_info(), policy()->max_pause_time_ms());
1086+
pre_evacuate_collection_set(jtm.evacuation_info());
10771087

1078-
G1RedirtyCardsQueueSet rdcqs(G1BarrierSet::dirty_card_queue_set().allocator());
1079-
G1PreservedMarksSet preserved_marks_set(workers()->active_workers());
10801088
G1ParScanThreadStateSet per_thread_states(_g1h,
1081-
&rdcqs,
1082-
&preserved_marks_set,
10831089
workers()->active_workers(),
10841090
collection_set()->young_region_length(),
10851091
collection_set()->optional_region_length(),
10861092
&_evac_failure_regions);
10871093

1088-
pre_evacuate_collection_set(jtm.evacuation_info(), &per_thread_states);
1089-
10901094
bool may_do_optional_evacuation = collection_set()->optional_region_length() != 0;
10911095
// Actually do the work...
10921096
evacuate_initial_collection_set(&per_thread_states, may_do_optional_evacuation);

‎src/hotspot/share/gc/g1/g1YoungCollector.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,11 @@ class G1YoungCollector {
9898

9999
void set_young_collection_default_active_worker_threads();
100100

101-
void flush_dirty_card_queues();
101+
void retire_tlabs();
102+
void concatenate_dirty_card_logs_and_stats();
103+
void verify_empty_dirty_card_logs() const NOT_DEBUG_RETURN;
102104

103-
void pre_evacuate_collection_set(G1EvacInfo* evacuation_info, G1ParScanThreadStateSet* pss);
105+
void pre_evacuate_collection_set(G1EvacInfo* evacuation_info);
104106
// Actually do the work of evacuating the parts of the collection set.
105107
// The has_optional_evacuation_work flag for the initial collection set
106108
// evacuation indicates whether one or more optional evacuation steps may

1 commit comment

Comments
 (1)

openjdk-notifier[bot] commented on Nov 25, 2022

@openjdk-notifier[bot]
Please sign in to comment.