[Gc] Fix for Win32 PARALLEL_MARK
Ivan Maidanski
ivmai at mail.ru
Sun Oct 26 07:16:32 PST 2008
Hi!
This patch is for my previous patch introducing parallel marking on Win32 and Cygwin. My previous implementation of Win32 GC_wait_marker() turns (frequently when GC_MARKERS > ncpu) into simple yielding (thus uselessly consuming CPU time outside garbage collections).
Now I'm using PulseEvent and atomic NT SignalObjectAndWait() (on Windows 98/Me parallel marking is set disabled but it's ok since multicore CPUs are not supported by these OS, AFAIK).
Mark lock spinning is disabled by default (unless MARKLOCK_USES_SPIN) but it works ok. More careful measurements should be done.
Tested with MinGW and VC++ 32-bit on dual core CPU.
Still some questions remain:
- should we scan markers in GC_get_next_stack()?;
- should we use .._full() version (or weaker) of CAS in GC_acquire_mark_lock()?
PS. This patch also adjusts ..._printf() actual params (according to their format specifiers) for "win32_threads.c" file.
Bye.
-------------- next part --------------
diff -ru bdwgc/win32_threads.c updated/bdwgc/win32_threads.c
--- bdwgc/win32_threads.c 2008-10-24 17:42:24.000000000 +0400
+++ updated/bdwgc/win32_threads.c 2008-10-26 13:16:20.000000000 +0300
@@ -402,8 +402,7 @@
0,
0,
DUPLICATE_SAME_ACCESS)) {
- DWORD last_error = GetLastError();
- GC_err_printf("Last error code: %d\n", last_error);
+ GC_err_printf("Last error code: %d\n", (int)GetLastError());
ABORT("DuplicateHandle failed");
}
me -> last_stack_min = ADDR_LIMIT;
@@ -947,7 +946,7 @@
if (sp >= stack_min && sp < thread->stack_base) {
# ifdef DEBUG_THREADS
GC_printf("Pushing thread from %p to %p for 0x%x from 0x%x\n",
- sp, thread -> stack_base, thread -> id, me);
+ sp, thread -> stack_base, (int)thread -> id, (int)me);
# endif
GC_push_all_stack(sp, thread->stack_base);
} else {
@@ -968,7 +967,7 @@
DWORD me = GetCurrentThreadId();
GC_bool found_me = FALSE;
# ifndef SMALL_CONFIG
- size_t nthreads = 0;
+ unsigned nthreads = 0;
# endif
if (GC_win32_dll_threads) {
@@ -1154,7 +1153,8 @@
for (i = 0; i < GC_markers - 1; ++i) {
if (0 != pthread_create(&new_thread, &attr,
GC_mark_thread, (void *)(word)i)) {
- WARN("Marker thread creation failed, errno = %d.\n", errno);
+ WARN("Marker thread creation failed, errno = %ld.\n",
+ /* (word) */ errno);
}
}
pthread_attr_destroy(&attr);
@@ -1314,14 +1314,19 @@
#define SPIN_MAX 128 /* Maximum number of calls to GC_pause before */
#endif /* give up. */
+volatile unsigned GC_spinning_disabled; /* don't spin if non-zero */
+
STATIC int GC_spin_and_trylock(volatile AO_t *pmutex_waitcnt)
{
unsigned pause_length;
unsigned i;
GC_pause();
for (pause_length = 2; pause_length <= SPIN_MAX; pause_length <<= 1) {
+ /* FIXME: Should we use ..._full version of CAS here (and below)? */
if (AO_compare_and_swap(pmutex_waitcnt, 0, 1))
return 0;
+ if (GC_spinning_disabled)
+ break;
for (i = 0; i < pause_length; ++i)
GC_pause();
}
@@ -1345,11 +1350,14 @@
/* Here we implement the same spinning algorithm as */
/* in GC_generic_lock() in pthread_support.c. */
+ if (GC_spinning_disabled)
+ goto nospinning;
if (!AO_compare_and_swap(&GC_mark_mutex_waitcnt, 0, 1)) {
if (GC_spin_and_trylock(&GC_mark_mutex_waitcnt) < 0) {
+ nospinning:
# endif
- if ((signed_word)AO_fetch_and_add1_full(&GC_mark_mutex_waitcnt) > 0) {
+ if (AO_fetch_and_add1_full(&GC_mark_mutex_waitcnt) != 0) {
# ifdef LOCK_STATS
++GC_block_count;
# endif
@@ -1389,7 +1397,8 @@
# ifdef GC_ASSERTIONS
GC_mark_lock_holder = NO_THREAD;
# endif
- if ((signed_word)AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt) > 1 &&
+ GC_ASSERT(GC_mark_mutex_waitcnt != 0);
+ if (AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt) > 1 &&
SetEvent(mark_mutex_event) == FALSE)
ABORT("SetEvent() failed");
}
@@ -1431,36 +1440,81 @@
/* different places (and, in addition, notifying is done after leaving */
/* critical section) and this could result in a signal loosing between */
/* checking for a particular condition and calling WaitForSingleObject. */
-/* So, we use "auto-reset" Event and propagate a broadcast manually */
-/* (unless the current thread is the only one waiting on mark_cv). */
-
-STATIC HANDLE mark_cv = (HANDLE)0; /* Event with auto-reset */
-
-STATIC unsigned mark_cv_waiter_cnt = 0;
+/* So, we use PulseEvent() and NT SignalObjectAndWait() (which */
+/* atomically sets mutex event to signaled state and starts waiting on */
+/* condvar). A special case here is GC_mark_mutex_waitcnt == 1 (i.e. */
+/* nobody waits for mark lock at this moment) - we don't change it */
+/* (otherwise we may loose a signal sent between decrementing */
+/* GC_mark_mutex_waitcnt and calling WaitForSingleObject()). */
+
+STATIC HANDLE mark_cv = (HANDLE)0; /* Event with manual reset */
+
+typedef DWORD (WINAPI * SignalObjectAndWait_type)(
+ HANDLE, HANDLE, DWORD, BOOL);
+STATIC SignalObjectAndWait_type signalObjectAndWait_func = 0;
void GC_wait_marker(void)
{
/* Here we assume that GC_wait_marker() is always called */
/* from a while(check_cond) loop. */
+ AO_t waitcnt;
GC_ASSERT(mark_cv != 0);
- mark_cv_waiter_cnt++;
- GC_release_mark_lock();
- if (WaitForSingleObject(mark_cv, INFINITE) == WAIT_FAILED)
- ABORT("WaitForSingleObject() failed");
- /* WaitForSingleObject() unblocks one thread and resets mark_cv */
- GC_acquire_mark_lock();
- /* Unblock one more waiting thread (if any) */
- if (--mark_cv_waiter_cnt != 0 && SetEvent(mark_cv) == FALSE)
- ABORT("SetEvent() failed");
+ GC_ASSERT(signalObjectAndWait_func != 0);
+
+ /* We inline GC_release_mark_lock() to have atomic */
+ /* unlock-and-wait action here. */
+ GC_ASSERT(GC_mark_lock_holder == (unsigned long)GetCurrentThreadId());
+# ifdef GC_ASSERTIONS
+ GC_mark_lock_holder = NO_THREAD;
+# endif
+
+ if ((waitcnt = GC_mark_mutex_waitcnt) > 1) {
+ (void)AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt);
+ } else {
+ GC_ASSERT(waitcnt != 0);
+# ifdef MARKLOCK_USES_SPIN
+ /* Avoid spinning since GC_mark_mutex_waitcnt is guaranteed */
+ /* to be non-zero till the end of this function. */
+ GC_spinning_disabled++;
+# endif
+ }
+
+ /* The state of mark_cv is non-signaled here. */
+ if ((*signalObjectAndWait_func)(mark_mutex_event /* hObjectToSignal */,
+ mark_cv /* hObjectToWaitOn */,
+ INFINITE /* timeout */,
+ FALSE /* isAlertable */) == WAIT_FAILED)
+ ABORT("SignalObjectAndWait() failed");
+ /* The state of mark_cv is non-signaled here again. */
+
+ if (waitcnt > 1) {
+ GC_acquire_mark_lock();
+ } else {
+ GC_ASSERT(GC_mark_mutex_waitcnt != 0);
+ /* Acquire mark lock */
+ if (WaitForSingleObject(mark_mutex_event, INFINITE) == WAIT_FAILED)
+ ABORT("WaitForSingleObject() failed");
+ GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
+# ifdef GC_ASSERTIONS
+ GC_mark_lock_holder = (unsigned long)GetCurrentThreadId();
+# endif
+# ifdef MARKLOCK_USES_SPIN
+ /* Allow spinning now */
+ GC_spinning_disabled--;
+# endif
+ }
}
void GC_notify_all_marker(void)
{
GC_ASSERT(mark_cv != 0);
- if (SetEvent(mark_cv) == FALSE)
- ABORT("SetEvent() failed");
+ if (PulseEvent(mark_cv) == FALSE)
+ ABORT("PulseEvent() failed");
}
+/* Defined in os_dep.c */
+extern GC_bool GC_wnt;
+
#endif /* ! GC_PTHREADS */
#endif /* PARALLEL_MARK */
@@ -1482,7 +1536,7 @@
thread_args *args = (thread_args *)arg;
# if DEBUG_WIN32_THREADS
- GC_printf("thread 0x%x starting...\n", GetCurrentThreadId());
+ GC_printf("thread 0x%x starting...\n", (unsigned)GetCurrentThreadId());
# endif
GC_register_my_thread(sb); /* This waits for an in-progress GC. */
@@ -1505,7 +1559,7 @@
# if DEBUG_WIN32_THREADS
GC_printf("thread 0x%x returned from start routine.\n",
- GetCurrentThreadId());
+ (unsigned)GetCurrentThreadId());
# endif
return ret;
}
@@ -1529,7 +1583,8 @@
tls initialized) */
# if DEBUG_WIN32_THREADS
- GC_printf("About to create a thread from 0x%x\n", GetCurrentThreadId());
+ GC_printf("About to create a thread from 0x%x\n",
+ (unsigned)GetCurrentThreadId());
# endif
if (GC_win32_dll_threads) {
return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
@@ -1575,7 +1630,8 @@
/* make sure GC is initialized (i.e. main thread is attached,
tls initialized) */
# if DEBUG_WIN32_THREADS
- GC_printf("About to create a thread from 0x%x\n", GetCurrentThreadId());
+ GC_printf("About to create a thread from 0x%x\n",
+ (unsigned)GetCurrentThreadId());
# endif
if (GC_win32_dll_threads) {
@@ -1683,17 +1739,14 @@
# ifdef PARALLEL_MARK
# ifndef GC_PTHREADS
+ /* Initialize Win32 event objects for parallel marking */
+ /* (used even if parallel marking is set disabled at startup). */
mark_mutex_event = CreateEventA(NULL /* attrs */,
FALSE /* isManualReset */,
FALSE /* initialState */, NULL /* name */);
- if (mark_mutex_event == (HANDLE)0)
- ABORT("CreateEvent() failed");
-
builder_cv = CreateEventA(NULL /* attrs */, TRUE /* isManualReset */,
FALSE /* initialState */, NULL /* name */);
- mark_cv = CreateEventA(NULL /* attrs */, FALSE /* isManualReset */,
- FALSE /* initialState */, NULL /* name */);
- if (builder_cv == (HANDLE)0 || mark_cv == (HANDLE)0)
+ if (mark_mutex_event == (HANDLE)0 || builder_cv == (HANDLE)0)
ABORT("CreateEvent() failed");
# endif
@@ -1727,18 +1780,42 @@
GC_markers = MAX_MARKERS; /* silently limit GC_markers value */
}
}
- if (GC_markers <= 1 || GC_win32_dll_threads) {
- GC_parallel = FALSE;
- GC_markers = 1;
- } else {
- GC_parallel = TRUE;
- /* Disable true incremental collection, but generational is OK. */
- GC_time_limit = GC_TIME_UNLIMITED;
+
+ /* Set GC_parallel. */
+ {
+# ifndef GC_PTHREADS
+ HMODULE hK32;
+ /* SignalObjectAndWait() API call works only under NT. */
+# endif
+ if (GC_markers <= 1 || GC_win32_dll_threads
+# ifndef GC_PTHREADS
+ || GC_wnt == FALSE
+ || (hK32 = GetModuleHandleA("kernel32.dll")) == (HMODULE)0
+ || (signalObjectAndWait_func = (SignalObjectAndWait_type)
+ GetProcAddress(hK32, "SignalObjectAndWait")) == 0
+# endif
+ ) {
+ /* Disable parallel marking. */
+ GC_parallel = FALSE;
+ GC_markers = 1;
+ } else {
+# ifndef GC_PTHREADS
+ /* Initialize mark_cv if it is really needed. */
+ mark_cv = CreateEventA(NULL /* attrs */, TRUE /* isManualReset */,
+ FALSE /* initialState */, NULL /* name */);
+ if (mark_cv == (HANDLE)0)
+ ABORT("CreateEvent() failed");
+# endif
+ GC_parallel = TRUE;
+ /* Disable true incremental collection, but generational is OK. */
+ GC_time_limit = GC_TIME_UNLIMITED;
+ }
}
+
if (GC_print_stats) {
GC_log_printf("Number of marker threads = %ld\n", GC_markers);
}
-# endif
+# endif /* PARALLEL_MARK */
GC_register_my_thread(&sb);
@@ -1762,11 +1839,13 @@
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) is joining thread 0x%x.\n",
- (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
+ (int)pthread_self(), (int)GetCurrentThreadId(),
+ (int)pthread_id);
# endif
# if DEBUG_WIN32_PTHREADS
GC_printf("thread 0x%x(0x%x) is joining thread 0x%x.\n",
- (int)(pthread_self()).p, GetCurrentThreadId(), pthread_id.p);
+ (int)(pthread_self()).p, (int)GetCurrentThreadId(),
+ pthread_id.p);
# endif
if (!parallel_initialized) GC_init_parallel();
@@ -1794,11 +1873,13 @@
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
- (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
+ (int)pthread_self(), (int)GetCurrentThreadId(),
+ (int)pthread_id);
# endif
# if DEBUG_WIN32_PTHREADS
GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
- (int)(pthread_self()).p, GetCurrentThreadId(), pthread_id.p);
+ (int)(pthread_self()).p, (int)GetCurrentThreadId(),
+ pthread_id.p);
# endif
return result;
@@ -1836,11 +1917,11 @@
# if DEBUG_CYGWIN_THREADS
GC_printf("About to create a thread from 0x%x(0x%x)\n",
- (int)pthread_self(), GetCurrentThreadId);
+ (int)pthread_self(), (int)GetCurrentThreadId);
# endif
# if DEBUG_WIN32_PTHREADS
GC_printf("About to create a thread from 0x%x(0x%x)\n",
- (int)(pthread_self()).p, GetCurrentThreadId());
+ (int)(pthread_self()).p, (int)GetCurrentThreadId());
# endif
GC_need_to_lock = TRUE;
result = pthread_create(new_thread, attr, GC_pthread_start, si);
@@ -1865,11 +1946,11 @@
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id,
- thread_id);
+ (int)thread_id);
# endif
# if DEBUG_WIN32_PTHREADS
GC_printf("thread 0x%x(0x%x) starting...\n",(int) pthread_id.p,
- thread_id);
+ (int)thread_id);
# endif
GC_ASSERT(!GC_win32_dll_threads);
@@ -1898,11 +1979,11 @@
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) returned from start routine.\n",
- (int)pthread_self(),GetCurrentThreadId());
+ (int)pthread_self(),(int)GetCurrentThreadId());
# endif
# if DEBUG_WIN32_PTHREADS
GC_printf("thread 0x%x(0x%x) returned from start routine.\n",
- (int)(pthread_self()).p, GetCurrentThreadId());
+ (int)(pthread_self()).p, (int)GetCurrentThreadId());
# endif
return(result);
@@ -1920,11 +2001,11 @@
GC_ASSERT(!GC_win32_dll_threads);
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
- (int)pthread_self(),GetCurrentThreadId());
+ (int)pthread_self(),(int)GetCurrentThreadId());
# endif
# if DEBUG_WIN32_PTHREADS
GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
- (int)(pthread_self()).p,GetCurrentThreadId());
+ (int)(pthread_self()).p,(int)GetCurrentThreadId());
# endif
LOCK();
@@ -1996,6 +2077,13 @@
switch (reason) {
case DLL_THREAD_ATTACH:
+# ifdef PARALLEL_MARK
+ /* Don't register marker threads. */
+ if (GC_parallel) {
+ /* We could reach here only if parallel_initialized == FALSE. */
+ break;
+ }
+# endif
GC_ASSERT(entry_count == 0 || parallel_initialized);
++entry_count; /* and fall through: */
case DLL_PROCESS_ATTACH:
More information about the Gc
mailing list