[Gc] Fix for Win32 PARALLEL_MARK

Ivan Maidanski ivmai at mail.ru
Sun Oct 26 07:16:32 PST 2008


Hi!

This patch is for my previous patch introducing parallel marking on Win32 and Cygwin. My previous implementation of Win32 GC_wait_marker() turns (frequently when GC_MARKERS > ncpu) into simple yielding (thus uselessly consuming CPU time outside garbage collections).

Now I'm using PulseEvent and atomic NT SignalObjectAndWait() (on Windows 98/Me parallel marking is set disabled but it's ok since multicore CPUs are not supported by these OS, AFAIK).

Mark lock spinning is disabled by default (unless MARKLOCK_USES_SPIN) but it works ok. More careful measurements should be done.

Tested with MinGW and VC++ 32-bit on dual core CPU.

Still some questions remain:
- should we scan markers in GC_get_next_stack()?;
- should we use .._full() version (or weaker) of CAS in GC_acquire_mark_lock()?

PS. This patch also adjusts ..._printf() actual params (according to their format specifiers) for "win32_threads.c" file.

Bye.

-------------- next part --------------
diff -ru bdwgc/win32_threads.c updated/bdwgc/win32_threads.c
--- bdwgc/win32_threads.c	2008-10-24 17:42:24.000000000 +0400
+++ updated/bdwgc/win32_threads.c	2008-10-26 13:16:20.000000000 +0300
@@ -402,8 +402,7 @@
 		        0,
 		        0,
 		        DUPLICATE_SAME_ACCESS)) {
-	DWORD last_error = GetLastError();
-	GC_err_printf("Last error code: %d\n", last_error);
+	GC_err_printf("Last error code: %d\n", (int)GetLastError());
 	ABORT("DuplicateHandle failed");
   }
   me -> last_stack_min = ADDR_LIMIT;
@@ -947,7 +946,7 @@
       if (sp >= stack_min && sp < thread->stack_base) {
 #       ifdef DEBUG_THREADS
 	  GC_printf("Pushing thread from %p to %p for 0x%x from 0x%x\n",
-		    sp, thread -> stack_base, thread -> id, me);
+		    sp, thread -> stack_base, (int)thread -> id, (int)me);
 #       endif
         GC_push_all_stack(sp, thread->stack_base);
       } else {
@@ -968,7 +967,7 @@
   DWORD me = GetCurrentThreadId();
   GC_bool found_me = FALSE;
 # ifndef SMALL_CONFIG
-    size_t nthreads = 0;
+    unsigned nthreads = 0;
 # endif
   
   if (GC_win32_dll_threads) {
@@ -1154,7 +1153,8 @@
     for (i = 0; i < GC_markers - 1; ++i) {
       if (0 != pthread_create(&new_thread, &attr,
 			      GC_mark_thread, (void *)(word)i)) {
-	WARN("Marker thread creation failed, errno = %d.\n", errno);
+	WARN("Marker thread creation failed, errno = %ld.\n",
+		/* (word) */ errno);
       }
     }
     pthread_attr_destroy(&attr);
@@ -1314,14 +1314,19 @@
 #define SPIN_MAX 128	/* Maximum number of calls to GC_pause before	*/
 #endif			/* give up.					*/
 
+volatile unsigned GC_spinning_disabled; /* don't spin if non-zero */
+
 STATIC int GC_spin_and_trylock(volatile AO_t *pmutex_waitcnt)
 {
   unsigned pause_length;
   unsigned i;
   GC_pause();
   for (pause_length = 2; pause_length <= SPIN_MAX; pause_length <<= 1) {
+    /* FIXME: Should we use ..._full version of CAS here (and below)?	*/
     if (AO_compare_and_swap(pmutex_waitcnt, 0, 1))
       return 0;
+    if (GC_spinning_disabled)
+      break;
     for (i = 0; i < pause_length; ++i)
       GC_pause();
   }
@@ -1345,11 +1350,14 @@
   /* Here we implement the same spinning algorithm as	*/
   /* in GC_generic_lock() in pthread_support.c.	*/
 
+  if (GC_spinning_disabled)
+    goto nospinning;
   if (!AO_compare_and_swap(&GC_mark_mutex_waitcnt, 0, 1)) {
     if (GC_spin_and_trylock(&GC_mark_mutex_waitcnt) < 0) {
+     nospinning:
 # endif
 
-      if ((signed_word)AO_fetch_and_add1_full(&GC_mark_mutex_waitcnt) > 0) {
+      if (AO_fetch_and_add1_full(&GC_mark_mutex_waitcnt) != 0) {
 #	ifdef LOCK_STATS
           ++GC_block_count;
 #	endif
@@ -1389,7 +1397,8 @@
 #   ifdef GC_ASSERTIONS
 	GC_mark_lock_holder = NO_THREAD;
 #   endif
-    if ((signed_word)AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt) > 1 &&
+    GC_ASSERT(GC_mark_mutex_waitcnt != 0);
+    if (AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt) > 1 &&
 	 SetEvent(mark_mutex_event) == FALSE)
 	ABORT("SetEvent() failed");
 }
@@ -1431,36 +1440,81 @@
 /* different places (and, in addition, notifying is done after leaving	*/
 /* critical section) and this could result in a signal loosing between	*/
 /* checking for a particular condition and calling WaitForSingleObject.	*/
-/* So, we use "auto-reset" Event and propagate a broadcast manually	*/
-/* (unless the current thread is the only one waiting on mark_cv).	*/
-
-STATIC HANDLE mark_cv = (HANDLE)0; /* Event with auto-reset */
-
-STATIC unsigned mark_cv_waiter_cnt = 0;
+/* So, we use PulseEvent() and NT SignalObjectAndWait() (which		*/
+/* atomically sets mutex event to signaled state and starts waiting on	*/
+/* condvar). A special case here is GC_mark_mutex_waitcnt == 1 (i.e.	*/
+/* nobody waits for mark lock at this moment) - we don't change it	*/
+/* (otherwise we may loose a signal sent between decrementing		*/
+/* GC_mark_mutex_waitcnt and calling WaitForSingleObject()).		*/
+
+STATIC HANDLE mark_cv = (HANDLE)0; /* Event with manual reset */
+
+typedef DWORD (WINAPI * SignalObjectAndWait_type)(
+		HANDLE, HANDLE, DWORD, BOOL);
+STATIC SignalObjectAndWait_type signalObjectAndWait_func = 0;
 
 void GC_wait_marker(void)
 {
     /* Here we assume that GC_wait_marker() is always called	*/
     /* from a while(check_cond) loop.				*/
+    AO_t waitcnt;
     GC_ASSERT(mark_cv != 0);
-    mark_cv_waiter_cnt++;
-    GC_release_mark_lock();
-    if (WaitForSingleObject(mark_cv, INFINITE) == WAIT_FAILED)
-	ABORT("WaitForSingleObject() failed");
-    /* WaitForSingleObject() unblocks one thread and resets mark_cv */
-    GC_acquire_mark_lock();
-    /* Unblock one more waiting thread (if any) */
-    if (--mark_cv_waiter_cnt != 0 && SetEvent(mark_cv) == FALSE)
-	ABORT("SetEvent() failed");
+    GC_ASSERT(signalObjectAndWait_func != 0);
+
+    /* We inline GC_release_mark_lock() to have atomic		*/
+    /* unlock-and-wait action here.				*/
+    GC_ASSERT(GC_mark_lock_holder == (unsigned long)GetCurrentThreadId());
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = NO_THREAD;
+#   endif
+    
+    if ((waitcnt = GC_mark_mutex_waitcnt) > 1) {
+	(void)AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt);
+    } else {
+	GC_ASSERT(waitcnt != 0);
+#	ifdef MARKLOCK_USES_SPIN
+	   /* Avoid spinning since GC_mark_mutex_waitcnt is guaranteed	*/
+	   /* to be non-zero till the end of this function.		*/
+           GC_spinning_disabled++;
+#	endif
+    }
+
+    /* The state of mark_cv is non-signaled here. */
+    if ((*signalObjectAndWait_func)(mark_mutex_event /* hObjectToSignal */,
+				mark_cv /* hObjectToWaitOn */,
+				INFINITE /* timeout */,
+				FALSE /* isAlertable */) == WAIT_FAILED)
+	ABORT("SignalObjectAndWait() failed");
+    /* The state of mark_cv is non-signaled here again. */
+
+    if (waitcnt > 1) {
+	GC_acquire_mark_lock();
+    } else {
+	GC_ASSERT(GC_mark_mutex_waitcnt != 0);
+	/* Acquire mark lock */
+	if (WaitForSingleObject(mark_mutex_event, INFINITE) == WAIT_FAILED)
+	    ABORT("WaitForSingleObject() failed");
+	GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
+#	ifdef GC_ASSERTIONS
+	    GC_mark_lock_holder = (unsigned long)GetCurrentThreadId();
+#	endif
+#	ifdef MARKLOCK_USES_SPIN
+	    /* Allow spinning now */
+	    GC_spinning_disabled--;
+#	endif
+    }
 }
 
 void GC_notify_all_marker(void)
 {
     GC_ASSERT(mark_cv != 0);
-    if (SetEvent(mark_cv) == FALSE)
-	ABORT("SetEvent() failed");
+    if (PulseEvent(mark_cv) == FALSE)
+	ABORT("PulseEvent() failed");
 }
 
+/* Defined in os_dep.c */
+extern GC_bool GC_wnt;
+
 #endif /* ! GC_PTHREADS */
 
 #endif /* PARALLEL_MARK */
@@ -1482,7 +1536,7 @@
     thread_args *args = (thread_args *)arg;
 
 #   if DEBUG_WIN32_THREADS
-      GC_printf("thread 0x%x starting...\n", GetCurrentThreadId());
+      GC_printf("thread 0x%x starting...\n", (unsigned)GetCurrentThreadId());
 #   endif
 
     GC_register_my_thread(sb); /* This waits for an in-progress GC. */
@@ -1505,7 +1559,7 @@
 
 #   if DEBUG_WIN32_THREADS
       GC_printf("thread 0x%x returned from start routine.\n",
-		GetCurrentThreadId());
+		(unsigned)GetCurrentThreadId());
 #   endif
     return ret;
 }
@@ -1529,7 +1583,8 @@
 		   tls initialized) */
 
 #   if DEBUG_WIN32_THREADS
-      GC_printf("About to create a thread from 0x%x\n", GetCurrentThreadId());
+      GC_printf("About to create a thread from 0x%x\n",
+		(unsigned)GetCurrentThreadId());
 #   endif
     if (GC_win32_dll_threads) {
       return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
@@ -1575,7 +1630,8 @@
     		/* make sure GC is initialized (i.e. main thread is attached,
 		   tls initialized) */
 #   if DEBUG_WIN32_THREADS
-      GC_printf("About to create a thread from 0x%x\n", GetCurrentThreadId());
+      GC_printf("About to create a thread from 0x%x\n",
+		(unsigned)GetCurrentThreadId());
 #   endif
 
     if (GC_win32_dll_threads) {
@@ -1683,17 +1739,14 @@
     
 #   ifdef PARALLEL_MARK
 #     ifndef GC_PTHREADS
+	/* Initialize Win32 event objects for parallel marking		*/
+	/* (used even if parallel marking is set disabled at startup).	*/
 	mark_mutex_event = CreateEventA(NULL /* attrs */,
 				FALSE /* isManualReset */,
 				FALSE /* initialState */, NULL /* name */);
-	if (mark_mutex_event == (HANDLE)0)
-	  ABORT("CreateEvent() failed");
-	
 	builder_cv = CreateEventA(NULL /* attrs */, TRUE /* isManualReset */,
 			FALSE /* initialState */, NULL /* name */);
-	mark_cv = CreateEventA(NULL /* attrs */, FALSE /* isManualReset */,
-			FALSE /* initialState */, NULL /* name */);
-	if (builder_cv == (HANDLE)0 || mark_cv == (HANDLE)0)
+	if (mark_mutex_event == (HANDLE)0 || builder_cv == (HANDLE)0)
 	  ABORT("CreateEvent() failed");
 #     endif
 
@@ -1727,18 +1780,42 @@
 	    GC_markers = MAX_MARKERS; /* silently limit GC_markers value */
 	}
       }	
-      if (GC_markers <= 1 || GC_win32_dll_threads) {
-	GC_parallel = FALSE;
-	GC_markers = 1;
-      } else {
-	GC_parallel = TRUE;
-	/* Disable true incremental collection, but generational is OK.	*/
-	GC_time_limit = GC_TIME_UNLIMITED;
+      
+      /* Set GC_parallel. */
+      {
+#	ifndef GC_PTHREADS
+	  HMODULE hK32;
+	  /* SignalObjectAndWait() API call works only under NT.	*/
+#	endif
+	if (GC_markers <= 1 || GC_win32_dll_threads
+#	    ifndef GC_PTHREADS
+	      || GC_wnt == FALSE
+	      || (hK32 = GetModuleHandleA("kernel32.dll")) == (HMODULE)0
+	      || (signalObjectAndWait_func = (SignalObjectAndWait_type)
+			GetProcAddress(hK32, "SignalObjectAndWait")) == 0
+#	    endif
+	   ) {
+	  /* Disable parallel marking. */
+	  GC_parallel = FALSE;
+	  GC_markers = 1;
+	} else {
+#	  ifndef GC_PTHREADS
+	    /* Initialize mark_cv if it is really needed.	*/
+	    mark_cv = CreateEventA(NULL /* attrs */, TRUE /* isManualReset */,
+				FALSE /* initialState */, NULL /* name */);
+	    if (mark_cv == (HANDLE)0)
+	      ABORT("CreateEvent() failed");
+#	  endif
+	  GC_parallel = TRUE;
+	  /* Disable true incremental collection, but generational is OK. */
+	  GC_time_limit = GC_TIME_UNLIMITED;
+	}
       }
+      
       if (GC_print_stats) {
 	GC_log_printf("Number of marker threads = %ld\n", GC_markers);
       }
-#   endif
+#   endif /* PARALLEL_MARK */
     
     GC_register_my_thread(&sb);
 
@@ -1762,11 +1839,13 @@
 
 #   if DEBUG_CYGWIN_THREADS
       GC_printf("thread 0x%x(0x%x) is joining thread 0x%x.\n",
-		(int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
+		(int)pthread_self(), (int)GetCurrentThreadId(),
+		(int)pthread_id);
 #   endif
 #   if DEBUG_WIN32_PTHREADS
       GC_printf("thread 0x%x(0x%x) is joining thread 0x%x.\n",
-		(int)(pthread_self()).p, GetCurrentThreadId(), pthread_id.p);
+		(int)(pthread_self()).p, (int)GetCurrentThreadId(),
+		pthread_id.p);
 #   endif
 
     if (!parallel_initialized) GC_init_parallel();
@@ -1794,11 +1873,13 @@
 
 #   if DEBUG_CYGWIN_THREADS
       GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
-		 (int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
+		(int)pthread_self(), (int)GetCurrentThreadId(),
+		(int)pthread_id);
 #   endif
 #   if DEBUG_WIN32_PTHREADS
       GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
-		(int)(pthread_self()).p, GetCurrentThreadId(), pthread_id.p);
+		(int)(pthread_self()).p, (int)GetCurrentThreadId(),
+		pthread_id.p);
 #   endif
 
     return result;
@@ -1836,11 +1917,11 @@
 
 #   if DEBUG_CYGWIN_THREADS
       GC_printf("About to create a thread from 0x%x(0x%x)\n",
-		(int)pthread_self(), GetCurrentThreadId);
+		(int)pthread_self(), (int)GetCurrentThreadId);
 #   endif
 #   if DEBUG_WIN32_PTHREADS
       GC_printf("About to create a thread from 0x%x(0x%x)\n",
-		(int)(pthread_self()).p, GetCurrentThreadId());
+		(int)(pthread_self()).p, (int)GetCurrentThreadId());
 #   endif
     GC_need_to_lock = TRUE;
     result = pthread_create(new_thread, attr, GC_pthread_start, si); 
@@ -1865,11 +1946,11 @@
 
 #   if DEBUG_CYGWIN_THREADS
       GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id,
-		      				  thread_id);
+		      				  (int)thread_id);
 #   endif
 #   if DEBUG_WIN32_PTHREADS
       GC_printf("thread 0x%x(0x%x) starting...\n",(int) pthread_id.p,
-      						  thread_id);
+      						  (int)thread_id);
 #   endif
 
     GC_ASSERT(!GC_win32_dll_threads);
@@ -1898,11 +1979,11 @@
 
 #   if DEBUG_CYGWIN_THREADS
       GC_printf("thread 0x%x(0x%x) returned from start routine.\n",
-		(int)pthread_self(),GetCurrentThreadId());
+		(int)pthread_self(),(int)GetCurrentThreadId());
 #   endif
 #   if DEBUG_WIN32_PTHREADS
       GC_printf("thread 0x%x(0x%x) returned from start routine.\n",
-		(int)(pthread_self()).p, GetCurrentThreadId());
+		(int)(pthread_self()).p, (int)GetCurrentThreadId());
 #   endif
 
     return(result);
@@ -1920,11 +2001,11 @@
     GC_ASSERT(!GC_win32_dll_threads);
 #   if DEBUG_CYGWIN_THREADS
       GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
-		(int)pthread_self(),GetCurrentThreadId());
+		(int)pthread_self(),(int)GetCurrentThreadId());
 #   endif
 #   if DEBUG_WIN32_PTHREADS
       GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
-		(int)(pthread_self()).p,GetCurrentThreadId());
+		(int)(pthread_self()).p,(int)GetCurrentThreadId());
 #   endif
 
     LOCK();
@@ -1996,6 +2077,13 @@
 
   switch (reason) {
    case DLL_THREAD_ATTACH:
+#   ifdef PARALLEL_MARK
+      /* Don't register marker threads. */
+      if (GC_parallel) {
+	  /* We could reach here only if parallel_initialized == FALSE.	*/
+	  break;
+      }
+#   endif
     GC_ASSERT(entry_count == 0 || parallel_initialized);
     ++entry_count; /* and fall through: */
    case DLL_PROCESS_ATTACH:


More information about the Gc mailing list