[Gc] Removal of mark lock spinning for Win32

Ivan Maidanski ivmai at mail.ru
Sat Nov 1 02:46:31 PST 2008


Hi!

It turned out that mark lock spinning (GC_WIN32_THREADS + PARALLEL_MARK + MARKLOCK_USES_SPIN) is almost useless (and only uselessly consumes CPU time) in practice (regardless of SPIN_MAX) since spinning is disabled by GC_wait_marker() most of run time.

Here is sample (and approx.) locking statistics (spin_count/block_count/unlocked_count, on a dual-core CPU):
- test.c (incremental, markers=2): 2/774/509
- test.c (non-incremental, markers=2): 8/2346/1729
- test.c (non-incremental, markers=4): 74/6855/920 (max)
- a long running demo GUI app with several active threads (markers=2): 4/1952256/1002

So, this patch removes mark lock spinning code blocks together with MARKLOCK_USES_SPIN macro.

Several words about performance gain with parallel markers (Win32).

I ran two instances (in parallel) of a long running demo GUI app with several active threads (one without PARALLEL_MARK and other with GC_markers=2, both in non-incremental mode, all optimizations on, no assertions, heapsize=35M). It gives me in average about 25% shorter world-stopped delays (no more) after 1500 collections. In the incremental mode (both, no time limit) I haven't noticed any performance gain at all.

Bye.

-------------- next part --------------
diff -ru bdwgc/win32_threads.c updated/bdwgc/win32_threads.c
--- bdwgc/win32_threads.c	2008-10-30 18:12:08.000000000 +0300
+++ updated/bdwgc/win32_threads.c	2008-10-30 23:01:46.262187300 +0300
@@ -1301,109 +1301,31 @@
 
 /* mark_mutex_event, builder_cv, mark_cv are initialized in GC_thr_init(). */
 
-#ifdef MARKLOCK_USES_SPIN
-/* MARKLOCK_USES_SPIN has the opposite meaning to NO_PTHREAD_TRYLOCK. */
-
-/* GC_pause() is the same as in pthread_support.c */
-
-/* Spend a few cycles in a way that can't introduce contention with	*/
-/* other threads.							*/
-STATIC void GC_pause(void)
-{
-    int i;
-#   if !defined(__GNUC__) || defined(__INTEL_COMPILER)
-      volatile word dummy = 0;
-#   endif
-
-    for (i = 0; i < 10; ++i) { 
-#     if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-        __asm__ __volatile__ (" " : : : "memory");
-#     else
-	/* Something that's unlikely to be optimized away. */
-	GC_noop1(++dummy);
-#     endif
-    }
-}
-
-#ifndef SPIN_MAX
-#define SPIN_MAX 128	/* Maximum number of calls to GC_pause before	*/
-#endif			/* give up.					*/
-
-volatile unsigned GC_spinning_disabled; /* don't spin if non-zero */
-
-STATIC int GC_spin_and_trylock(volatile AO_t *pmutex_waitcnt)
-{
-  unsigned pause_length;
-  unsigned i;
-  GC_pause();
-  for (pause_length = 2; pause_length <= SPIN_MAX; pause_length <<= 1) {
-    /* FIXME: Should we use ..._full version of CAS here (and below)?	*/
-    if (AO_compare_and_swap(pmutex_waitcnt, 0, 1))
-      return 0;
-    if (GC_spinning_disabled)
-      break;
-    for (i = 0; i < pause_length; ++i)
-      GC_pause();
-  }
-  /* Failed. */
-  /* Don't do final AO_compare_and_swap() after pausing. */
-  return -1;
-}
-
-#endif /* MARKLOCK_USES_SPIN */
-
 /* #define LOCK_STATS */
 #ifdef LOCK_STATS
-  unsigned long GC_spin_count = 0;
   unsigned long GC_block_count = 0;
   unsigned long GC_unlocked_count = 0;
 #endif
 
 void GC_acquire_mark_lock(void)
 {
-# ifdef MARKLOCK_USES_SPIN
-  /* Here we implement the same spinning algorithm as	*/
-  /* in GC_generic_lock() in pthread_support.c.	*/
-
-  if (GC_spinning_disabled)
-    goto nospinning;
-  if (!AO_compare_and_swap(&GC_mark_mutex_waitcnt, 0, 1)) {
-    if (GC_spin_and_trylock(&GC_mark_mutex_waitcnt) < 0) {
-     nospinning:
-# endif
-
-      if (AO_fetch_and_add1_full(&GC_mark_mutex_waitcnt) != 0) {
+    if (AO_fetch_and_add1_full(&GC_mark_mutex_waitcnt) != 0) {
 #	ifdef LOCK_STATS
           ++GC_block_count;
 #	endif
         if (WaitForSingleObject(mark_mutex_event, INFINITE) == WAIT_FAILED)
           ABORT("WaitForSingleObject() failed");
-      }
-#     ifdef LOCK_STATS
-        else {
-#	  ifdef MARKLOCK_USES_SPIN
-	    ++GC_spin_count;
-#	  else
-	    ++GC_unlocked_count;
-#	  endif
-	}
-#     endif
-  
-# ifdef MARKLOCK_USES_SPIN
     }
 #   ifdef LOCK_STATS
-      else ++GC_spin_count;
+        else {
+	  ++GC_unlocked_count;
+	}
 #   endif
-  }
-# ifdef LOCK_STATS
-    else ++GC_unlocked_count;
-# endif
-# endif
   
-  GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
-# ifdef GC_ASSERTIONS
-    GC_mark_lock_holder = (unsigned long)GetCurrentThreadId();
-# endif
+    GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = (unsigned long)GetCurrentThreadId();
+#   endif
 }
 
 void GC_release_mark_lock(void)
@@ -1487,11 +1409,6 @@
 	(void)AO_fetch_and_sub1_full(&GC_mark_mutex_waitcnt);
     } else {
 	GC_ASSERT(waitcnt != 0);
-#	ifdef MARKLOCK_USES_SPIN
-	   /* Avoid spinning since GC_mark_mutex_waitcnt is guaranteed	*/
-	   /* to be non-zero till the end of this function.		*/
-           GC_spinning_disabled++;
-#	endif
     }
 
     /* The state of mark_cv is non-signaled here. */
@@ -1513,10 +1430,6 @@
 #	ifdef GC_ASSERTIONS
 	    GC_mark_lock_holder = (unsigned long)GetCurrentThreadId();
 #	endif
-#	ifdef MARKLOCK_USES_SPIN
-	    /* Allow spinning now */
-	    GC_spinning_disabled--;
-#	endif
     }
 }
 


More information about the Gc mailing list