[Gc] One of the reasons I would like C++ to have GC

Achilleas Margaritis axilmar at otenet.gr
Sat Apr 28 06:52:31 PDT 2007


One of the reasons that I would like C++ to have GC is that its function 
call dispatch is faster than Java's. I made a little benchmark to 
measure dispatch times in Java and in C++.

Here are the Java results:

foo.action1()           = 0.000000002976 sec
Foo.action1_static(foo) = 0.000000002880 sec
ifoo.action1(foo)       = 0.000000008021 sec

And here are the C++ results:

Foo_action1(foo)                 = 0.000000002964 sec
foo->action1()                   = 0.000000002554 sec
Foo_action1_p(foo)               = 0.000000002652 sec
action1_table[class_id_Foo](foo) = 0.000000002950 sec
action1_table[foo->m_id](foo)    = 0.000000002952 sec

The source code is at the end of this message.

Conclusion:

-Invoking a method through an interface in Java is very slow, even with 
one of the latest JVMs (Sun version 1.5.0_10): 8 nanosecs vs 3 nanosecs.

-invocation of a Java method is a little bit slower than a C++ method (3 
nanosecs against 2.5 nanosecs).

-the static Java method invocation is a little bit faster than the 
non-static one, on par with C++.

-even if the C++ method is invoked indirectly from a table indexed by a 
global variable, modern CPUs are smart enough to make the indirect call 
almost equal to direct call.

The above is on an Athlon 64. On a Core 2 Duo, the numbers are more in 
favor of C++.

Of course, the benchmark is not a very realistic one: in real code the 
target call address changes often. And the differences are small, in the 
nanosecond range, so they might be a secondary issue, depending on the 
project, of course.

But, for me, if I can have a little bit of extra performance, then why 
not have it?

Here is the Java source:

package test;

import java.util.Formatter;

/**
 * Minimal stopwatch built on {@link System#nanoTime()}.
 *
 * Call {@link #start()} before the measured code and {@link #end()} after it,
 * then read the result with one of the {@code duration} overloads.
 */
class PerformanceCounter {
	// The raw nanoTime() readings are kept as long. Converting each reading
	// to double before subtracting can drop low-order bits once the values
	// exceed 2^53, so the subtraction is done in integer arithmetic.
	private long m_start;
	private long m_end;
	
	/** Records the timestamp that marks the beginning of the measured interval. */
	public void start() {
		m_start = System.nanoTime();
	}
	
	/** Records the timestamp that marks the end of the measured interval. */
	public void end() {
		m_end = System.nanoTime();
	}
	
	/**
	 * Returns the measured interval divided by {@code iterations} and
	 * multiplied by {@code factor}.
	 *
	 * @param iterations number of operations performed inside the interval
	 * @param factor     multiplier applied to the per-iteration nanosecond
	 *                   value (for example 1e-9 to obtain seconds)
	 * @return the scaled time per iteration
	 */
	public double duration(int iterations, double factor) {
		final double elapsedNanos = (double) (m_end - m_start);
		return (elapsedNanos / iterations) * factor;
	}
	
	/** Returns the time per iteration in nanoseconds. */
	public double duration(int iterations) {
		return duration(iterations, 1.0);
	}
	
	/** Returns the whole measured interval in nanoseconds. */
	public double duration() {
		return duration(1, 1.0);
	}
}

/** Interface through which the benchmark measures interface-based method invocation. */
interface IFoo {
	// Interface methods are implicitly public, so the modifier is omitted.
	void action1();
}

/** Call target of the benchmark: every action bumps a counter by one. */
class Foo implements IFoo {
	// Counter incremented by both the instance and the static action.
	int m_data = 0;
	
	/** Instance action; also serves as the implementation of {@link IFoo#action1()}. */
	public final void action1() {
		m_data += 1;
	}
	
	/** Static counterpart of {@link #action1()} operating on the given instance. */
	static final void action1_static(Foo f) {
		f.m_data += 1;
	}
}

public class Main {
	static final int MAX_CALL = 1000000000;
	static PerformanceCounter pc = new PerformanceCounter();
	
	public static void main(String[] args) {
		Foo foo1 = new Foo();
		IFoo ifoo = foo1;
		
		pc.start();
		for(int i = 0; i < MAX_CALL; ++i) {
			foo1.action1();
		}
		pc.end();
		System.out.println(new Formatter().format("foo.action1()           = 
%1.12f sec",
				new Object[]{pc.duration(MAX_CALL, 0.000000001)}).toString());
		
		pc.start();
		for(int i = 0; i < MAX_CALL; ++i) {
			Foo.action1_static(foo1);
		}
		pc.end();
		System.out.println(new Formatter().format("Foo.action1_static(foo) = 
%1.12f sec",
				new Object[]{pc.duration(MAX_CALL, 0.000000001)}).toString());
		
		pc.start();
		for(int i = 0; i < MAX_CALL; ++i) {
			ifoo.action1();
		}
		pc.end();
		System.out.println(new Formatter().format("ifoo.action1(foo)       = 
%1.12f sec",
				new Object[]{pc.duration(MAX_CALL, 0.000000001)}).toString());
	}
}

And here is the C++ source (for win32, with full optimizations, compiled 
with __fastcall):


#include <cstdio>
#include <iostream>
using namespace std;


#include <windows.h>


class PerformanceCounter {
public:
     PerformanceCounter() {
         QueryPerformanceFrequency(&m_freq);
     }

     void start() {
         QueryPerformanceCounter(&m_start);
     }

     void end() {
         QueryPerformanceCounter(&m_end);
     }

     double duration(double iterations = 1, double factor = 1.0) const {
         return ((((double)m_end.QuadPart - (double)m_start.QuadPart) / 
(double)m_freq.QuadPart) * factor) / iterations;
     }

private:
     LARGE_INTEGER m_freq;
     LARGE_INTEGER m_start;
     LARGE_INTEGER m_end;
};



// Calling convention applied to every benchmarked function and to the
// function-pointer types that refer to them. Swap which of the two lines is
// commented out to build with __cdecl instead of __fastcall.
//#define CALLC      __cdecl
#define CALLC   __fastcall


// Abstract base class of the benchmarked objects.
class Object {
public:
	// Class identifier; the benchmark uses it as an index into action1_table.
	const int m_id;
	
	// `explicit` prevents an int from being silently treated as an Object.
	explicit Object(int id) : m_id(id) {
	}
	
	// Virtual destructor so that a derived object can be destroyed correctly
	// through a pointer to Object.
	virtual ~Object() {
	}
	
	// Action invoked through virtual dispatch by the benchmark.
	virtual void CALLC action1() = 0;
};


// Identifier that Foo passes to the Object constructor; it is also the index
// of Foo's entry in action1_table.
const int class_id_Foo = 0;


// Concrete Object that serves as the call target of the benchmark.
class Foo : public Object {
public:
	// Registers the object under class_id_Foo and zeroes the counter.
	Foo()
		: Object(class_id_Foo)
		, m_data(0)
	{
	}
	
	// Override of Object::action1; the body is defined outside the class.
	virtual void CALLC action1();

	// Counter incremented by the benchmarked actions.
	int m_data;
};

// Member-function action: bumps the receiver's counter by one.
void CALLC Foo::action1() {
	m_data += 1;
}


// Free-function action: bumps the counter of the given Foo by one.
void CALLC Foo_action1(Foo *foo) {
	foo->m_data += 1;
}


// Pointer to an action that takes its receiver as a generic Object pointer.
typedef void (CALLC *Action1)(Object *);


// Dispatch table with one entry per class id. Foo_action1 takes a Foo*, so
// its address has to be converted to the table's Object*-based pointer type.
extern const Action1 action1_table[] = {
	reinterpret_cast<Action1>(&Foo_action1),
};


// Number of calls timed for each dispatch variant in main().
#define MAX_CALL   1000000000


int main() {
	PerformanceCounter pc;
	void (CALLC *Foo_action1_p)(Foo *) = Foo_action1;
	
	Foo *foo = new Foo;
	Object *foo1 = new Foo;


	//direct function dispatch	
	pc.start();
	for(int i = 0; i < MAX_CALL; ++i) {
		Foo_action1(foo);
	}
	pc.end();
	printf("Foo_action1(foo)                 = %1.12f sec\n", 
pc.duration(MAX_CALL));

	
	//C++ dispatch
	pc.start();
	for(int i = 0; i < MAX_CALL; ++i) {
		foo1->action1();
	}
	pc.end();
	printf("foo->action1()                   = %1.12f sec\n", 
pc.duration(MAX_CALL));

		
	//indirect function dispatch	
	pc.start();
	for(int i = 0; i < MAX_CALL; ++i) {
		Foo_action1_p(foo);
	}		
	pc.end();
	printf("Foo_action1_p(foo)               = %1.12f sec\n", 
pc.duration(MAX_CALL));

	
	//direct table dispatch
	pc.start();
	for(int i = 0; i < MAX_CALL; ++i) {
		action1_table[class_id_Foo](foo);
	}		
	pc.end();
	printf("action1_table[class_id_Foo](foo) = %1.12f sec\n", 
pc.duration(MAX_CALL));


	//indirect table dispatch
	pc.start();
	for(int i = 0; i < MAX_CALL; ++i) {
		action1_table[foo->m_id](foo);
	}
	pc.end();
	printf("action1_table[foo->m_id](foo)    = %1.12f sec\n", 
pc.duration(MAX_CALL));

	
	getchar();
	return 0;
}



More information about the Gc mailing list