[sldev] QueryPerformanceCounter() & related issues VWR-940, VWR-962, VWR-975

Dzonatas dzonatas at dzonux.net
Sat Jun 2 04:44:34 PDT 2007


Paul TBBle Hampson wrote:
> There's no copyright statement or similar on that page, and I'm
> pretty sure that's been copied from somewhere itself.
>
>   
It has a governmental address, so it is commonly assumed to be in the 
public domain. They are pretty simple.


I have updated the related issues.

I started to sort out the related contributions. Attached is a patch for 
review... more work needs to be done on it.

The main change I made was to move the rdtsc and similar code into the 
header file, llfasttimer.h, so that the code can now be assembled inline. 
Eh... it saves a little overhead.




-- 
-------------- next part --------------
Index: indra/llcommon/llfasttimer.h
===================================================================
--- indra/llcommon/llfasttimer.h	(revision 62604)
+++ indra/llcommon/llfasttimer.h	(working copy)
@@ -13,8 +13,70 @@
 
 #define FAST_TIMER_ON 1
 
-U64 get_cpu_clock_count();
+//----------------------------------------------------
+//----------------------------------------------------
+// get_cpu_clock_count() - platform dependant
+//----------------------------------------------------
+//----------------------------------------------------
 
+#if LL_MSVC
+
+inline U64 get_cpu_clock_count()	// MSVC variant: emits the RDTSC opcode bytes (0x0f 0x31) and returns EDX:EAX as one 64-bit value
+{
+	U64 ret;
+	__asm 
+	{
+        _emit   0x0f
+        _emit   0x31
+		mov dword ptr [ret+4], edx
+		mov dword ptr [ret], eax
+	}
+    return ret;
+};
+
+
+#elif LL_GNUC && defined(__i386__)
+inline U64 get_cpu_clock_count()	// GCC/i386 variant: reads the time-stamp counter via the RDTSC opcode bytes
+{
+	U64 ret;	// "=A" binds the 64-bit result to the EDX:EAX register pair
+	asm volatile (".byte 0x0f, 0x31" : "=A" (ret));	// volatile: each call must re-read the counter, never be merged or dropped
+	return ret;
+}
+
+
+#elif LL_GNUC && (defined(__amd64__) || defined(__x86_64__))
+inline U64 get_cpu_clock_count()	// GCC/x86-64 variant: reads the time-stamp counter with RDTSC
+{
+	U64 lo, hi;	// RDTSC returns the low 32 bits in RAX and the high 32 bits in RDX
+	asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));	// volatile: each call must re-read the counter, never be merged or dropped
+	return (hi << 32) | lo;
+}
+
+
+#elif LL_GNUC && defined(__powerpc__)
+inline U64 get_cpu_clock_count()	// GCC/PowerPC variant: reads the time base as two 32-bit halves
+{
+	U32 lo, hi, tmp;	// tmp is a second read of the upper half; the loop retries when it differs from hi
+	asm volatile ("0: mftbu %0; mftb %1; mftbu %2; cmplw %0,%2; bne 0b" : "=r"(hi), "=r"(lo), "=r"(tmp) : : "cr0" );	// volatile: re-read on every call; cmplw writes CR0, so declare it clobbered
+	return ((U64)hi << 32) | lo;
+}
+
+
+#else
+inline U64 get_cpu_clock_count()
+{
+	return get_clock_count();
+}
+
+
+#endif
+
+//----------------------------------------------------
+//----------------------------------------------------
+// LLFastTimer
+//----------------------------------------------------
+//----------------------------------------------------
+
 class LLFastTimer
 {
 public:
Index: indra/llcommon/llsys.cpp
===================================================================
--- indra/llcommon/llsys.cpp	(revision 62604)
+++ indra/llcommon/llsys.cpp	(working copy)
@@ -288,14 +288,42 @@
 	mHasSSE2 = (info->_Ext.SSE2_StreamingSIMD2_Extensions != 0);
 	mCPUMhz = (S32)(proc.GetCPUFrequency(50)/1000000.0);
 	mFamily.assign( info->strFamily );
+#ifdef LL_LINUX
+	// *NOTE: This works on linux. What will it do on other systems?
+	FILE* cpuinfo = LLFile::fopen(CPUINFO_FILE, "r");		/* Flawfinder: ignore */
+	if(cpuinfo)
+	{
+		char line[MAX_STRING];		/* Flawfinder: ignore */
+		memset(line, 0, MAX_STRING);
+		while(fgets(line, MAX_STRING, cpuinfo))
+		{
+			// /proc/cpuinfo on Linux looks like:
+			// name\t*: value\n
+			char* tabspot = strchr( line, '\t' );
+			if (tabspot == NULL)
+				continue;
+			char* colspot = strchr( tabspot, ':' );
+			if (colspot == NULL)
+				continue;
+			char* nlspot = strchr( line, '\n' );
+			if (nlspot == NULL)
+				nlspot = line + strlen( line );	// Fallback to terminating NULL
+
+			std::string linename( line, tabspot );
+			std::string lineval( colspot + 2, nlspot );
+			mCPUInfoLines[ linename ] = lineval;
+		}
+		fclose(cpuinfo);
+	}
+#endif
 }
 
 
 std::string LLCPUInfo::getCPUString() const
 {
-#if LL_WINDOWS || LL_DARWIN
 	std::ostringstream out;
 
+#if LL_WINDOWS || LL_DARWIN
 	CProcessor proc;
 	(void) proc.GetCPUInfo();
 	out << proc.strCPUName << " ";
@@ -308,12 +336,32 @@
 		out << "(" << (S32)(freq) << " MHz)";
 	}
 
-	return out.str();
+#else // LL_LINUX
+#if ( defined(__i386__) || defined(__amd64__) || defined(__x86_64__) )
+	out << getInfoLine( "model name" );
+#elif defined(__powerpc__)
+	out << getInfoLine( "platform" ) << " (" << getInfoLine( "clock" ) << ")";
 #else
-	return "Can't get terse CPU information";
+	out << "Can't get terse CPU information";
 #endif
+#endif
+	return out.str();
 }
 
+const std::string& LLCPUInfo::getInfoLine( const std::string index ) const
+{
+	std::map< std::string, std::string >::const_iterator data = mCPUInfoLines.find( index );
+	if (data != mCPUInfoLines.end())
+	{
+		return (*data).second;
+	}
+	else
+	{
+		static const std::string empty;
+		return empty;
+	}
+}
+
 void LLCPUInfo::stream(std::ostream& s) const
 {
 #if LL_WINDOWS || LL_DARWIN
@@ -329,22 +377,16 @@
 		s << "Unable to collect processor info";
 	}
 #else
-	// *NOTE: This works on linux. What will it do on other systems?
-	FILE* cpuinfo = LLFile::fopen(CPUINFO_FILE, "r");		/* Flawfinder: ignore */
-	if(cpuinfo)
+	// Return the machine information we gathered in the constructor
+	if(!mCPUInfoLines.empty())
 	{
-		char line[MAX_STRING];		/* Flawfinder: ignore */
-		memset(line, 0, MAX_STRING);
-		while(fgets(line, MAX_STRING, cpuinfo))
-		{
-			line[strlen(line)-1] = ' ';		 /*Flawfinder: ignore*/
-			s << line;
-		}
-		fclose(cpuinfo);
+		for( std::map< std::string, std::string >::const_iterator i = mCPUInfoLines.begin();
+				i != mCPUInfoLines.end(); ++i )
+			s << (*i).first << "\t: " << (*i).second << ' ';
 	}
 	else
 	{
-		s << "Unable to collect memory information";
+		s << "Unable to collect processor information";
 	}
 #endif
 }
Index: indra/llcommon/llsys.h
===================================================================
--- indra/llcommon/llsys.h	(revision 62604)
+++ indra/llcommon/llsys.h	(working copy)
@@ -20,6 +20,7 @@
 
 #include <iosfwd>
 #include <string>
+#include <map>
 
 class LLOSInfo
 {
@@ -59,11 +60,18 @@
 	// Family is "AMD Duron" or "Intel Pentium Pro"
 	const std::string& getFamily() const { return mFamily; }
 
+#ifdef LL_LINUX
+	const std::string& getInfoLine( const std::string index ) const;
+#endif
+
 private:
 	BOOL mHasSSE;
 	BOOL mHasSSE2;
 	S32 mCPUMhz;
 	std::string mFamily;
+#ifdef LL_LINUX
+	std::map< std::string, std::string > mCPUInfoLines;
+#endif
 };
 
 class LLMemoryInfo
Index: indra/llcommon/llprocessor.cpp
===================================================================
--- indra/llcommon/llprocessor.cpp	(revision 62686)
+++ indra/llcommon/llprocessor.cpp	(working copy)
@@ -38,6 +38,10 @@
 #	include <windows.h>
 #endif
 
+#if LL_LINUX
+#	include <boost/lexical_cast.hpp>
+#endif
+
 #if !LL_DARWIN
 
 #ifdef PROCESSOR_FREQUENCY_MEASURE_AVAILABLE
@@ -156,9 +160,11 @@
 ////////////////////////////////////////////////////////////////////////////
 F64 CProcessor::GetCPUFrequency(unsigned int uiMeasureMSecs)
 {
-#ifndef PROCESSOR_FREQUENCY_MEASURE_AVAILABLE
-	return 0;
-#else
+#if LL_LINUX
+	extern LLCPUInfo gSysCPU;
+	return boost::lexical_cast<F64>(gSysCPU.getInfoLine( "cpu MHz" )) * 1000000;
+
+#elif LL_WINDOWS // PROCESSOR_FREQUENCY_MEASURE_AVAILABLE
 	// If there are invalid measure time parameters, zero msecs for example,
 	// we've to exit the function
 	if (uiMeasureMSecs < 1)
@@ -225,16 +231,7 @@
 	QueryPerformanceCounter((LARGE_INTEGER *) &starttime);
 
 	// Then we get the current cpu clock and store it
-#if LL_GNUC
-	__asm__("rdtsc"	: "=A" (start) ) ;
-#else
-	__asm 
-	{
-		rdtsc
-		mov dword ptr [start+4], edx
-		mov dword ptr [start], eax
-	}
-#endif
+	start = get_cpu_clock_count();
 
 	// Now we wait for some msecs
 	_Delay(uiMeasureMSecs);
@@ -244,16 +241,7 @@
 	QueryPerformanceCounter((LARGE_INTEGER *) &endtime);
 
 	// And also for the end cpu clock
-#if LL_GNUC
-	__asm__("rdtsc"	: "=A" (end) ) ;
-#else
-	__asm 
-	{
-		rdtsc
-		mov dword ptr [end+4], edx
-		mov dword ptr [end], eax
-	}
-#endif
+	end = get_cpu_clock_count();
 
 	// Now we can restore the default process and thread priorities
 	SetProcessAffinityMask(hProcess, dwProcessMask);
@@ -271,6 +259,8 @@
 	// At last we just return the frequency that is also stored in the call
 	// member var uqwFrequency
 	return uqwFrequency;
+#else
+	return 0;
 #endif
 }
 
Index: indra/llcommon/llfasttimer.cpp
===================================================================
--- indra/llcommon/llfasttimer.cpp	(revision 62604)
+++ indra/llcommon/llfasttimer.cpp	(working copy)
@@ -17,6 +17,7 @@
 #include <time.h>
 #include <sys/time.h>
 #include <sched.h>
+#include <boost/lexical_cast.hpp>
 
 #elif LL_DARWIN
 #	include <time.h>
@@ -43,64 +44,17 @@
 
 F64 LLFastTimer::sCPUClockFrequency = 0.0;
 
-//////////////////////////////////////////////////////////////////////////////
 
-//
-// CPU clock/other clock frequency and count functions
-//
-
-#if LL_WINDOWS
-
-U64 get_cpu_clock_count()
-{   U32  hi,lo;
-
-    __asm   
-    {
-        _emit   0x0f
-        _emit   0x31
-        mov     lo,eax
-        mov     hi,edx
-    }
-
-	U64 ret = hi;
-	ret *= 4294967296L;
-	ret |= lo;
-    return ret;
-};
-
-#endif // LL_WINDOWS
-
-
-#if LL_LINUX
-U64 get_cpu_clock_count()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
-	return x;
-}
-#endif
-
-#if LL_DARWIN
-//
-// Mac implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
-
-U64 get_cpu_clock_count()
-{
-	return get_clock_count();
-}
-#endif
-
 //////////////////////////////////////////////////////////////////////////////
 
-//static
-#if LL_LINUX || LL_DARWIN
-// Both Linux and Mac use gettimeofday for accurate time
+
+#if LL_LINUX && defined(__powerpc__)
 U64 LLFastTimer::countsPerSecond()
 {
-	return 1000000; // microseconds, so 1 Mhz.
+	extern LLCPUInfo gSysCPU;
+	return boost::lexical_cast<U64>(gSysCPU.getInfoLine( "timebase" ));
 }
+
 #else
 U64 LLFastTimer::countsPerSecond()
 {
@@ -108,6 +62,8 @@
 	{
 		CProcessor proc;
 		sCPUClockFrequency = proc.GetCPUFrequency(50);
+		if(!sCPUClockFrequency)
+			sCPUClockFrequency = 1000000.0;
 	}
 	return U64(sCPUClockFrequency);
 }


More information about the SLDev mailing list