[sldev] QueryPerformanceCounter() & related issues VWR-940,
VWR-962, VWR-975
Dzonatas
dzonatas at dzonux.net
Sat Jun 2 04:44:34 PDT 2007
Paul TBBle Hampson wrote:
> There's no copyright statement or similar on that page, and I'm
> pretty sure that's been copied from somewhere itself.
>
>
It has a governmental address, which is commonly assumed to mean public
domain. They are pretty simple.
I have updated the related issues.
I started to sort out the related contributions. Attached is a patch for
review... more work needs to be done on it.
The main change I made was to move the rdtsc and similar code into the header
file, llfasttimer.h, so that the code can now be assembled in-line. Eh... saves a
little overhead.
--
-------------- next part --------------
Index: indra/llcommon/llfasttimer.h
===================================================================
--- indra/llcommon/llfasttimer.h (revision 62604)
+++ indra/llcommon/llfasttimer.h (working copy)
@@ -13,8 +13,70 @@
#define FAST_TIMER_ON 1
-U64 get_cpu_clock_count();
+//----------------------------------------------------
+//----------------------------------------------------
+// get_cpu_clock_count() - platform dependent
+//
+// Returns the current value of the CPU's free-running
+// counter as a U64: the time-stamp counter (RDTSC) on
+// x86/x86_64, the timebase register on PowerPC.
+// Any other platform falls back to get_clock_count().
+//----------------------------------------------------
+//----------------------------------------------------
+#if LL_MSVC
+
+inline U64 get_cpu_clock_count()
+{
+    U64 ret;
+    // 0x0f 0x31 is the RDTSC opcode; the result arrives in EDX:EAX.
+    __asm
+    {
+        _emit 0x0f
+        _emit 0x31
+        mov dword ptr [ret+4], edx
+        mov dword ptr [ret], eax
+    }
+    return ret;
+}
+
+
+#elif LL_GNUC && defined(__i386__)
+inline U64 get_cpu_clock_count()
+{
+    U64 ret;
+    // "=A" binds the 64-bit result to the EDX:EAX pair on i386.
+    // volatile: the asm has no inputs, so without it the compiler is
+    // free to merge or hoist repeated reads of the counter.
+    asm volatile (".byte 0x0f, 0x31" : "=A" (ret));
+    return ret;
+}
+
+
+#elif LL_GNUC && (defined(__amd64__) || defined(__x86_64__))
+inline U64 get_cpu_clock_count()
+{
+    // On x86_64 "=A" does not mean RDX:RAX, so the two halves are
+    // fetched separately and recombined.
+    U64 lo, hi;
+    asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
+    return (hi << 32) | lo;
+}
+
+
+#elif LL_GNUC && defined(__powerpc__)
+inline U64 get_cpu_clock_count()
+{
+    U32 lo, hi, tmp;
+    // Read upper, lower, then upper again and retry if the upper half
+    // changed in between, so the two halves belong to the same tick.
+    // cmplw writes condition register field 0, hence the "cr0" clobber.
+    asm volatile ("0: mftbu %0; mftb %1; mftbu %2; cmplw %0,%2; bne 0b" : "=r"(hi), "=r"(lo), "=r"(tmp) : : "cr0");
+    return ((U64)hi << 32) | lo;
+}
+
+
+#else
+inline U64 get_cpu_clock_count()
+{
+    // No cycle counter implementation for this platform/compiler.
+    return get_clock_count();
+}
+
+
+#endif
+
+//----------------------------------------------------
+//----------------------------------------------------
+// LLFastTimer
+//----------------------------------------------------
+//----------------------------------------------------
+
class LLFastTimer
{
public:
Index: indra/llcommon/llsys.cpp
===================================================================
--- indra/llcommon/llsys.cpp (revision 62604)
+++ indra/llcommon/llsys.cpp (working copy)
@@ -288,14 +288,42 @@
mHasSSE2 = (info->_Ext.SSE2_StreamingSIMD2_Extensions != 0);
mCPUMhz = (S32)(proc.GetCPUFrequency(50)/1000000.0);
mFamily.assign( info->strFamily );
+#ifdef LL_LINUX
+    // Cache the contents of CPUINFO_FILE as name -> value pairs in
+    // mCPUInfoLines so later lookups do not have to re-read the file.
+    // A key that appears more than once keeps the last value seen.
+    // *NOTE: This works on linux. What will it do on other systems?
+    FILE* cpuinfo = LLFile::fopen(CPUINFO_FILE, "r"); /* Flawfinder: ignore */
+    if(cpuinfo)
+    {
+        char line[MAX_STRING]; /* Flawfinder: ignore */
+        memset(line, 0, MAX_STRING);
+        while(fgets(line, MAX_STRING, cpuinfo))
+        {
+            // /proc/cpuinfo on Linux looks like:
+            // name\t*: value\n
+            char* tabspot = strchr( line, '\t' );
+            if (tabspot == NULL)
+                continue;
+            char* colspot = strchr( tabspot, ':' );
+            if (colspot == NULL)
+                continue;
+            char* nlspot = strchr( colspot, '\n' );
+            if (nlspot == NULL)
+                nlspot = colspot + strlen( colspot ); // Fallback to terminating NUL
+
+            // Step over the separator that follows the colon, but never
+            // past the end of the line: an entry with an empty value ends
+            // right after the ':', and a fixed "colspot + 2" would then
+            // hand std::string an inverted range.
+            char* valspot = colspot + 1;
+            while (valspot < nlspot && (*valspot == ' ' || *valspot == '\t'))
+                ++valspot;
+
+            std::string linename( line, tabspot );
+            std::string lineval( valspot, nlspot );
+            mCPUInfoLines[ linename ] = lineval;
+        }
+        fclose(cpuinfo);
+    }
+#endif
}
std::string LLCPUInfo::getCPUString() const
{
-#if LL_WINDOWS || LL_DARWIN
std::ostringstream out;
+#if LL_WINDOWS || LL_DARWIN
CProcessor proc;
(void) proc.GetCPUInfo();
out << proc.strCPUName << " ";
@@ -308,12 +336,32 @@
out << "(" << (S32)(freq) << " MHz)";
}
- return out.str();
+#else // LL_LINUX
+#if ( defined(__i386__) || defined(__amd64__) || defined(__x86_64__) )
+ out << getInfoLine( "model name" );
+#elif defined(__powerpc__)
+ out << getInfoLine( "platform" ) << " (" << getInfoLine( "clock" ) << ")";
#else
- return "Can't get terse CPU information";
+ out << "Can't get terse CPU information";
#endif
+#endif
+ return out.str();
}
+#ifdef LL_LINUX
+// Look up one entry of mCPUInfoLines by its name (for example "model name").
+// Returns a reference to the stored value, or to an empty string when there
+// is no entry with that name.
+// Guarded by LL_LINUX because both the declaration and mCPUInfoLines only
+// exist in the class under that same guard.
+const std::string& LLCPUInfo::getInfoLine( const std::string index ) const
+{
+    std::map< std::string, std::string >::const_iterator data = mCPUInfoLines.find( index );
+    if (data != mCPUInfoLines.end())
+    {
+        return data->second;
+    }
+
+    // Function-local static so the returned reference outlives the call.
+    static const std::string empty;
+    return empty;
+}
+#endif // LL_LINUX
+
void LLCPUInfo::stream(std::ostream& s) const
{
#if LL_WINDOWS || LL_DARWIN
@@ -329,22 +377,16 @@
s << "Unable to collect processor info";
}
#else
- // *NOTE: This works on linux. What will it do on other systems?
- FILE* cpuinfo = LLFile::fopen(CPUINFO_FILE, "r"); /* Flawfinder: ignore */
- if(cpuinfo)
+ // Return the machine information we gathered in the constructor
+ if(!mCPUInfoLines.empty())
{
- char line[MAX_STRING]; /* Flawfinder: ignore */
- memset(line, 0, MAX_STRING);
- while(fgets(line, MAX_STRING, cpuinfo))
- {
- line[strlen(line)-1] = ' '; /*Flawfinder: ignore*/
- s << line;
- }
- fclose(cpuinfo);
+ for( std::map< std::string, std::string >::const_iterator i = mCPUInfoLines.begin();
+ i != mCPUInfoLines.end(); ++i )
+ s << (*i).first << "\t: " << (*i).second << ' ';
}
else
{
- s << "Unable to collect memory information";
+ s << "Unable to collect processor information";
}
#endif
}
Index: indra/llcommon/llsys.h
===================================================================
--- indra/llcommon/llsys.h (revision 62604)
+++ indra/llcommon/llsys.h (working copy)
@@ -20,6 +20,7 @@
#include <iosfwd>
#include <string>
+#include <map>
class LLOSInfo
{
@@ -59,11 +60,18 @@
// Family is "AMD Duron" or "Intel Pentium Pro"
const std::string& getFamily() const { return mFamily; }
+#ifdef LL_LINUX
+ const std::string& getInfoLine( const std::string index ) const;
+#endif
+
private:
BOOL mHasSSE;
BOOL mHasSSE2;
S32 mCPUMhz;
std::string mFamily;
+#ifdef LL_LINUX
+ std::map< std::string, std::string > mCPUInfoLines;
+#endif
};
class LLMemoryInfo
Index: indra/llcommon/llprocessor.cpp
===================================================================
--- indra/llcommon/llprocessor.cpp (revision 62686)
+++ indra/llcommon/llprocessor.cpp (working copy)
@@ -38,6 +38,10 @@
# include <windows.h>
#endif
+#if LL_LINUX
+# include <boost/lexical_cast.hpp>
+#endif
+
#if !LL_DARWIN
#ifdef PROCESSOR_FREQUENCY_MEASURE_AVAILABLE
@@ -156,9 +160,11 @@
////////////////////////////////////////////////////////////////////////////
F64 CProcessor::GetCPUFrequency(unsigned int uiMeasureMSecs)
{
-#ifndef PROCESSOR_FREQUENCY_MEASURE_AVAILABLE
- return 0;
-#else
+#if LL_LINUX
+    // On Linux the frequency is not measured; it is taken from the
+    // "cpu MHz" entry held by the global LLCPUInfo and converted to Hz.
+    extern LLCPUInfo gSysCPU;
+    try
+    {
+        return boost::lexical_cast<F64>(gSysCPU.getInfoLine( "cpu MHz" )) * 1000000;
+    }
+    catch (const boost::bad_lexical_cast&)
+    {
+        // getInfoLine() yields an empty string when the entry is missing,
+        // which lexical_cast rejects by throwing. Report "unknown" as 0,
+        // the same value the unsupported-platform branch returns.
+        // NOTE(review): the LLCPUInfo constructor appears to call this
+        // before it has filled in its cpuinfo map, so this path may be
+        // taken during startup - confirm against gSysCPU's initialisation.
+        return 0;
+    }
+
+#elif LL_WINDOWS // PROCESSOR_FREQUENCY_MEASURE_AVAILABLE
// If there are invalid measure time parameters, zero msecs for example,
// we've to exit the function
if (uiMeasureMSecs < 1)
@@ -225,16 +231,7 @@
QueryPerformanceCounter((LARGE_INTEGER *) &starttime);
// Then we get the current cpu clock and store it
-#if LL_GNUC
- __asm__("rdtsc" : "=A" (start) ) ;
-#else
- __asm
- {
- rdtsc
- mov dword ptr [start+4], edx
- mov dword ptr [start], eax
- }
-#endif
+ start = get_cpu_clock_count();
// Now we wait for some msecs
_Delay(uiMeasureMSecs);
@@ -244,16 +241,7 @@
QueryPerformanceCounter((LARGE_INTEGER *) &endtime);
// And also for the end cpu clock
-#if LL_GNUC
- __asm__("rdtsc" : "=A" (end) ) ;
-#else
- __asm
- {
- rdtsc
- mov dword ptr [end+4], edx
- mov dword ptr [end], eax
- }
-#endif
+ end = get_cpu_clock_count();
// Now we can restore the default process and thread priorities
SetProcessAffinityMask(hProcess, dwProcessMask);
@@ -271,6 +259,8 @@
// At last we just return the frequency that is also stored in the call
// member var uqwFrequency
return uqwFrequency;
+#else
+ return 0;
#endif
}
Index: indra/llcommon/llfasttimer.cpp
===================================================================
--- indra/llcommon/llfasttimer.cpp (revision 62604)
+++ indra/llcommon/llfasttimer.cpp (working copy)
@@ -17,6 +17,7 @@
#include <time.h>
#include <sys/time.h>
#include <sched.h>
+#include <boost/lexical_cast.hpp>
#elif LL_DARWIN
# include <time.h>
@@ -43,64 +44,17 @@
F64 LLFastTimer::sCPUClockFrequency = 0.0;
-//////////////////////////////////////////////////////////////////////////////
-//
-// CPU clock/other clock frequency and count functions
-//
-
-#if LL_WINDOWS
-
-U64 get_cpu_clock_count()
-{ U32 hi,lo;
-
- __asm
- {
- _emit 0x0f
- _emit 0x31
- mov lo,eax
- mov hi,edx
- }
-
- U64 ret = hi;
- ret *= 4294967296L;
- ret |= lo;
- return ret;
-};
-
-#endif // LL_WINDOWS
-
-
-#if LL_LINUX
-U64 get_cpu_clock_count()
-{
- U64 x;
- __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
- return x;
-}
-#endif
-
-#if LL_DARWIN
-//
-// Mac implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
-
-U64 get_cpu_clock_count()
-{
- return get_clock_count();
-}
-#endif
-
//////////////////////////////////////////////////////////////////////////////
-//static
-#if LL_LINUX || LL_DARWIN
-// Both Linux and Mac use gettimeofday for accurate time
+
+#if LL_LINUX && defined(__powerpc__)
U64 LLFastTimer::countsPerSecond()
{
- return 1000000; // microseconds, so 1 Mhz.
+    // PowerPC Linux: the tick rate comes from the "timebase" entry held
+    // by the global LLCPUInfo.
+    extern LLCPUInfo gSysCPU;
+    try
+    {
+        return boost::lexical_cast<U64>(gSysCPU.getInfoLine( "timebase" ));
+    }
+    catch (const boost::bad_lexical_cast&)
+    {
+        // Entry missing or not numeric: use the same 1 MHz default the
+        // generic implementation falls back to when no frequency is known,
+        // rather than letting the exception escape into timer code.
+        return 1000000;
+    }
}
+
#else
U64 LLFastTimer::countsPerSecond()
{
@@ -108,6 +62,8 @@
{
CProcessor proc;
sCPUClockFrequency = proc.GetCPUFrequency(50);
+ if(!sCPUClockFrequency)
+ sCPUClockFrequency = 1000000.0;
}
return U64(sCPUClockFrequency);
}
More information about the SLDev
mailing list