/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2006 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

/***********************************************************************

  This header file provides some simple functions to use the pentium
  cycle counter to take timings.  Let me know if you find any errors.

  Darius Buntinas (buntinas@cis...)

  Please note that version 2 (this file) has a new interface from the
  original (rdtsc.h).  Here are the changes from rdtsc.h:

  TIME_DECLARE and TIME_DECLARE_EXTERN are no longer used.

  TIME_INIT is not needed, unless you use USECS_DELAY.  Use this as a
    statement in main() (i.e. TIME_INIT; not as a function:
    TIME_INIT();).

  TIME_PRE(x) now takes an unsigned long long parameter.  You should
    pass this parameter to TIME_POST.  This allows you to do nested
    timings.

  TIME_POST(x) works pretty much as before.  The parameter x must have
    been set by TIME_PRE previously.

  USECS(x) converts processor cycles to microseconds.  Note that this
    will do some file I/O the first time it is called to determine the
    processor speed, so you probably don't want to use this between a
    TIME_PRE and TIME_POST.  If you used to use this to implement a
    delay loop, use USECS_DELAY instead (or if you are really
    stubborn, and want to use it like this, just make sure you do a
    TIME_INIT first.  It will then work as usual.);

  USECS_DELAY(x) is a new function which will loop for x microseconds.
    Because this is a time-critical function, the processor speed must
    be initialized before this function is called.  This can be done
    explicitly, by doing a TIME_INIT, or implicitly, by calling
    USECS().  This function returns the actual time delayed in
    microseconds.

  To time something between modules (i.e. .o files), in one file
  declare the timing variable as a global, and in the other file
  declare it as extern.  Then use TIME_PRE and TIME_POST as usual.
  E.g.:

  fileA.c:
      unsigned long long my_timing_tmp_var;
      ...
      TIME_PRE(my_timing_tmp_var);
      ...

  fileB.c:
      extern unsigned long long my_timing_tmp_var;
      ...
      TIME_POST(my_timing_tmp_var);
      ...

  Note that the cached processor speed (__cpuMHz) is a static variable
  defined in this header, so every .c file that includes it has its
  own copy; a TIME_INIT done in one file does not initialize the copy
  used by USECS/USECS_DELAY in another file.

  Here is a sample program showing how to use TIME_PRE and TIME_POST.

  #include <stdio.h>
  #include "rdtsc2.h"

  int main()
  {
      unsigned long long tmp;
      int i = 0;

      TIME_PRE(tmp);
      for (i = 0; i < 1000; ++i)
          printf("hello\n");
      TIME_POST(tmp);

      printf ("1000 printfs took %5.3fus. That's %5.3fus per printf!\n",
              USECS(tmp), USECS(tmp)/1000);
      return 0;
  }

****************************************************************************/

#ifndef __RDTSC_H
#define __RDTSC_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*#include "asm/msr.h" */

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header uses the x86 rdtsc instruction and requires an x86 target"
#endif

/*
 * rdtsc(x): store the current 64-bit time-stamp counter value in x.
 *
 * The instruction returns the counter split across EDX (high half) and
 * EAX (low half).  The two halves are read separately and combined here
 * because the "=A" constraint only denotes the EDX:EAX pair on 32-bit
 * x86; on x86-64 it would silently drop the high 32 bits.
 */
#define rdtsc(x)                                                        \
    do {                                                                \
        unsigned int __rdtsc_lo, __rdtsc_hi;                            \
        __asm__ __volatile__("rdtsc"                                    \
                             : "=a" (__rdtsc_lo), "=d" (__rdtsc_hi));   \
        (x) = ((unsigned long long)__rdtsc_hi << 32) | __rdtsc_lo;      \
    } while (0)

/* TIME_INIT: determine the processor speed now and cache it in __cpuMHz. */
#define TIME_INIT do {__cpuMHz = SetMHz();} while(0)

/* TIME_PRE(cycles): record the current counter value in cycles. */
#define TIME_PRE(cycles) rdtsc(cycles)

/*
 * TIME_POST(cycles): replace the counter value stored by TIME_PRE with the
 * number of cycles elapsed since then.
 */
#define TIME_POST(cycles)                       \
    do {                                        \
        unsigned long long __tmp;               \
        rdtsc(__tmp);                           \
        (cycles) = __tmp - (cycles);            \
    } while (0)

/*
 * Cached processor speed in MHz (i.e. cycles per microsecond).  A negative
 * value means "not determined yet".  Define TIME_DEBUG_MHZ to hard-code it.
 */
#ifdef TIME_DEBUG_MHZ
static double __cpuMHz = TIME_DEBUG_MHZ;
#else
static double __cpuMHz = -1.0;
#endif

/*
 * SetMHz: return the processor speed in MHz as reported by the first
 * "cpu MHz" entry of /proc/cpuinfo.
 *
 * The file is read directly rather than through a shell/sed pipeline, so no
 * child process is spawned and no stream is left open on the error paths.
 * If the file cannot be opened, or no value can be parsed, a message is
 * printed and the program exits with status -1.
 */
static inline double SetMHz(void)
{
    char line[512];
    double mhz = 0.0;
    int parsed = 0;
    FILE *f;

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        printf("RDTSC: Error reading cpu speed\n");
        exit(-1);
    }

    while (fgets(line, sizeof line, f)) {
        const char *colon;

        if (!strstr(line, "cpu MHz"))
            continue;
        colon = strchr(line, ':');
        if (!colon)
            continue;
        /* Only the first usable entry is consulted: the value follows
         * the first ':' on that line. */
        parsed = (sscanf(colon + 1, "%lf", &mhz) == 1);
        break;
    }
    fclose(f);

    if (!parsed) {
        printf("RDTSC: Error reading cpu speed.\n");
        exit(-1);
    }

#if TIME_DEBUG
    printf("Processor speed = %8.3f\n", mhz);
#endif

    return mhz;
}

/*
 * USECS: convert a cycle count to microseconds.
 *
 * Determines the processor speed via SetMHz() on first use if it has not
 * been set yet (which performs file I/O).
 */
static inline double USECS(unsigned long long cycles)
{
    if (__cpuMHz < 0)
        __cpuMHz = SetMHz();

    return (double)(cycles) / __cpuMHz;
}

/*
 * USECS_DELAY: busy-wait for at least usecs microseconds and return the
 * time actually spent, in microseconds.
 *
 * The processor speed must already be known (TIME_INIT or a prior USECS()
 * call); otherwise a message is printed and the program exits with
 * status -1.  A non-positive (or NaN) usecs requests no delay.
 */
static inline double USECS_DELAY(double usecs)
{
    unsigned long long start, now, delay;
    double elapsed;

    /* Start the clock first so the check below counts towards the delay. */
    rdtsc(start);

    if (__cpuMHz < 0) {
        printf("USECS_DELAY: You need to do a TIME_INIT or USECS() before calling "
               "USECS_DELAY\n");
        exit(-1);
    }

    /* Converting a negative or NaN double to an unsigned type is undefined,
     * so clamp such requests to zero cycles. */
    delay = (usecs > 0.0) ? (unsigned long long)(usecs * __cpuMHz) : 0;

    do {
        rdtsc(now);
        elapsed = now - start;
    } while (elapsed < delay);

    return (double)elapsed / __cpuMHz;
}

#endif /* __RDTSC_H */