/*****************************/
/*          FLOPS.c          */
/*  Version 1.2, 29 Feb 1992 */
/*         Al Aburto         */
/*      'ala' on BIX         */
/*  aburto@marlin.nosc.mil   */
/*****************************/

/*
   FLOPS.c is a 'c' program which attempts to estimate your system's scalar
   floating-point 'MFLOP' rating for the scalar +, -, *, and / operations
   based on a specific 'instruction mix'. It attempts to measure the time
   for an FADD and the time to do an empty 'for' loop in microseconds. It
   calculates pi based on the series expansion for atan(1.0). A few
   correction terms are applied to achieve at most a 16 digit accurate
   result. It calculates the area under the sin(x) curve from 0 to pi/3
   using the Trapezoidal rule. Sin(x) is approximated by a polynomial
   series accurate to approximately 1.0e-14 over the specified range. The
   program is designed for double precision. The program does not work
   with arrays and thus it is not an appropriate program for an important
   group of processors (array, vector, and possibly other type machines).
   It gives a Scalar (one dimensional) MFLOP rating, not an N-Scalar or
   vector MFLOP rating. It was originally intended for PC's but it is
   applicable for a wide variety of different machines.

   The Scalar MFLOPS rating is based on a count of 52 total scalar
   floating-point operations per loop (iteration). The instruction mix is:
   FADD: 40.38, FSUB: 23.08, FMUL: 26.92, and FDIV: 9.62 percent. This is
   based on instruction mix data found in the Dhrystone paper by Reinhold
   P. Weicker, Communications of the ACM, Oct 1984, Vol 27, Number 10,
   Page 1013 (Table IV). This is the best available information I found in
   which to base the instruction mix.

   Version 1.2 corrects some problems encountered with Version 1.1. A timer
   routine for UNIX using 'getrusage()' has been added by Markku Kolkka of
   Tampere University of Technology, Finland. This eliminates problems with
   the definition of 'HZ' in the old routine (UNIX_Old). I eliminated the
   'w' sum in the second loop as it did nothing. The FADD time is now
   estimated (the printf is commented out, but you can uncomment it to see
   what it says) and I use it solely to estimate the number of loops to
   conduct: 20 million loops when the program estimates the FADD is faster
   than 1/3 usec, 0.1 million loops for FADD slower than 4 usec, and
   1 million loops otherwise. If the number of loops conducted is not
   properly related to 'loops' then the program prints out a message saying
   the program and results are invalid. Bo Thide' of the Swedish Institute
   of Space Physics, Sweden caught an error in the value for 'piref' and
   that has been corrected. The time to do an empty 'for' loop is also 
   estimated somewhat better since there are now automatic longer loops.

   The V1.2 results are not expected to show any significant difference
   relative to the V1.1 results. I verified this with several machines
   running in the 1 to 6 MFLOP range, but you might want to recheck the
   faster machines. The results do vary even when doing 20 million loops.
   The maximum standard deviation I observed in the systems I checked was
   0.07 MFLOP at the 6 MFLOP range.

   See the next page for 'register' and 'timer' options available in the
   program. Example UNIX compilation is: 'cc -DUNIX -O2 flops.c -o flops',
   or 'cc -DUNIX -DROPT flops.c -o flops', ... , etc.

   NOTE: Please do not remove any of the printouts.

   Al Aburto
*/

#include <stdio.h>
#include <math.h>
				 /* 'Uncomment' the line below to run   */
				 /* with 'register double' variables    */
				 /* defined, or compile with the        */
				 /* '-DROPT' option. Don't need this if */
				 /* registers used automatically.       */
/* #define ROPT */
				 /* 'Uncomment' one of the statements   */
				 /* below to access the right timer     */
				 /* routine for your system, or compile */
				 /* with '-DUNIX' (for example). You    */
				 /* may need to write your own similar  */
				 /* timer routine if you have another   */
				 /* system or compiler not covered by   */
				 /* the options below. I have not tested*/
				 /* the 'MSC' option.                   */
/* #define Amiga    */
/* #define UNIX    */
#define UNIX_Old
/* #define TURBO_C  */
/* #define MSC      */

#ifdef Amiga
#include <ctype.h>
#define HZ   50
#endif

#ifdef UNIX
#include <sys/time.h>
#include <sys/resource.h>
struct rusage rusage;
#endif

#ifdef UNIX_Old
#include <sys/types.h>
#include <sys/times.h>
#include <sys/param.h>
#include <unistd.h>
#ifndef HZ
#define HZ   60
#endif
struct tms tms;
#endif

#ifdef TURBO_C
#include <ctype.h>
#include <dos.h>
#include <time.h>
#define HZ   100
struct time now;
#endif

#ifdef MSC
#include <time.h>
#include <ctype.h>
#define HZ   CLK_TCK
clock_t  tnow;
#endif


double TimeArray[3];             /* Time Array needed for 'dtime()'.    */

double T[25];                    /* Global Array used to hold timing    */
				 /* results and other information.      */

double sa,sb,sc,sd,one,two,three;
double four,five,piref,piprg;
double pierr,delta;

double A0 = 1.0;
double A1 =-0.1666666666671334;
double A2 = 0.833333333809067E-2;
double A3 = 0.198412715551283E-3;
double A4 = 0.27557589750762E-5;
double A5 = 0.2507059876207E-7;
double A6 = 0.164105986683E-9;


int main()
{

#ifdef ROPT
   register double s,u,v,w,x;
#else
   double s,u,v,w,x;
#endif

   long  i, loops, m, n;

   printf("\n");
   printf("   FLOPS C Program (Double Precision)\n");
   printf("       Version 1.2, 29 Feb 1992\n\n");

			     /******************************/
   loops = 1000000;          /* Number of loops conducted. */
			     /******************************/

/****************************************************/
/* Set Global Variable Values.  T[24] References    */
/* All Timing Results Relative To 1 Million Loops.  */
/****************************************************/

   T[24] = 1.0e+06/(double)loops;

   piref = 3.14159265358979324;
   one  = 1.0;
   two  = 2.0;
   three= 3.0;
   four = 4.0;
   five = 5.0;
     sd = one;

/**********************************/
/* Module 1.  Estimate Loop time  */
/*            and FADD time.      */
/**********************************/

   u = 0.0;                                        /*********************/
   v = one;                                        /* Loop 1.           */
						   /*********************/
   dtime(TimeArray);
   for( i = 1 ; i<= loops ; i++ )
   {
   u = u + v;
   }
   dtime(TimeArray);
   T[19] = T[24] * TimeArray[1];
   m = (long)u;
						   /*********************/
   u = 0.0;                                        /* Loop 2.           */
   s = 0.0;                                        /*********************/

   dtime(TimeArray);
   for( i = 1 ; i<= m ; i++ )
   {
   u = u + v;
   s = s + u;
   }
   dtime(TimeArray);
   T[20] = T[24] * TimeArray[1];
						   /********************/
   T[21] = two * T[19] - T[20];                    /* Loop Time (usec) */
   if ( T[21] < 0.0 ) T[21] = 0.0;                 /* First Estimate.  */
						   /********************/
   n = (long)( two * ( s / u ) - one );
						   /********************/
   T[22] = T[20] - T[19];                          /* FADD Time (usec) */
   if ( T[22] < 0.0 ) T[22] = 0.0;                 /* First Estimate.  */
						   /********************/
   m = n;

   if ( T[22] < (one / three) )
   {
   sd    = one / 20.0;
   m     = 20 * n;
   T[24] = 1.0e+06/(double)m;
   }

   if ( T[22] > four )
   {
   sd    = 10.0;
   m     = n / 10;
   T[24] = 1.0e+06/(double)m;
   }

						   /*********************/
   u = 0.0;                                        /* Loop 3.           */
						   /*********************/
   dtime(TimeArray);
   for( i = 1 ; i<= m ; i++ )
   {
   u = u + v;
   }
   dtime(TimeArray);
   T[1] = T[24] * TimeArray[1];
   m = (long)u;
						   /*********************/
   u = 0.0;                                        /* Loop 4.           */
   s = 0.0;                                        /*********************/

   dtime(TimeArray);
   for( i = 1 ; i<= m ; i++ )
   {
   u = u + v;
   s = s + u;
   }
   dtime(TimeArray);
   T[2] = T[24] * TimeArray[1];
						   /********************/
   T[3] = two * T[1] - T[2];                       /* Loop Time (usec) */
   if ( T[3] < 0.0 ) T[3] = 0.0;                   /* Second Estimate. */
						   /********************/
   m = (long)( two * ( s / u ) - v );
						   /********************/
   T[23] = T[2] - T[1];                            /* FADD Time (usec) */
   if ( T[23] < 0.0 ) T[23] = 0.0;                 /* Second Estimate. */
						   /********************/

   T[3]  = ( T[3] + T[21] ) / two;
   T[23] = ( T[23] + T[22] ) / two;

/*
   printf("   Loop Time (usec) = %10.4lf\n",T[3]);
   printf("   FADD Time (usec) = %10.4lf\n\n",T[23]);
*/

/*******************************************************/
/* Module 2.  Calculate Value Of PI From Taylor Series */
/*            Expansion Of atan(1.0).  There Are 7     */
/*            Double Precision Operations Per Loop     */
/*            ( 3 +, 2 -, 1 *, and 1 / ) That are      */
/*            Included in The Timing.                  */
/*******************************************************/
  
   s  =-five;                                      /********************/
   sa =-one;                                       /* Loop 5.          */
						   /********************/
   dtime(TimeArray);
   for ( i = 1 ; i <= m ; i++ )
   {
   s  =-s;
   sa = sa + s;
   }
   dtime(TimeArray);
   T[4] = T[24] * TimeArray[1];
   if ( T[4] < 0.0 ) T[4] = 0.0;

   sc   = (double)m;

   u = sa;                                         /*********************/
   v = 0.0;                                        /* Loop 6.           */
   w = 0.0;                                        /*********************/
   x = 0.0;

   dtime(TimeArray);
   for ( i = 1 ; i <= m ; i++)
   {
   s  =-s;
   sa = sa + s;
   u  = u + two;
   x  = x +(s - u);
   v  = v - s * u;
   w  = w + s / u;
   }
   dtime(TimeArray);
   T[5] = T[24] * TimeArray[1];

   T[6] = T[5] - T[4];                             /*********************/
   m = (long)( sa * x  / sc );                     /*  PI Results       */
   sa = four * w / five;                           /*********************/
   sb = sa + five / v;
   sc = 31.25;
   piprg = sb - sc / (v * v * v);
   pierr = piprg - piref;
						   /*********************/
						   /*   DO NOT REMOVE   */
						   /*  THESE PRINTOUTS! */
						   /*********************/
   printf("   PI: Program     = %20.17lf\n",piprg);
   printf("   PI: Reference   = %20.17lf\n",piref);
   printf("   PI: Error       =%13.4le\n\n",pierr);

/*******************************************************/
/* Module 3.  Calculate Area Under sin(x) Curve From   */
/*            0.0 To PI/3.0 Using Trapazoidal Method.  */
/*            Result is 0.5 .  There Are 17 Double     */
/*            Precision Operations Per Loop ( 6 +, 2 -,*/
/*            9 *, and 0 / ) Included In The Timing.   */
/*******************************************************/
  
   delta = piref / ( three * (double)m );          /*********************/
   s = 0.0;                                        /*  Loop 7.          */
   v = 0.0;                                        /*********************/

   dtime(TimeArray);
   for( i = 1 ; i <= m ; i++ )
   {
   v = v + one;
   u = v * delta;
   w = u * u;
   s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0);
   }
   dtime(TimeArray);
   T[7]  = T[24] * TimeArray[1];

   u  = piref / three;
   w  = u * u;
   sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0) / two;
   m  = (long)v;
						   /*********************/
   sa = delta * ( s - sa );                        /* Area Results.     */
   sb = 0.5;                                       /*********************/
   sc = sa - sb;
						   /*********************/
						   /*   DO NOT REMOVE   */
   printf("   Area: Program   = %20.17lf\n",sa);   /*  THESE PRINTOUTS! */
   printf("   Area: Reference = %20.17lf\n",sb);   /*********************/
   printf("   Area: Error     =%13.4le\n\n",sc);


   T[8]  = T[7] - T[3];
   T[9]  = ( five * T[6] + T[8] ) / 52.0;
   T[10] = one / T[9];
						   /*********************/
						   /*   DO NOT REMOVE   */
						   /*  THESE PRINTOUTS! */
						   /*********************/
   printf("   Iterations      = %9ld\n",m);
   printf("   BenchTime(usec) = %9.4lf\n",T[9]);
   printf("   Scalar MFLOPS   = %9.4lf\n\n",T[10]);

   x = sd * (double)m;
   if ( (long)x != loops )
   {
   printf("   Results are bogus! It appears that an optimizatio\n\n");
   printf("   was done which invalidates the program and results.\n\n");
   }

}


/********************************************************/
/* dtime() outputs the elapsed time in p[1] from the    */
/* first call of dtime() to the second call of dtime(). */
/********************************************************/
#ifdef Amiga
dtime(p)
double p[];
{
   double q;

   struct   tt {
      long  days;
      long  minutes;
      long  ticks;
   } tt;

   q = p[2];

   DateStamp(&tt);

   p[2] = ( (double)(tt.ticks + (tt.minutes * 60L * 50L)) ) / (double)HZ;
   p[1] = p[2] - q;
   return 0;
}
#endif

#ifdef UNIX
dtime(p)
double p[];
{
   double q;

   q = p[2];

   getrusage(RUSAGE_SELF,&rusage);

   p[2] = (double)(rusage.ru_utime.tv_sec);
   p[2] = p[2] + (double)(rusage.ru_utime.tv_usec) / 1.0e+06;
   p[1] = p[2] - q;
   return 0;
}
#endif

#ifdef UNIX_Old
dtime(p)
double p[];
{
   double q;

   q = p[2];

   times(&tms);

   p[2] = (double)(tms.tms_utime) / (double)HZ;
   p[1] = p[2] - q;
   return 0;
}
#endif


#ifdef TURBO_C
dtime(p)
double p[];
{
   double q;

   q = p[2];

   gettime(&now);

   p[2] = 60.0 * (double)(now.ti_min);
   p[2] = p[2] + (double)(now.ti_sec);
   p[2] = p[2] + (double)(now.ti_hund)/(double)HZ;
   p[1] = p[2] - q;
   return 0;
}
#endif


#ifdef MSC
dtime(p)
double p[];
{
   double q;

   q = p[2];

   tnow = clock();

   p[2] = (double)tnow / (double)HZ;
   p[1] = p[2] - q;
   return 0;
}
#endif