/*
 * latency measurement program
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <signal.h>
#include <errno.h>
#include <sys/poll.h>
#include <linux/soundcard.h>
#include <linux/rtc.h>

#include "measure.h"

/* Non-zero enables the extra progress messages printed to stderr. */
#define DEBUG 0

/* Capacity of rtinfo[]; also the default and the upper bound for -N. */
#define MAX_TIME_RUN	100000
/* Upper bound on recorded overruns; sizes stacks[]. */
#define MAX_OVERRUNS	1000

/* Number of measurement iterations before main() finishes (-N). */
static int max_time_run = MAX_TIME_RUN;
/* Share of each period spent in the busy loop (-c). */
static float cpu_load = 0.80;	/* default */
/* Deadline as a multiple of the nominal period (-t). */
static float tolerance = 2.0;	/* default */
/* Cleared by -u; when set, main() locks memory and requests SCHED_FIFO. */
static int do_real_time = 1;
/* Raw data output path (-o); written by my_exithandler() when non-NULL. */
static char *output_data_file = NULL;
/* Chart output path (-p); handed to draw_chart() by my_exithandler(). */
static char *output_png_file = "out.png";
/* Path given with -P. */
static char *profile_file = NULL;
/* Stream for profile_file; opened by main() only if /proc/latencytimes can be opened. */
static FILE *profile_fd;

/*
 * Read the CPU time-stamp counter.
 *
 * The counter is sampled twice back to back and the second sample is
 * returned only when the two samples are less than 1000 ticks apart;
 * otherwise the pair is discarded and sampled again.
 *
 * RDTSC delivers the counter in EDX:EAX.  The "=A" constraint names that
 * register pair only on 32-bit x86; on x86-64 it selects a single 64-bit
 * register and the upper half of the counter is lost.  Reading the two
 * halves through "=a"/"=d" and combining them is correct on both.
 */
static inline unsigned long long int rdtsc(void)
{
	unsigned long long int prev, cur;
	unsigned int lo, hi;

	for (;;) {
		__asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
		prev = ((unsigned long long int)hi << 32) | lo;
		__asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
		cur = ((unsigned long long int)hi << 32) | lo;
		if (cur - prev < 1000)
			return cur;
	}
}

/* Counter value taken by main() at startup; the zero point for mygettime(). */
static unsigned long long time_offset;

/* Time-stamp-counter ticks elapsed since time_offset was recorded. */
#define mygettime() (rdtsc() - time_offset)

static int calibrate_loop(void);

/* CPU clock rate in Hz; the divisor that turns tick counts into seconds. */
static double cpu_hz;

/*
 * Convert a tick count into seconds by dividing it by cpu_hz.
 */
inline static double wallclock(unsigned long long sc)
{
	const double ticks = (double)sc;

	return ticks / cpu_hz;
}

static int init_test(const char *device_file, int freq, int count, int use_rtc);
inline static int start_trigger(int use_rtc);
static int set_realtime_priority(void);
static int set_normal_priority(void);
static void my_exithandler(int);
static void mydelay(int loops);
inline static int get_sync(struct latency_test_info *info, int use_rtc);

/* Running totals for the whole measurement; printed and charted at exit. */
static struct latency_stat stinfo;
/*
 * Window bounds (in seconds) around the nominal period and the nominal
 * busy-loop time; main() computes them once before the loop and uses
 * them to count iterations that landed inside each window.
 */
static float total_latency_minus_1ms, total_latency_plus_1ms;
static float total_latency_minus_2ms, total_latency_plus_2ms;
static float cpu_latency_minus_02ms, cpu_latency_plus_02ms;
static float cpu_latency_minus_01ms, cpu_latency_plus_01ms;

/* One record per measurement iteration, indexed by run number. */
static struct latency_info rtinfo[MAX_TIME_RUN+1];
/* Data filled in by get_sync(); a slot is kept each time an overrun is counted. */
static struct latency_test_info stacks[MAX_OVERRUNS+1];

/* Defined outside this file; called from my_exithandler() with the PNG path. */
extern void draw_chart(char *output_file, struct latency_stat *st, struct latency_info *rt);


/*
 * Print the command-line help to stderr.
 */
static void usage(void)
{
	fprintf(stderr, "usage: measure [options]\n");
	fprintf(stderr, "\noptions:\n");
	fprintf(stderr, "  -I          Don't use latency-test kernel module\n");
	fprintf(stderr, "  -D file     specify the device file to communicate with latency-kernel module\n");
	/* the closing parenthesis was missing from this line */
	fprintf(stderr, "  -c val      set the CPU load (0-1)\n");
	fprintf(stderr, "  -f val      set the RTC frequency (must be power of two)\n");
	fprintf(stderr, "  -n val      set wake up count (>= 1)\n");
	fprintf(stderr, "  -t val      set the deadline tolerance ratio (default = 2)\n");
	fprintf(stderr, "  -N val      set max. loop numbers (default %d)\n", MAX_TIME_RUN);
	fprintf(stderr, "  -p file     output PNG file (default: out.png)\n");
	fprintf(stderr, "  -o file     output data file (default: none)\n");
	fprintf(stderr, "  -u          run in non-realtime priority\n");
	fprintf(stderr, "  -P file     output lock latency\n");
}


#define TONE_QUIET	0
#define TONE_GENERATED	1
#define TONE_SAMPLE	2

/*
 * Entry point: parse the options, set up the wake-up source, calibrate
 * the busy loop and then measure until max_time_run iterations have been
 * recorded or a SIGINT/SIGTERM arrives.
 *
 * Each iteration spins in mydelay() for the configured share of the
 * period, then blocks in get_sync() until the next wake-up.  The time
 * spent spinning and the total time of the iteration are stored in
 * rtinfo[] and folded into stinfo.  An iteration that takes at least
 * stinfo.deadline counts as an overrun.
 *
 * The loop never falls through: it ends in my_exithandler(), which
 * reports the results and calls exit().
 */
int main(int argc,char **argv)
{
	int use_rtc;
	int wakeup_count;
	int frequency;
	double sched_diff;
	double cpu_diff;
	int loops_per_run;
	double loops_per_sec;
	double dev;
	unsigned long long start_sc;
	int res, err;
	int c;
	char *devfile = "/dev/midi0";


	time_offset = rdtsc(); /* initialization */

	use_rtc = 0;
	frequency = 1024;
	wakeup_count = 2;

	signal(SIGTERM, my_exithandler);
	signal(SIGINT, my_exithandler);

	while ((c = getopt(argc, argv, "Ic:D:f:n:t:p:o:N:uP:")) != -1) {
		switch (c) {
		case 'I':
			use_rtc = 1;
			break;
		case 'c':
			cpu_load = atof(optarg);
			break;
		case 'D':
			devfile = optarg;
			break;
		case 'f':
			frequency = atoi(optarg);
			break;
		case 'n':
			wakeup_count = atoi(optarg);
			break;
		case 't':
			tolerance = atof(optarg);
			break;
		case 'p':
			output_png_file = optarg;
			break;
		case 'o':
			output_data_file = optarg;
			break;
		case 'N':
			max_time_run = atoi(optarg);
			/* rtinfo[] holds at most MAX_TIME_RUN records */
			if (max_time_run > MAX_TIME_RUN)
				max_time_run = MAX_TIME_RUN;
			break;
		case 'u':
			do_real_time = 0;
			break;
		case 'P':
			profile_file = optarg;
			break;
		default:
			usage();
			exit(1);
		}
	}

	if (do_real_time) {
		if(DEBUG)  fprintf(stderr,"calling mlockall() to prevent pagefaults ....\n");
		if (mlockall(MCL_CURRENT|MCL_FUTURE)) {
			perror("mlockall() failed, exiting. mlock");
			exit(1);
		}

		res = set_realtime_priority();
		if (res < 0) {
			fprintf(stderr,"can't get realtime priority, run the program as root.\n");
			exit(1);
		}
		if (DEBUG) fprintf(stderr,"got realtime scheduling %d\n", res);
	}

	res = init_test(devfile, frequency, wakeup_count, use_rtc);
	if (res < 0)
		exit(1);
	printf("rtc setup: freq=%d count=%d\n", frequency, wakeup_count);

	loops_per_sec = calibrate_loop();
	loops_per_run = (int)((loops_per_sec * wakeup_count / frequency) * cpu_load);
	printf("cpu_load=%f  loops per run = %d\n",cpu_load, loops_per_run);

	/* calculate parameters */
	stinfo.sched_latency = (double)wakeup_count / (double)frequency;
	stinfo.cpu_latency = stinfo.sched_latency * cpu_load;
	stinfo.deadline = stinfo.sched_latency * tolerance;

	total_latency_minus_1ms = stinfo.sched_latency - 0.001;
	total_latency_plus_1ms = stinfo.sched_latency + 0.001;
	total_latency_minus_2ms = stinfo.sched_latency - 0.002;
	total_latency_plus_2ms = stinfo.sched_latency + 0.002;

	cpu_latency_minus_02ms = stinfo.cpu_latency - 0.0002;
	cpu_latency_plus_02ms = stinfo.cpu_latency + 0.0002;
	cpu_latency_minus_01ms = stinfo.cpu_latency - 0.0001;
	cpu_latency_plus_01ms = stinfo.cpu_latency + 0.0001;

	printf("total latency = %f ms\n", stinfo.sched_latency*1000.0);
	printf("cpu latency = %f ms\n", stinfo.cpu_latency*1000.0);

	if (profile_file) {
		int fd;
		/* only create the output when the source can actually be read */
		if ((fd = open("/proc/latencytimes", O_RDONLY)) >= 0) {
			profile_fd = fopen(profile_file, "w");
			if (profile_fd == NULL)
				perror(profile_file);
			close(fd);
		} else {
			perror("/proc/latencytimes");
		}
	}

	start_sc = mygettime();
	stinfo.start_time = wallclock(start_sc);
	fprintf(stderr, "start time = %g\n", stinfo.start_time);

	/* without a running trigger get_sync() would block forever */
	if (start_trigger(use_rtc) < 0) {
		perror("can't start the wake-up trigger");
		exit(1);
	}
	for (;;) {
		int nrun = stinfo.num_runs;
		unsigned long long time_head, time_cpu, time_sync;

		time_head = mygettime();
		if (cpu_load > 0) {
			mydelay(loops_per_run);
			time_cpu = mygettime();
		} else {
			time_cpu = time_head;
		}

		/*
		 * The slot is reused until an overrun is counted below, so
		 * only the data of overrunning iterations is kept.
		 */
		err = get_sync(&stacks[stinfo.overruns], use_rtc);
		if (err < 0) {
			fprintf(stderr, "fatal ioctl error %d\n", -err);
			exit(1);
		}
		time_sync = mygettime();

		sched_diff = wallclock(time_sync - time_head);
		cpu_diff = wallclock(time_cpu - time_head);

		rtinfo[nrun].elapsed = wallclock(time_sync - start_sc);
		rtinfo[nrun].sched_diff = sched_diff;
		rtinfo[nrun].cpu_diff = cpu_diff;

		if (sched_diff >= total_latency_minus_1ms &&
		    sched_diff <= total_latency_plus_1ms)
			stinfo.sched_in_1ms++;

		if (sched_diff >= total_latency_minus_2ms &&
		    sched_diff <= total_latency_plus_2ms)
			stinfo.sched_in_2ms++;

		if (cpu_diff >= cpu_latency_minus_01ms &&
		    cpu_diff <= cpu_latency_plus_01ms)
			stinfo.cpu_in_01ms++;

		if (cpu_diff >= cpu_latency_minus_02ms &&
		    cpu_diff <= cpu_latency_plus_02ms)
			stinfo.cpu_in_02ms++;

		if (cpu_diff > stinfo.max_cpu_diff)
			stinfo.max_cpu_diff = cpu_diff;

		if (sched_diff > stinfo.max_diff)
			stinfo.max_diff = sched_diff;

		/* accumulate the squared relative deviation from the period */
		dev = (sched_diff - stinfo.sched_latency) / stinfo.sched_latency;
		stinfo.deviation += dev * dev;

		if (sched_diff >= stinfo.deadline) {
			if (stinfo.overruns < MAX_OVERRUNS) {
				stinfo.overruns++;
				/* 1-based index into stacks[]; 0 means "none" */
				rtinfo[nrun].with_stack = stinfo.overruns;
			}
			if (profile_fd) {
				int lfd;
				lfd = open("/proc/latencytimes", O_RDONLY);
				if (lfd >= 0) {
					char tmp[4096];
					ssize_t len;
					len = read(lfd, tmp, sizeof(tmp));
					fprintf(profile_fd, "#%g\n", rtinfo[nrun].elapsed);
					/*
					 * read() returns -1 on error; passing
					 * that to fwrite() as a size would read
					 * far beyond tmp[].
					 */
					if (len > 0)
						fwrite(tmp, 1, (size_t)len, profile_fd);
					close(lfd);
				}
			}
		}

		nrun++;
		stinfo.num_runs = nrun;
		if (nrun >= max_time_run)
			my_exithandler(0);
	}

	return(0);
}


/*
 */
static int devfd = -1;

/*
 * Validate the timing parameters, open the wake-up device and program it.
 *
 * device_file   device node of the latency-test module (unused with use_rtc)
 * frequency     interrupt rate; a power of two between 64 and 8192
 * wakeup_count  interrupts per wake-up; 1..frequency, exactly 1 with use_rtc
 * use_rtc       non-zero: use /dev/rtc instead of the latency-test module
 *
 * On success the descriptor is left in devfd and 0 is returned.  On
 * failure a message is printed, devfd is left at -1 and a negative errno
 * value is returned.
 */
static int init_test(const char *device_file, int frequency, int wakeup_count, int use_rtc)
{
	int saved_errno;

	if (frequency > 8192 || frequency < 64 ||
	    (frequency & (frequency - 1)) != 0) {
		fprintf(stderr, "invalid freq = %d\n", frequency);
		return -EINVAL;
	}
	/*
	 * A count of 0 would make the nominal period zero, which the
	 * statistics later divide by, so require at least 1.
	 */
	if ((use_rtc && wakeup_count != 1) ||
	    wakeup_count < 1 || wakeup_count > frequency) {
		fprintf(stderr, "invalid wakeup count = %d\n", wakeup_count);
		return -EINVAL;
	}

	devfd = open(use_rtc ? "/dev/rtc" : device_file, O_RDWR);
	if (devfd < 0) {
		/* capture errno before stdio gets a chance to change it */
		saved_errno = errno;
		fprintf(stderr, "error opening device\n");
		return -saved_errno;
	}

	if (use_rtc) {
		if (ioctl(devfd, RTC_IRQP_SET, (unsigned long)frequency) < 0) {
			saved_errno = errno;
			fprintf(stderr, "error setting freq %d\n", frequency);
			goto fail;
		}
	} else {
		if (ioctl(devfd, LAT_TEST_FREQ, (unsigned long)frequency) < 0) {
			saved_errno = errno;
			fprintf(stderr, "error setting freq %d\n", frequency);
			goto fail;
		}
		if (ioctl(devfd, LAT_TEST_COUNT, (unsigned long)wakeup_count) < 0) {
			saved_errno = errno;
			fprintf(stderr, "error setting count %d\n", wakeup_count);
			goto fail;
		}
	}

	return 0;

fail:
	/* do not leave a half-configured descriptor behind */
	close(devfd);
	devfd = -1;
	return -saved_errno;
}

/*
 * Issue the ioctl that starts the wake-up source on devfd: RTC_PIE_ON
 * when use_rtc is set, LAT_TEST_START otherwise.
 * Returns the ioctl() result unchanged.
 */
inline static int start_trigger(int use_rtc)
{
	if (!use_rtc)
		return ioctl(devfd, LAT_TEST_START, 0);

	return ioctl(devfd, RTC_PIE_ON);
}

/*
 * Move the calling process to SCHED_FIFO at the highest priority that
 * policy offers.
 *
 * Returns the priority that was requested, or -1 (after printing the
 * reason with perror) when the scheduler change is refused.
 */
static int set_realtime_priority(void)
{
	struct sched_param param;
	int rc;

	memset(&param, 0, sizeof(param));
	param.sched_priority = sched_get_priority_max(SCHED_FIFO);

	rc = sched_setscheduler(0, SCHED_FIFO, &param);
	if (rc != 0) {
		perror("sched_setscheduler");
		return -1;
	}

	return param.sched_priority;
}

/*
 * Put the calling process back on the SCHED_OTHER policy with
 * priority 0.
 *
 * Returns 0 on success, or -1 (after printing the reason with perror)
 * when the scheduler change is refused.
 */
static int set_normal_priority(void)
{
	struct sched_param param;
	int rc;

	memset(&param, 0, sizeof(param));
	param.sched_priority = 0;

	rc = sched_setscheduler(0, SCHED_OTHER, &param);
	if (rc != 0) {
		perror("sched_setscheduler");
		return -1;
	}

	return 0;
}

/*
 */
/*
 * Block until the next wake-up from the device behind devfd.
 *
 * With use_rtc set, wait for the descriptor to become readable and
 * consume one value from it; info is not touched.  Otherwise issue
 * LAT_TEST_READ with info as its argument.
 *
 * Returns 0 on success or a negative errno value.
 */
inline static int get_sync(struct latency_test_info *info, int use_rtc)
{
	if (use_rtc) {
		struct pollfd pfd;
		unsigned long t;

		pfd.fd = devfd;
		/*
		 * This used to assign POLLIN to .fd, which replaced the
		 * descriptor and left .events uninitialised.
		 */
		pfd.events = POLLIN;
		pfd.revents = 0;
		if (poll(&pfd, 1, -1) < 0)
			return -errno;
		if (read(devfd, &t, sizeof(t)) < 0)
			return -errno;
	} else {
		if (ioctl(devfd, LAT_TEST_READ, info) < 0)
			return -errno;
	}
	return 0;
}

/*
 */
/*
 * Finish the measurement: restore normal scheduling, close the device,
 * print the summary, draw the chart, optionally dump the raw data and
 * terminate the process with exit status 0.
 *
 * main() installs this for SIGTERM and SIGINT and also calls it directly
 * with 0 once the requested number of runs has been recorded.
 *
 * NOTE(review): when entered as a signal handler this calls stdio and
 * other functions that are not async-signal-safe.
 */
static void my_exithandler(int sig)
{
	FILE *f;

	(void)sig;	/* the same shutdown path is used for every caller */

	stinfo.end_time = wallclock(mygettime());

	set_normal_priority();

	if (devfd >= 0)
		close(devfd);

	if (stinfo.num_runs) {
		printf("max diff = %g ms (overrun %d)\n",
		       stinfo.max_diff * 1000.0, stinfo.overruns);
		printf("within 1ms = %d, factor = %g%%\n",
		       stinfo.sched_in_1ms, stinfo.sched_in_1ms*100.0 / stinfo.num_runs);
		printf("within 2ms = %d, factor = %g%%\n",
		       stinfo.sched_in_2ms, stinfo.sched_in_2ms*100.0 / stinfo.num_runs);
		/* main() accumulated squared deviations; turn them into an RMS */
		stinfo.deviation = sqrt(stinfo.deviation / stinfo.num_runs);
		printf("deviation = %g\n", stinfo.deviation);
	}

	if (output_png_file)
		draw_chart(output_png_file, &stinfo, rtinfo);

	if (output_data_file) {
		f = fopen(output_data_file, "w");
		if (f == NULL) {
			/* report instead of silently producing no file */
			perror(output_data_file);
		} else {
			int i;
			int failed = 0;

			for (i = 0; i < stinfo.num_runs && !failed; i++) {
				if (fwrite(&rtinfo[i], sizeof(*rtinfo), 1, f) != 1) {
					failed = 1;
					break;
				}
				/* with_stack is a 1-based index into stacks[] */
				if (rtinfo[i].with_stack &&
				    fwrite(&stacks[rtinfo[i].with_stack - 1],
					   sizeof(*stacks), 1, f) != 1)
					failed = 1;
			}
			if (fclose(f) != 0)
				failed = 1;
			if (failed)
				fprintf(stderr, "error writing %s\n", output_data_file);
		}
	}

	exit(0);
}

/*
 * Determine the CPU clock rate and the speed of the mydelay() busy loop.
 *
 * cpu_hz is taken from the first "cpu MHz" line of /proc/cpuinfo.  The
 * busy loop is then timed over CALIB_LOOPS iterations.
 *
 * Returns the number of mydelay() iterations per second, limited to
 * INT_MAX.  Exits the process when /proc/cpuinfo cannot be opened or
 * the clock rate cannot be determined.
 */
static int calibrate_loop(void)
{
	FILE *f;
	char line[100];
	double tmp_loops_per_sec;
	unsigned long long mytime1, mytime2;

	f = fopen("/proc/cpuinfo", "r");
	if (f==NULL) {
		perror("can't open /proc/cpuinfo, exiting. open");
		exit(1);
	}

	while (fgets(line, sizeof(line), f) != NULL) {
		if (strncmp(line, "cpu MHz", 7) == 0) {
			/* the value follows the colon; don't rely on a fixed column */
			const char *colon = strchr(line, ':');

			if (colon != NULL)
				cpu_hz = atof(colon + 1) * 1000000.0;
			break;
		}
	}
	fclose(f);
	if(cpu_hz < 1.0) {
		fprintf(stderr,"can't determine CPU clock frequency, exiting.\n");
		/* every later time conversion divides by cpu_hz */
		exit(1);
	}


	printf("cpu_hz = %g\n", cpu_hz);
	if(DEBUG) fprintf(stderr, "calibrating loop ....\n");

#define CALIB_LOOPS 200000000

	mytime1 = mygettime();
	mydelay(CALIB_LOOPS);
	mytime2 = mygettime();

	tmp_loops_per_sec = CALIB_LOOPS / wallclock(mytime2 - mytime1);

	/*
	 * Converting an out-of-range double to int is undefined; this also
	 * catches inf/NaN from a zero-length measurement.
	 */
	if (!(tmp_loops_per_sec < (double)INT_MAX))
		return INT_MAX;

	return (int)tmp_loops_per_sec;
}

/*
 * Busy loop: spin for `loops` iterations to consume CPU time.
 * A count of zero or less returns immediately.
 *
 * The counter is volatile so that every iteration performs a real
 * access; a plain local with no observable use lets an optimising
 * compiler drop the whole loop, which would turn the calibration and
 * the generated load into no-ops.
 */
static void mydelay(int loops)
{
	volatile int k = 0;
	int u;

	for (u = 0; u < loops; u++)
		k += 1;
}

