#define _GNU_SOURCE	/* exposes clone() and the CLONE_* flags in <sched.h> */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <fcntl.h>
#include <sched.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * rdtscll(val): store the CPU time-stamp counter in the 64-bit lvalue val.
 *
 * RDTSC returns the counter split across EDX:EAX.  The "=A" constraint
 * only names that register pair on 32-bit x86; on x86-64 it selects a
 * single register and the upper half of the counter is lost.  Reading
 * the two halves through "=a"/"=d" and combining them works on both.
 */
#define rdtscll(val) do { \
	unsigned int tsc_lo_, tsc_hi_; \
	__asm__ __volatile__ ("rdtsc" : "=a" (tsc_lo_), "=d" (tsc_hi_)); \
	(val) = ((unsigned long long)tsc_hi_ << 32) | tsc_lo_; \
} while (0)

/* CPU clock in MHz; set from get_mhz() in main() and used by doit() to
 * convert time-stamp-counter ticks into seconds. */
unsigned long mhz;

/*
 * Return the CPU clock speed in MHz as reported by /proc/cpuinfo.
 *
 * The whole file is first dumped to stdout with cat, then scanned line by
 * line; the integer part of the first "cpu MHz" entry is returned.
 * Exits with status 1 if the file cannot be opened or contains no such
 * entry.
 */
int get_mhz(void)
{
	char buf[1000];
	int cpu_mhz;
	FILE *f = fopen("/proc/cpuinfo", "r");

	if (f == NULL) {
		/* no trailing newline: perror() appends ": <errno text>\n" itself */
		perror("can't open /proc/cpuinfo");
		exit(1);
	}

	system("cat /proc/cpuinfo");

	while (fgets(buf, sizeof(buf), f) != NULL) {
		/* %d stops at the decimal point, so "2400.000" yields 2400 */
		if (sscanf(buf, "cpu MHz         : %d", &cpu_mhz) == 1) {
			fclose(f);
			return cpu_mhz;
		}
	}

	fclose(f);
	fprintf(stderr, "cannot locate cpu MHz in /proc/cpuinfo\n");
	exit(1);
}


/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/

/* 8 megs should be enough cache for anyone */
#define UNCACHED_SIZE	(8 * 1024 * 1024)

/* Number of bytes DoTest() asks doit() to push through the socket per call. */
#define LARGE	UNCACHED_SIZE
/* Size of each backing array: LARGE plus 0x20000 bytes of slack, which
 * covers the 0x4000 skip applied in main() and the offsets used by DoTest(). */
#define MAXBUF	(LARGE + 0x20000)
/* TIMES is how often doit() repeats the transfer inside one timed run. */
/* #define TIMES	100 */
/* #define TIMES	20 */
/* #define TIMES	10 */
#define TIMES	5
/* #define TIMES	3 */
/* #define TIMES	2 */
/* #define TIMES	1 */
/* Raw backing storage; clear_buffer() fills these directly. */
char _FromBuf[MAXBUF];
char _ToBuf[MAXBUF];
/* 0x1000-aligned pointers into the arrays above, set up in main(). */
char *FromBuf;
char *ToBuf;

/* Window size after which do_send_recv() rewinds its buffer pointers;
 * changed through SetBufferSize(). */
unsigned int IoSize = 8192;

int sock;	/* listening socket, created in server_init() */
int csock;	/* client end of the connection, created in client_init() */
int nsock;	/* accepted (server) end of the connection, set in server_init() */

/* Address client_init() connects to. */
#define SERVER	"127.0.0.1"

/* extern int do_send_recv(int, unsigned); */

/*
 * Reset both backing arrays before a measurement: the destination array
 * is zeroed and every byte of the source array is set to `code`.
 */
void clear_buffer(char code)
{
	memset(_ToBuf, 0, sizeof(_ToBuf));
	memset(_FromBuf, code, sizeof(_FromBuf));
}


/*
 * Time TIMES consecutive calls of netio(off, size) with the time-stamp
 * counter and print the resulting throughput to stderr.
 *
 *   netio - transfer routine under test
 *   off   - byte offset handed to netio
 *   size  - number of bytes handed to netio per call
 *   code  - fill byte for the source buffer (see clear_buffer())
 *   name  - label used in the report lines
 *
 * All arithmetic is done in double: the former unsigned products
 * `size * TIMES` and `mhz * 1000000` wrap where unsigned long is 32 bits
 * (the latter above roughly 4294 MHz), and float cannot represent large
 * tick counts exactly.
 */
void doit(void (*netio)(int , unsigned), int off, unsigned size, char code, char *name) {
	unsigned long long in, out;
	double total_bytes;
	double seconds;
	double bytes_per_sec;
	int i;

	clear_buffer(code);
	fprintf(stderr, "%s(off:%d, size:0x%x) ", name, off, size);

	rdtscll(in);
	for (i = 0; i < TIMES; i++) {
		netio(off, size);
	}
	rdtscll(out);

	total_bytes = (double)size * TIMES;
	/* ticks / (MHz * 1e6 ticks per second) */
	seconds = (double)(out - in) / ((double)mhz * 1000000.0);
	bytes_per_sec = total_bytes / seconds;

	fprintf(stderr, "%9s: copied %.1f Mbytes in %.3f seconds at %.1f Mbytes/sec\n",
				name, total_bytes / (1024.0 * 1024.0),
				seconds, bytes_per_sec / (1024.0 * 1024.0));
}


/*
 * Server half of the loopback connection: create a TCP socket listening
 * on port 9999 (any local address), then block in accept() until one
 * client connects.  The listening socket is stored in the global `sock`
 * and the accepted connection in the global `nsock`.
 *
 * `arg` is unused; the signature matches what clone() expects.
 *
 * Returns 0 on success.  On failure an error is printed, the listening
 * socket is closed and -1 is returned.
 */
int
server_init(void* arg)
{
	struct sockaddr_in sa;
	unsigned short port = 9999;
	int optval = 1;

	(void)arg;

	if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		return -1;
	}

	/* SO_REUSEADDR only affects bind(), so it has to be set first */
	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) < 0) {
		perror("setsockopt:SO_REUSEADDR");
		goto fail;
	}

	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_addr.s_addr = htonl(INADDR_ANY);
	sa.sin_port = htons(port);

	if (bind(sock, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
		perror("bind");
		goto fail;
	}

	if (listen(sock, 5) < 0) {
		perror("listen");
		goto fail;
	}

	/* the peer address is not needed */
	if ((nsock = accept(sock, NULL, NULL)) < 0) {
		perror("accept");
		goto fail;
	}

	return 0;

fail:
	close(sock);
	sock = -1;
	return -1;
}

/*
 * Client half of the loopback connection: create a TCP socket and
 * connect it to SERVER on port 9999.  The socket is stored in the
 * global `csock`.
 *
 * Returns 0 on success.  On failure an error is printed and -1 is
 * returned; if the socket had been created it is closed and `csock` is
 * set to -1.
 */
int
client_init(void)
{
	struct sockaddr_in csa2;
	unsigned short port = 9999;

	if ((csock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		return -1;
	}

	memset(&csa2, 0, sizeof(csa2));
	csa2.sin_family = AF_INET;
	csa2.sin_addr.s_addr = inet_addr(SERVER);
	csa2.sin_port = htons(port);

	if (connect(csock, (struct sockaddr *) &csa2, sizeof(csa2)) < 0) {
		perror("connect");
		close(csock);
		csock = -1;
		return -1;
	}

	return 0;
}



/*
 * Bring up both ends of the loopback connection.
 *
 * server_init() is started in a clone()d child that shares this
 * process's address space, filesystem information and descriptor table,
 * so the sockets it stores in globals are usable here.  After a
 * one-second pause (presumably to let the child reach accept())
 * client_init() connects, followed by another one-second pause.
 *
 * Exits with status 1 if the child cannot be created.
 */
void sock_init()
{
	/*
	 * Child stack.  Static rather than automatic so that it does not
	 * take 4+ MB of our own stack and stays valid after this function
	 * returns.  The stack pointer starts at index 0x100000; the
	 * remaining 0x10000 ints above it are left as headroom.
	 */
	static int stack[0x110000];
	static int arg = 0;
	int pid;

	pid = clone(server_init, &stack[0x100000], CLONE_VM|CLONE_FS|CLONE_FILES, &arg);
	if (pid < 0) {
		perror("clone");
		exit(1);	/* without a server there is nothing to measure */
	}

	sleep(1);
	client_init();
	sleep(1);
}

/* Smaller of a and b.  Each argument can be evaluated twice, so do not
 * pass expressions with side effects. */
#define min(a,b) (((b) > (a)) ? (a) : (b))

/*
 * Push `size` bytes through the connection: send from FromBuf + off on
 * `csock` in chunks of at most 0xc000 bytes and, after every send, read
 * the same number of bytes back from `nsock` into ToBuf + off.
 *
 * Once the send pointer has advanced IoSize bytes or more past
 * FromBuf + off, both pointers are rewound to their starting positions.
 *
 * Exits with status 1 if send() or recv() fails; returns early if
 * recv() reports end of stream.
 */
void
do_send_recv(int off, unsigned size)
{
	char *p = FromBuf + off;
	char *q = ToBuf + off;
	int len, len2;

	while (size) {
		if ((len = send(csock, p, min(0xc000, size), 0)) < 0) {
			perror("send");
			exit(1);
		}
		size -= len;
		p += len;

		/* recv() may hand the chunk back in pieces; loop until all of
		 * it has been read */
		do {
			if ((len2 = recv(nsock, q, 0xc000, 0)) < 0) {
				perror("recv");
				exit(1);
			}
			if (len2 == 0)
				return;	/* connection closed */
			q += len2;
			len -= len2;
		} while (len > 0);

		if (p >= FromBuf + off + IoSize) {
			p = FromBuf + off;
			q = ToBuf + off;
		}
	}
}

/*
 * Set the global IoSize to `size` and print a banner with the new value
 * (hex and decimal) to stderr.  Always returns 0.
 */
int
SetBufferSize(int size)
{
	IoSize = size;
	/* %x takes an unsigned argument */
	fprintf(stderr, "######################### buffer size (0x%x:%d) ######################\n", (unsigned)size, size);
	return 0;
}

/*
 * Run one sweep of doit(do_send_recv, ...) over the benchmark's buffer
 * offsets: 0..8 step 1, 8..32 step 2, 32..128 step 4, then 1024 +/- 128
 * and 4096 +/- 128 in steps of 8.  Each call gets the next fill code.
 *
 * Returns the fill code to use after the sweep.
 */
static int run_offset_sweep(int code)
{
	int i;

	for (i = 0; i <= 8; i++)
		doit(do_send_recv, i, LARGE, code++, "send/recv");
	for (i = 8; i <= 32; i += 2)
		doit(do_send_recv, i, LARGE, code++, "send/recv");
	for (i = 32; i <= 128; i += 4)
		doit(do_send_recv, i, LARGE, code++, "send/recv");
	for (i = 1024 - 128; i <= 1024 + 128; i += 8)
		doit(do_send_recv, i, LARGE, code++, "send/recv");
	for (i = 4096 - 128; i <= 4096 + 128; i += 8)
		doit(do_send_recv, i, LARGE, code++, "send/recv");

	return code;
}

/*
 * Set up the loopback connection, then repeat the offset sweep for
 * IoSize values of 8 KB, 64 KB and 1 MB.  The fill code starts at 'a'
 * and keeps counting up across all three sweeps.
 */
void DoTest()
{
	static const int io_sizes[] = {
		1024 * 8,	/* 8KB */
		1024 * 64,	/* 64KB */
		1024 * 1024,	/* 1MB */
	};
	int code = 'a';
	unsigned k;

	sock_init();

	for (k = 0; k < sizeof(io_sizes) / sizeof(io_sizes[0]); k++) {
		SetBufferSize(io_sizes[k]);
		code = run_offset_sweep(code);
	}
}


/*
 * Entry point: read the CPU clock, derive the aligned working buffers
 * and run the benchmark.
 */
int
main()
{
	/* clears the low 12 bits, i.e. rounds an address down to a multiple
	 * of 0x1000 */
	const unsigned long align_mask = ~(unsigned long)(0x1000 - 1);
	unsigned long from_addr;
	unsigned long to_addr;

	mhz = get_mhz();
	fprintf(stderr, "cpu: %lu MHz\n", mhz);

	/* start 0x4000 bytes in so that rounding down stays inside the array */
	from_addr = (unsigned long)&_FromBuf[0x4000];
	to_addr = (unsigned long)&_ToBuf[0x4000];
	FromBuf = (char*)(from_addr & align_mask);
	ToBuf = (char*)(to_addr & align_mask);

	fprintf(stderr, "\n################ to(%p) from(%p) ##############\n", ToBuf, FromBuf);
	DoTest();

	return 0;
}

