#include "mpi.h"
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <stdio.h>
#include <math.h>
#include <stddef.h>
#include "buildmesham.h"
#include "timeofday.h"
/*
 * SWAP(a,b): exchange the .r and .i members of two lvalues.
 *
 * Both members are moved through a float temporary. The body is wrapped in
 * do { } while (0) so that "SWAP(x,y);" behaves as a single statement and
 * can safely be used as the unbraced body of an if/else.
 * Note: each argument is evaluated several times, so do not pass
 * expressions with side effects (e.g. a[i++]).
 */
#define SWAP(a,b) do {float swap_temp=(a).r;(a).r=(b).r;(b).r=swap_temp;\
		         swap_temp=(a).i;(a).i=(b).i;(b).i=swap_temp;} while (0)
typedef unsigned char byte;	// Raw-byte alias used by the transposition routines to move elements of arbitrary size; assumed to be 8 bits wide (true for unsigned char on x86).

/*
 * Out-of-place transpose helper, defined further down in this file.
 * Arguments are (source, destination, x, y, stride): the source is read as
 * y rows of x elements, each element being stride bytes wide.
 */
void doTransposition(byte *,byte *,int,int,int);

/*
 * Shift the integer a left by b bit positions.
 * The shift is carried out on int; the integer result is then converted to
 * double for the caller.
 */
DLLIMPORT double MESHbshiftl(int a, int b)
{
	int shifted=a << b;
	return (double) shifted;
}

/*
 * Create a communicator containing the n MPI_COMM_WORLD ranks listed in
 * ranks[] and store it in *newcomm.
 *
 * MPI_Comm_create is called on MPI_COMM_WORLD, so every process in
 * MPI_COMM_WORLD has to make this call. Processes that are part of the new
 * group additionally pass the communicator to MESHsetUpCommErrHandler
 * (defined elsewhere; presumably installs the library's error handler).
 *
 * Both temporary group handles are released before returning; freeing a
 * group does not affect a communicator that was built from it.
 */
DLLIMPORT void MESHcreateMPIComm(int n,int ranks[],MPI_Comm * newcomm)
{
	MPI_Group newgroup,worldgrp;
	MPI_Comm_group(MPI_COMM_WORLD,&worldgrp);
	MPI_Group_incl(worldgrp,n,ranks,&newgroup);
	MPI_Comm_create(MPI_COMM_WORLD,newgroup,newcomm);
	int myr;
	/* MPI_Group_rank yields MPI_UNDEFINED when this process is not in the group */
	MPI_Group_rank(newgroup,&myr);
	if (myr!=MPI_UNDEFINED)
	{
		MESHsetUpCommErrHandler(*newcomm);
	}
	MPI_Group_free(&newgroup);
	/* the handle returned by MPI_Comm_group is owned by us and was previously leaked */
	MPI_Group_free(&worldgrp);
}

/*
 * Return a newly allocated, transposed copy of passdata.
 *
 * passdata: source matrix, read as y rows of x elements; left unmodified.
 * x, y:     source dimensions in elements.
 * stride:   size of one element in bytes.
 *
 * Returns a malloc'd buffer of stride * x * y bytes that the caller must
 * release with free(), or NULL if a dimension is negative or the
 * allocation fails.
 */
DLLIMPORT void * MESHtranposefn(void * passdata,int x, int y,int stride)
{
	if (x < 0 || y < 0 || stride < 0) {return NULL;}
	byte * data=(byte *) passdata;
	/* size the buffer in size_t so large matrices do not overflow int */
	size_t nbytes=(size_t) stride * (size_t) x * (size_t) y;
	byte * tempdata=(byte *) malloc(nbytes);
	if (tempdata==NULL) {return NULL;}
	doTransposition(data,tempdata,x,y,stride);
	return tempdata;
}

/*
 * Transpose passdata into the caller-supplied buffer tempdata.
 *
 * The argument order follows memcpy (destination first, then source), which
 * is the reverse of the other transposition functions in this file.
 * x and y are deliberately forwarded to doTransposition in swapped order,
 * so the source is read as x rows of y elements, each stride bytes wide.
 */
DLLIMPORT void MESHmemcpyandtranspose(void * tempdata,void * passdata,int x, int y,int stride)
{
	byte * source=(byte *) passdata;
	byte * destination=(byte *) tempdata;

	/* note the y,x order: dimensions are swapped on purpose */
	doTransposition(source,destination,y,x,stride);
}

/*
 * Out-of-place transpose of a matrix of fixed-size elements.
 *
 * data:     source, read as y rows of x elements.
 * tempdata: destination, written as x rows of y elements; must provide
 *           room for stride * x * y bytes.
 * stride:   size of one element in bytes.
 *
 * Element (i,j) of the source is copied to element (j,i) of the
 * destination. Nothing is done if any of x, y or stride is not positive.
 * Offsets are computed in size_t so that matrices whose byte size exceeds
 * INT_MAX do not overflow.
 */
void doTransposition(byte * data, byte * tempdata,int x,int y,int stride)
{
	if (x <= 0 || y <= 0 || stride <= 0) {return;}
	const size_t cols=(size_t) x;
	const size_t rows=(size_t) y;
	const size_t elemsize=(size_t) stride;
	size_t i,j;
	for (i=0;i< rows;i++)
	{
		for (j=0;j< cols;j++)
		{
			/* memmove rather than memcpy: stays well defined even if a caller passes the same buffer twice */
			memmove(tempdata + ((j * rows) + i) * elemsize,
			        data + ((i * cols) + j) * elemsize,
			        elemsize);
		}
	}
}

/*
 * Check that the program was started with the expected number of processes
 * and, if not, relaunch it through mpiexec.
 *
 * shouldbe:   number of processes the program needs.
 * totp:       number of processes it is actually running with.
 * argcounter: number of entries in arguments (as argc).
 * arguments:  program name followed by its arguments (as argv); only read
 *             when shouldbe != totp, in which case arguments[0] must be a
 *             valid string.
 *
 * Returns 1 when shouldbe == totp and nothing needs to be done. Otherwise
 * runs "mpiexec -np <shouldbe> <arguments[0]> <arguments[1]> ..." via
 * system() and returns 0 (also returned if the command could not be built).
 *
 * NOTE(review): the arguments are appended unquoted, so the shell invoked
 * by system() will re-split or interpret arguments containing spaces or
 * shell metacharacters. Left unchanged here; consider an exec/spawn style
 * launch with an explicit argv if arguments may be untrusted.
 */
DLLIMPORT int MESHcheckstartup(int shouldbe,int totp,int argcounter, char * arguments[])
{
	if (shouldbe==totp) {return 1;}

	static const char launcher[]="mpiexec -np ";
	int i;

	/*
		Exact upper bound for the command line:
		launcher text (sizeof includes the terminating NUL),
		the process count (3 decimal digits per byte over-estimates any int, +1 for a sign),
		one space and the program name,
		then one space plus the text of every further argument.
	*/
	size_t totalsize=sizeof(launcher) + (sizeof(int) * 3 + 1) + 1 + strlen(arguments[0]);
	for (i=1;i<argcounter;i++)
	{
		totalsize+=strlen(arguments[i]) + 1;
	}

	char * commandline=(char *) malloc(totalsize);
	if (commandline==NULL)
	{
		fprintf(stderr,"MESHcheckstartup: out of memory while building the mpiexec command line\n");
		return 0;
	}

	int written=snprintf(commandline,totalsize,"%s%d %s",launcher,shouldbe,arguments[0]);
	if (written < 0 || (size_t) written >= totalsize)
	{
		free(commandline);
		return 0;
	}

	/* append " <arg>" for each remaining argument directly into the buffer */
	char * writepos=commandline + written;
	for (i=1;i<argcounter;i++)
	{
		size_t arglen=strlen(arguments[i]);
		*writepos++=' ';
		memcpy(writepos,arguments[i],arglen);
		writepos+=arglen;
	}
	*writepos='\0';

	//printf("%s\n",commandline);
	if (system(commandline)==-1)
	{
		fprintf(stderr,"MESHcheckstartup: unable to run \"%s\"\n",commandline);
	}
	free(commandline);
	return 0;
}

/*
 * Transpose passdata in place by going through a temporary heap buffer.
 *
 * passdata: matrix read as y rows of x elements; on return it holds the
 *           transposed matrix (x rows of y elements).
 * stride:   size of one element in bytes.
 *
 * Nothing is done when any of x, y or stride is not positive. If the
 * temporary buffer cannot be allocated a message is written to stderr and
 * the data is left untouched.
 */
DLLIMPORT void MESHTransposeWithTempVar(void * passdata,int x,int y,int stride)
{
	if (x <= 0 || y <= 0 || stride <= 0) {return;}
	byte * data=(byte *) passdata;
	/* size the buffer in size_t so large matrices do not overflow int */
	size_t nbytes=(size_t) stride * (size_t) x * (size_t) y;
	byte * tempdata=(byte*) malloc(nbytes);
	if (tempdata==NULL)
	{
		fprintf(stderr,"MESHTransposeWithTempVar: unable to allocate %lu bytes, data left untransposed\n",(unsigned long) nbytes);
		return;
	}
	doTransposition(data,tempdata,x,y,stride);
	memcpy(data,tempdata,nbytes);
	free(tempdata);
}

/*
 * Transpose an N x N matrix in place, without a temporary buffer.
 *
 * passdata: the matrix, N rows of N elements.
 * stride:   size of one element in bytes.
 *
 * Each element above the diagonal is exchanged, byte by byte, with its
 * mirror element below the diagonal; diagonal elements are not touched.
 */
DLLIMPORT void MESHinplacesquaretranspose(void * passdata,int N, int stride)
{
	byte * cells=(byte *) passdata;
	int row;
	for (row=0;row < N - 1;row++)
	{
		int col;
		for (col=row + 1;col < N;col++)
		{
			/* byte offsets of element (row,col) and of its mirror (col,row) */
			int upper=(row * N + col) * stride;
			int lower=(col * N + row) * stride;
			int b;
			for (b=0;b < stride;b++)
			{
				byte held=cells[upper + b];
				cells[upper + b]=cells[lower + b];
				cells[lower + b]=held;
			}
		}
	}
}

/*
 * Print the current time of day, as reported by gettimeofday(), to stdout
 * in the form "Seconds:<s> Microsecs:<us>".
 *
 * The second field is tv_usec, i.e. the microsecond part of the timestamp;
 * it was previously labelled "Millisecs" although the value printed was
 * always microseconds. Seconds are printed as long rather than int so the
 * value is not truncated on platforms where long is wider than int.
 */
DLLIMPORT void MESHdisplayhighresepoch()
{
	struct timeval tv;
	gettimeofday(&tv, NULL);
	long s=(long) tv.tv_sec;
	long us=(long) tv.tv_usec;
	printf("Seconds:%ld Microsecs:%ld\n",s,us);
}

/*
 * Shift the integer a right by b bit positions.
 * The shift is carried out on int; the integer result is then converted to
 * double for the caller.
 */
DLLIMPORT double MESHbshiftr(int a, int b)
{
	int shifted=a >> b;
	return (double) shifted;
}

/*
 * Reorder the n entries of a into bit-reversed index order, in place.
 *
 * A forward index runs from 0 up to n-3 while a second index is updated
 * alongside it as a counter whose carries propagate from the top bit
 * (n>>1) downwards. Whenever the forward index is the smaller of the two,
 * the corresponding entries are exchanged with SWAP.
 * NOTE(review): this presumably expects n to be a power of two - confirm
 * against the callers.
 */
DLLIMPORT void MESHbitreverse(mycomplex * a, int n)
{
	int fwd=0;
	int rev=0;
	int bit;
	while (fwd < n-2)
	{
		if (rev > fwd)
		{
			SWAP(a[rev],a[fwd]);
		}
		/* step the mirrored counter: clear set bits from the top down, then set the first clear one */
		for (bit=n>>1;bit <= rev;bit>>=1)
		{
			rev-=bit;
		}
		rev+=bit;
		fwd++;
	}
}

/*
 * Copy numbertocopy doubles from b into a.
 *
 * a: destination, treated as an array of double.
 * b: source, treated as an array of double.
 * Elements are copied one at a time starting from index 0; nothing is
 * copied when numbertocopy is zero or negative.
 */
DLLIMPORT void MESHCopyArray(void * a,void * b, int numbertocopy)
{
	double * dest=(double *) a;
	double * src=(double *) b;
	int remaining=numbertocopy;
	while (remaining > 0)
	{
		*dest++=*src++;
		remaining--;
	}
}
