#include "mpi.h"
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "buildmesham.h"
// Values stored in the `mode` field of a List node; they select how the node is
// treated when a sync is performed.
// Node holds a copy of an MPI_Request only.
#define REQUESTONLY 0
// Node holds a copy of an MPI_Request plus a memcpy (src -> cpyinto) to perform once it completes.
#define REQUESTANDMEMCPY 1
// Node holds a copy of an MPI_Win that is fenced on sync.
#define ONESIDEDWINDOW 2
// Node marks a variable sent through the attached MPI buffer; sync detaches and re-attaches that buffer.
#define BUFFEREDVAR 3
// One tracked communication, kept in a doubly linked list.
// Which fields are meaningful depends on `mode`.
typedef struct llist {
	struct llist * prev;		// neighbour towards the head (NULL for the head itself)
	MPI_Request * asyncrequest;	// heap copy of the request (REQUESTONLY / REQUESTANDMEMCPY)
	struct llist * next;		// neighbour away from the head (NULL for the last node)
	char * varname;			// heap copy of the variable name the node is registered under
	int mode;			// one of REQUESTONLY, REQUESTANDMEMCPY, ONESIDEDWINDOW, BUFFEREDVAR
	void * cpyinto;			// memcpy destination (REQUESTANDMEMCPY)
	void * src;			// memcpy source (REQUESTANDMEMCPY)
	int bytes;			// memcpy length (REQUESTANDMEMCPY) or buffer size (BUFFEREDVAR)
	char * cpyintoname;		// heap copy of the destination variable's name (REQUESTANDMEMCPY)
	pthread_t * copythread;		// heap-allocated handle of the background copy thread (REQUESTANDMEMCPY)
	MPI_Win * onesidedwindow;	// heap copy of the window (ONESIDEDWINDOW)
	void * buffer;			// caller supplied buffer pointer (BUFFEREDVAR)
} List;

// Front of the list; NULL while nothing is registered. New nodes are pushed here.
List * head=NULL;
// Forward declarations of the list helpers defined further down in this file.

// Number of nodes that are neither ONESIDEDWINDOW nor BUFFEREDVAR.
int getNumAllRequests();
// Fences the first ONESIDEDWINDOW node registered under the name; returns 1 if one was found, else 0.
int performonesidedwindowifneeded(char *);
// Detaches and re-attaches the MPI buffer if a BUFFEREDVAR node has the name; returns 1 if so, else 0.
int performbufferedcycleifneeded(char *);
// Number of nodes registered under the name, not counting BUFFEREDVAR nodes.
int getNumRequestsPerVariable(char *);
// Unlinks and frees every node except ONESIDEDWINDOW ones.
void removeAllRequests();
// Unlinks and frees the nodes registered under the name, except BUFFEREDVAR ones.
void removeRequestsOfVars(char *);
// Fences every ONESIDEDWINDOW node in the list.
void dosyncononesidedwindows();
// Detaches and re-attaches the MPI buffer once per BUFFEREDVAR node in the list.
void dobuffercycles();
// Copies the request of every non-window, non-buffered node into the array; the int is the expected count.
void fillRequestArrayForAll(MPI_Request [], int);
// Copies the request of every non-buffered node with the given name into the array; the int is the expected count.
void fillRequestArrayForVar(MPI_Request [], char *,int);
// Performs the pending memcpy of every REQUESTANDMEMCPY node with the given name.
void honourMemCpyForVar(char *);
// Performs the pending memcpy of every REQUESTANDMEMCPY node.
void honourMemCpyForAll();
void *asyncupdatememthread(List *);
int suspendthreads=0;
void *asyncupdatememthread(List * mydetails)
{
	int testr=0;
	while(testr==0){if (suspendthreads==0) {MPI_Test(mydetails->asyncrequest,&testr,MPI_STATUS_IGNORE);}}
	while (suspendthreads==1) {}
	if (mydetails->mode==REQUESTANDMEMCPY)
	{						
		memcpy(mydetails->cpyinto,mydetails->src,mydetails->bytes);
		free(mydetails->cpyintoname);
		free(mydetails->copythread);
	} 			
	removeRequestsOfVars(mydetails->varname);	// now remove the sync request as sync has been performed - not needed
}

// Registers a one-sided window under varname so that a later sync fences it.
//
// windowtosync: window handle to track; it is copied, the caller's storage is not kept.
// varname:      NUL-terminated variable name; it is copied.
//
// Does nothing if the name already has a non-buffered entry in the list. If memory
// cannot be obtained an error is written to stderr and nothing is registered.
DLLIMPORT void MESHRegisterOneSidedWindow(MPI_Win * windowtosync,char * varname)
{
	if (getNumRequestsPerVariable(varname) > 0) {return ;}

	// calloc so that the fields this mode never sets start out zeroed instead of indeterminate
	List * newitem=calloc(1,sizeof(List));
	// need to issue malloc as the memory passed here is on the stack frame
	MPI_Win * newwin=malloc(sizeof(MPI_Win));
	// +1 leaves room for the terminating NUL that strcpy writes
	char * newvarname=malloc(strlen(varname)+1);
	if (newitem==NULL || newwin==NULL || newvarname==NULL)
	{
		fprintf(stderr,"Out of memory registering one sided window for %s\n",varname);
		free(newitem);
		free(newwin);
		free(newvarname);
		return ;
	}
	memcpy(newwin,windowtosync,sizeof(MPI_Win));
	strcpy(newvarname,varname);
	newitem->onesidedwindow=newwin;
	newitem->varname=newvarname;
	newitem->mode=ONESIDEDWINDOW;

	// push onto the front of the list
	newitem->prev=NULL;
	newitem->next=head;
	if (head != NULL) {head->prev=newitem;}
	head=newitem;
}

// Registers varname as a buffered variable so that a later sync cycles the MPI buffer.
//
// varname:    NUL-terminated variable name; it is copied.
// buffer:     pointer stored in the node as given (not copied).
// buffersize: stored in the node's bytes field.
//
// No check for an existing entry is made. If memory cannot be obtained an error is
// written to stderr and nothing is registered.
DLLIMPORT void MESHRegisterBufferedVar(char * varname,void * buffer, int buffersize)
{
	// calloc so that the fields this mode never sets start out zeroed instead of indeterminate
	List * newitem=calloc(1,sizeof(List));
	// +1 leaves room for the terminating NUL that strcpy writes
	char * newvarname=malloc(strlen(varname)+1);
	if (newitem==NULL || newvarname==NULL)
	{
		fprintf(stderr,"Out of memory registering buffered variable %s\n",varname);
		free(newitem);
		free(newvarname);
		return ;
	}
	strcpy(newvarname,varname);
	newitem->varname=newvarname;
	newitem->bytes=buffersize;
	newitem->buffer=buffer;
	newitem->mode=BUFFEREDVAR;

	// push onto the front of the list
	newitem->prev=NULL;
	newitem->next=head;
	if (head != NULL) {head->prev=newitem;}
	head=newitem;
}

// Registers an outstanding request under varname so that a later sync waits for it.
//
// req:     request handle to track; it is copied, the caller's storage is not kept.
// varname: NUL-terminated variable name; it is copied.
//
// On success the new node is the head of the list, in REQUESTONLY mode. If memory
// cannot be obtained an error is written to stderr and the list is left untouched.
DLLIMPORT void MESHRegisterAsyncRequest(MPI_Request * req,char * varname)
{
	// calloc so that the fields this mode never sets start out zeroed instead of indeterminate
	List * newitem=calloc(1,sizeof(List));
	// need to issue malloc as the memory passed here is on the stack frame
	MPI_Request * newreq=malloc(sizeof(MPI_Request));
	// +1 leaves room for the terminating NUL that strcpy writes
	char * newvarname=malloc(strlen(varname)+1);
	if (newitem==NULL || newreq==NULL || newvarname==NULL)
	{
		fprintf(stderr,"Out of memory registering async request for %s\n",varname);
		free(newitem);
		free(newreq);
		free(newvarname);
		return ;
	}
	memcpy(newreq,req,sizeof(MPI_Request));
	strcpy(newvarname,varname);
	newitem->asyncrequest=newreq;
	newitem->varname=newvarname;
	newitem->mode=REQUESTONLY;

	// push onto the front of the list
	newitem->prev=NULL;
	newitem->next=head;
	if (head != NULL) {head->prev=newitem;}
	head=newitem;
}

// Tells the tracker that varname has gone out of scope: every REQUESTANDMEMCPY
// node whose copy destination carries that name is downgraded to REQUESTONLY so
// that no memcpy into the dead variable is performed. The request itself stays
// registered.
DLLIMPORT void MESHinformdeadvariable(char * varname)
{
	suspendthreads=1;
	for (List * node=head; node != NULL; node=node->next)
	{
		if (node->mode != REQUESTANDMEMCPY) {continue;}
		if (strcmp(node->cpyintoname,varname) != 0) {continue;}
		// switch the mode first, then release what only the copy needed
		node->mode=REQUESTONLY;
		free(node->cpyintoname);
		free(node->copythread);
	}
	suspendthreads=0;
}

// Registers an outstanding request under varname together with a memcpy that has
// to be performed once the request has completed, and starts a background thread
// (asyncupdatememthread) that performs it.
//
// req:         request handle to track (copied by MESHRegisterAsyncRequest).
// varname:     name the request is registered under.
// cpyinto:     memcpy destination.
// src:         memcpy source.
// elements:    stored in the node's bytes field and used as the memcpy length.
// cpyintoname: NUL-terminated name of the destination variable; it is copied.
//
// If memory for the copy bookkeeping cannot be obtained an error is written to
// stderr and the node is left as a plain REQUESTONLY entry (no copy is performed).
DLLIMPORT void MESHRegisterAsyncRequestAndMemCpy(MPI_Request * req,char * varname,void * cpyinto, void * src,int elements,char * cpyintoname)
{
	List * previoushead=head;
	MESHRegisterAsyncRequest(req,varname);
	// the new node is expected at the head; if the head did not change nothing was
	// pushed, and the node sitting there belongs to something else
	if (head==previoushead) {return ;}
	List * node=head;

	// +1 leaves room for the terminating NUL that strcpy writes
	char * newvarname=malloc(strlen(cpyintoname)+1);
	pthread_t * thread=malloc(sizeof(pthread_t));
	if (newvarname==NULL || thread==NULL)
	{
		fprintf(stderr,"Out of memory registering memcpy into %s, request is tracked without it\n",cpyintoname);
		free(newvarname);
		free(thread);
		return ;
	}
	strcpy(newvarname,cpyintoname);

	// the node must be fully set up before the thread starts reading it
	node->mode=REQUESTANDMEMCPY;
	node->cpyinto=cpyinto;
	node->src=src;
	node->bytes=elements;
	node->cpyintoname=newvarname;
	node->copythread=thread;
	int rc=pthread_create(thread, NULL, asyncupdatememthread, node);
	if (rc)
	{
		// NOTE(review): the node stays in REQUESTANDMEMCPY mode with a thread handle
		// that was never filled in; the honourMemCpy* helpers pass that handle to
		// pthread_cancel - confirm this is acceptable.
		printf("None fatal error: Can not create thread for %s variable update, error id: %d\n",cpyintoname,rc);
	}
}

// Synchronises everything registered under varname.
//
// The MPI buffer is cycled if the name is a buffered variable. If the name has a
// one-sided window it is fenced and nothing further is done. Otherwise all requests
// registered under the name are waited for, their pending memcpys are performed and
// the entries are removed from the list.
DLLIMPORT void MESHSyncWithVariableName(char * varname)
{
	if (head==NULL) {return ;}
	performbufferedcycleifneeded(varname);
	if (performonesidedwindowifneeded(varname)==1) {return ;}

	int pending=getNumRequestsPerVariable(varname);
	if (pending != 0)
	{
		// copy threads are held back while the list is waited on and edited
		suspendthreads=1;
		MPI_Request waitlist[pending];
		fillRequestArrayForVar(waitlist,varname,pending);
		MPI_Waitall(pending,waitlist,MPI_STATUSES_IGNORE);
		honourMemCpyForVar(varname);
		removeRequestsOfVars(varname);
		suspendthreads=0;
	}
}

// Synchronises everything that is registered: fences all one-sided windows, cycles
// the MPI buffer for buffered variables, then waits for every outstanding request,
// performs the pending memcpys and removes the entries from the list.
DLLIMPORT void MESHSyncAll()
{
	if (head==NULL) {return ;}
	dosyncononesidedwindows();
	dobuffercycles();

	int pending=getNumAllRequests();
	if (pending != 0)
	{
		// copy threads are held back while the list is waited on and edited
		suspendthreads=1;
		MPI_Request waitlist[pending];
		fillRequestArrayForAll(waitlist,pending);
		MPI_Waitall(pending,waitlist,MPI_STATUSES_IGNORE);
		honourMemCpyForAll();
		removeAllRequests();
		suspendthreads=0;
	}
}

// Performs the pending memcpy of every REQUESTANDMEMCPY node in the list, then
// cancels the node's copy thread and frees the destination name and thread handle.
// The nodes themselves are left in the list.
void honourMemCpyForAll()
{
	for (List * node=head; node != NULL; node=node->next)
	{
		if (node->mode != REQUESTANDMEMCPY) {continue;}
		memcpy(node->cpyinto,node->src,node->bytes);
		free(node->cpyintoname);
		// the copy has just been done here, so the background thread is no longer wanted
		pthread_cancel(*node->copythread);
		free(node->copythread);
	}
}

// Same as honourMemCpyForAll but restricted to the nodes registered under varname:
// performs the pending memcpy, cancels the copy thread and frees the destination
// name and thread handle. The nodes themselves are left in the list.
void honourMemCpyForVar(char * varname)
{
	for (List * node=head; node != NULL; node=node->next)
	{
		if (strcmp(node->varname,varname) != 0) {continue;}
		if (node->mode != REQUESTANDMEMCPY) {continue;}
		memcpy(node->cpyinto,node->src,node->bytes);
		free(node->cpyintoname);
		// the copy has just been done here, so the background thread is no longer wanted
		pthread_cancel(*node->copythread);
		free(node->copythread);
	}
}

// If varname is registered as a buffered variable, detaches the MPI buffer and
// attaches it again, then returns 1. Returns 0 if no BUFFEREDVAR node has the name.
int performbufferedcycleifneeded(char * varname)
{
	for (List * node=head; node != NULL; node=node->next)
	{
		if (strcmp(node->varname,varname) != 0) {continue;}
		if (node->mode != BUFFEREDVAR) {continue;}

		// Cycle the buffer: detaching it forces its contents to be delivered to the
		// recipient. MPI has a single buffer globally, so one cycle covers all
		// buffered sends and the search can stop at the first match.
		void * attached;
		int attachedsize;
		MPI_Buffer_detach(&attached,&attachedsize);
		MPI_Buffer_attach(attached,attachedsize);
		return 1;
	}
	return 0;
}

int performonesidedwindowifneeded(char * varname)
{
	List * ch=head;
	while (ch != NULL)
	{
		if (strcmp(ch->varname,varname)==0)
		{		
			if (ch->mode==ONESIDEDWINDOW)
			{					
				MPI_Win_fence(MPI_MODE_NOPRECEDE, (*(ch->onesidedwindow)));	// synchronization
				MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), (*(ch->onesidedwindow)));
				return 1;
			} 								
		}
		ch=ch->next;
	}
	return 0;
}

void removeAllRequests()
{
	List * ch=head;
	List * newhead=NULL;
	while (ch != NULL)
	{
		if (ch->mode==ONESIDEDWINDOW)
		{
			if (newhead==NULL) {newhead=ch;}
		} else {
			free(ch->asyncrequest);
			free(ch->varname);		
			List * oldch=ch;
			ch=ch->next;
			if (ch != NULL) {ch->prev=oldch->prev;}
			if (oldch->prev != NULL) {oldch->prev->next = ch;}
			free(oldch);
		}
	}
	head=newhead;
}

void removeRequestsOfVars(char * varname)
{
	List * ch=head;
	while (ch != NULL)
	{
		if (strcmp(ch->varname,varname)==0 && ch->mode != BUFFEREDVAR) 
		{			
			free(ch->asyncrequest);
			free(ch->varname);					
			if ((ch->prev) != NULL) {(ch->prev)->next=ch->next;}			
			if ((ch->next) != NULL) {(ch->next)->prev=ch->prev;}			
			if (ch==head) {head=ch->next;}
			List * oldch=ch;			
			ch=ch->next;
			free(oldch);			
		}	else {
			ch=ch->next;
		}		
	}
}

void dosyncononesidedwindows()
{
	List * ch=head;
	while (ch != NULL)
	{			
			if (ch->mode==ONESIDEDWINDOW)
			{					
				MPI_Win_fence(MPI_MODE_NOPRECEDE, (*(ch->onesidedwindow)));	// synchronization
				MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), (*(ch->onesidedwindow)));
			} 								
		ch=ch->next;
	}
}

// Detaches the MPI buffer and attaches it again, once for every BUFFEREDVAR node
// found in the list.
void dobuffercycles()
{
	for (List * node=head; node != NULL; node=node->next)
	{
		if (node->mode != BUFFEREDVAR) {continue;}
		void * attached;
		int attachedsize;
		MPI_Buffer_detach(&attached,&attachedsize);
		MPI_Buffer_attach(attached,attachedsize);
	}
}

// Copies the request of every node that is neither a one-sided window nor a
// buffered variable into requestholder, in list order.
//
// requestholder:    destination array with room for numberofrequests entries.
// numberofrequests: number of requests the caller counted beforehand.
//
// If the list holds a different number of requests than expected an error is
// written to stderr. Requests beyond numberofrequests are counted but not stored,
// so the array is never written past its end; if fewer are found the remaining
// entries are left untouched.
void fillRequestArrayForAll(MPI_Request requestholder[], int numberofrequests)
{
	int index=0;
	List * ch=head;
	while (ch != NULL)
	{
		if (ch->mode!=ONESIDEDWINDOW && ch->mode != BUFFEREDVAR)
		{
			if (index < numberofrequests)
			{
				memcpy(&requestholder[index],ch->asyncrequest,sizeof(MPI_Request));
			}
			index++;
		}
		ch=ch->next;
	}
	if (numberofrequests != index)
	{
		fprintf(stderr,"Async missmatch error!\n");
	}
}

// Copies the request of every node registered under varname, except buffered
// variables, into requestholder, in list order.
//
// requestholder:    destination array with room for numberofrequests entries.
// varname:          variable name to select nodes by.
// numberofrequests: number of requests the caller counted beforehand.
//
// If the list holds a different number of matching nodes than expected an error is
// written to stderr. Matches beyond numberofrequests are counted but not stored,
// so the array is never written past its end; if fewer are found the remaining
// entries are left untouched.
void fillRequestArrayForVar(MPI_Request requestholder[], char * varname,int numberofrequests)
{
	int index=0;
	List * ch=head;
	while (ch != NULL)
	{
		if (strcmp(ch->varname,varname)==0 && ch->mode != BUFFEREDVAR)
		{
			if (index < numberofrequests)
			{
				memcpy(&requestholder[index],ch->asyncrequest,sizeof(MPI_Request));
			}
			index++;
		}
		ch=ch->next;
	}
	if (numberofrequests != index)
	{
		fprintf(stderr,"Async missmatch error!\n");
	}
}

// Returns how many nodes in the list are neither one-sided windows nor buffered
// variables, i.e. how many carry a request.
int getNumAllRequests()
{
	int count=0;
	for (List * node=head; node != NULL; node=node->next)
	{
		if (node->mode==ONESIDEDWINDOW || node->mode==BUFFEREDVAR) {continue;}
		count++;
	}
	return count;
}

// Returns how many nodes are registered under varname, not counting buffered
// variables.
int getNumRequestsPerVariable(char * varname)
{
	int count=0;
	for (List * node=head; node != NULL; node=node->next)
	{
		if (strcmp(node->varname,varname) != 0) {continue;}
		if (node->mode != BUFFEREDVAR) {count++;}
	}
	return count;
}
