/*************************************************
Dynamic Reducing Write Update simulation program

This program simulates the use of DRWU cache
coherence protocol for a CSM system.

Assumptions:
  1: We assume a RISC-like architecture - one clock cycle/instruction
  2: The bus is one word/byte wide
  3: Each instruction is one word/byte long
  4: We assume that the counters (overhead) for each block in cache 
     are taken care of by a secondary cache
  5: A write to cache will take 2 cycles
  6: A read from cache takes 1 cycle
*/

#include "DRWUsim.h"


//************************************************************
//Input helpers used by main.

//Print prompt, then read one decimal int from stdin into *value.
//Returns 1 when a number >= minimum was read, 0 on a read failure
//or when the number is below minimum.
static int readIntAtLeast(const char* prompt, int minimum, int* value)
{
  printf("%s", prompt);
  if(scanf("%d", value)!=1)
    return 0;
  return (*value>=minimum);
}

//Same as readIntAtLeast, but for a long int (read with %ld).
static int readLongAtLeast(const char* prompt, long int minimum, long int* value)
{
  printf("%s", prompt);
  if(scanf("%ld", value)!=1)
    return 0;
  return (*value>=minimum);
}

//************************************************************
//main method
//
//Prompts for the system parameters (number of processors, cache size,
//central memory size, block size, number of cycles), builds the
//processing elements, central memory and bus, runs the simulation loop
//for the requested number of cycles and prints the busy-cycle totals.
//
//Returns 0 on success, 1 when a parameter could not be read or is out
//of range.
int main(void)
{
  int i=0, j=0, bindex=0;
  long int addr=0, block=0;
  long int cycle=0;
  long int numCycles=0;
  int ProcUsingBus=-1;   //-1 means no processor currently owns the bus

  printf("*********** DRWU Simulator ***********\n\n");
  printf("          Zachary D. Patitz\n\n");
  printf("This is a simulator for the Dynamic\n");
  printf("Reducing Write Update protocol for \n");
  printf("shared bus DSM systems using snooping\n");
  printf("protocols.\n");
  printf("**************************************\n\n\n");

  int numPEs = 0;
  int blocksize=0, memsize=0, cachesize=0;

  //let the user input vital system parameters
  //Sizes and the processor count must be at least 1: they are used as
  //an array length (new[]), PEs[0] is dereferenced below, and the block
  //size is used as a divisor. The cycle count is a long, so it is read
  //with %ld; zero cycles is allowed and simply skips the loop.
  if(!readIntAtLeast("Enter number of Processors: ", 1, &numPEs) ||
     !readIntAtLeast("Enter Kb of Cache:          ", 1, &cachesize) ||
     !readIntAtLeast("Enter Mb of Central Memory: ", 1, &memsize) ||
     !readIntAtLeast("Enter blocksize:            ", 1, &blocksize) ||
     !readLongAtLeast("Enter Number of cycles:     ", 0, &numCycles))
  {
    fprintf(stderr, "\nInvalid input: expected positive integers "
                    "(number of cycles may be 0).\n");
    return 1;
  }
  printf("\n");

  //create the array of pes
  //NOTE(review): PEs is never delete[]'d. It is left that way because
  //CProcElement's copy/destructor behaviour is not visible here and the
  //process exits right after the final report - confirm before adding
  //a delete[].
  CProcElement* PEs = new CProcElement[numPEs];

  //initialize the pes with the input values
  for(i=0; i<numPEs; i++)
    PEs[i].InititializePE(blocksize, cachesize);
  
  //seed the random number generator
  srand((unsigned)time(NULL));

  //create central memory
  CMemory CM(memsize, blocksize);
  CBus Bus;

  //print out the system specs
  printSystem(CM, PEs[0], numPEs);

  //generate central memory
  CM.GenerateMemory(numPEs);

  printf("\n");

  InitializeTask(PEs, CM, numPEs);

  //now we can start cycling through!!
  //The counter is a long so it can cover the full range of numCycles.
  for(cycle=0; cycle<numCycles; cycle++)
  {
    //visit each processor for each cycle
    for(j=0; j<numPEs; j++)
    {
      //a processor with a non-zero idle count is skipped this cycle
      if(PEs[j].getIdle()!=0)
        continue;

      if(PEs[j].getBusyCycles()<=0)
      {
        PEs[j].setBusyCycles(0);

        //busy cycle executing instr
        PEs[j].incBusyCycles();
        
        //find out where we are...
        //Decode with the memory's own block size: InitializeTask builds
        //the starting addresses from getBlockSize(), so using the
        //BLOCK_SIZE macro here would mis-decode any other block size.
        addr=PEs[j].Cache->getCurrAddress();
        bindex=addr%CM.getBlockSize();
        block=((addr-bindex)/CM.getBlockSize());
        
        //NOTE: the per-instruction handling below is disabled (commented
        //out), so every instruction currently costs exactly the one busy
        //cycle counted above. The disabled code is kept for reference; it
        //accesses Cache with '.' where the live code uses '->'.

        //read
        if((CM.Blocks[block].getInstruction(bindex).inst)==LOAD)
        {
        /*
          PEs[j].incBusyCycles();
          //            goToNextAddress(&PEs[j], &Bus, j, CM);

          long int readaddr=CM.Blocks[block].getInstruction(bindex).address;
          int readbindex=readaddr%BLOCK_SIZE;
          long int readblock=((readaddr-readbindex)/BLOCK_SIZE);
          
          if(CM.Blocks[readblock].getC(j)>0)
          {
            CM.Blocks[readblock].setC(j, C);
          }
          else
          {
            //processor has to wait for the bus to get its new block
            PEs[j].incIdle();
            Bus.addProcToQueue(j, readaddr, REQUEST);
          }
          PEs[j].Cache.setCurrAddress(readaddr);
          */
        }
        
        //write
        else if((CM.Blocks[block].getInstruction(bindex).inst)==STORE)
        {
          /*
          PEs[j].incBusyCycles();
          PEs[j].incBusyCycles();

          long int writeaddr=CM.Blocks[block].getInstruction(bindex).address;
          int writebindex=writeaddr%BLOCK_SIZE;
          long int writeblock=((writeaddr-writebindex)/BLOCK_SIZE);

          //if its cached it takes two cycles to execute the write
          if(CM.Blocks[writeblock].getC(j)!=0)
          {
            CM.Blocks[writeblock].setC(j, C);
          }
          else
          {
            PEs[j].incIdle();
            Bus.addProcToQueue(j, writeaddr, REQUEST);
          }

          PEs[j].Cache.setCurrAddress(writeaddr);
          
          if(CM.Blocks[writeblock].getProcCount()>0)
          {
            PEs[j].incIdle();
            printf("broadcast\n");
            Bus.addProcToQueue(j, writeaddr, BROADCAST);
          }
          */
        }
        //arithmetic instruction
        else if((CM.Blocks[block].getInstruction(bindex).inst)==ARITH)
        {
        }
        //branch
        else
        {
          /*
          long int branchaddr=CM.Blocks[block].getInstruction(bindex).address;
          int branchbindex=branchaddr%BLOCK_SIZE;
          long int branchblock=((branchaddr-branchbindex)/BLOCK_SIZE);
          
          //if its cached it takes one cycle to execute the branch
          if(CM.Blocks[branchblock].getC(j)!=0)
          {
            PEs[j].incBusyCycles();
            CM.Blocks[branchblock].setC(j, C);
          }
          else
          {
            PEs[j].incIdle();
            Bus.addProcToQueue(j, branchaddr, REQUEST);
          }

          PEs[j].Cache.setCurrAddress(branchaddr);
          */
        }
      }
      PEs[j].decBusyCycles();
    }

    //now to take care of bus traffic
    //when the bus has finished, release the processor that was using it
    if(Bus.getBusyCycles()<=0 && ProcUsingBus != -1)
    {
      PEs[ProcUsingBus].decIdle();
      PEs[ProcUsingBus].incBusyCycles();
      ProcUsingBus=-1;
    }
    //start the next queued request once the bus is free
    if(Bus.getProcQueue()!=NULL && Bus.getBusyCycles()<=0)
    {
      ProcUsingBus=Bus.executeNextRequest(&CM);
    }
    Bus.decBusyCycles();
  }

  //report the number of cycles actually simulated
  printf("\n%ld cycles\n", numCycles);
  printf("%ld Busy Cycles for bus\n", Bus.getTotBusyCycles());

  for(i=0; i<numPEs; i++)
    printf("%ld Busy Cycles for processor %d\n", PEs[i].getTotBusyCycles(), i);

  return 0;
}
//************************************************************

//************************************************************
//Unimplemented stub: the body is empty, so a call does nothing and
//none of the parameters (CM, pe, bus, block, pnum) are used.
//NOTE(review): presumably meant to hold the STORE handling that is
//commented out in main - confirm before implementing.
void write(CMemory* CM, CProcElement* pe, CBus* bus, long int block, int pnum)
{
  
}
//************************************************************

//************************************************************
//Move a processing element on to the address referenced by the
//instruction at its current address.
//
//  pe   - processing element to advance
//  bus  - bus on which a block request is queued when needed
//  pnum - index of the processing element, passed to getC/setC and to
//         the bus queue
//  cm   - central memory holding the instruction blocks
//
//The instruction at the cache's current address is looked up in cm and
//its address field becomes the cache's new current address. When
//getC(pnum) of the destination block is greater than zero, setC(pnum, C)
//is called on that block; otherwise the element's idle count is
//incremented and a REQUEST for the destination address is queued on the
//bus.
//
//NOTE(review): cm is received by value, so whether the setC call is
//visible in the caller's CMemory depends on CMemory's copy semantics -
//confirm.
void goToNextAddress(CProcElement* pe, CBus* bus, int pnum, CMemory cm)
{
  //split the current address into block number and offset in the block
  const long int here = pe->Cache->getCurrAddress();
  const int hereOffset = here%cm.getBlockSize();
  const long int hereBlock = ((here-hereOffset)/cm.getBlockSize());

  //the instruction stored there names the address to move to
  const long int target = cm.Blocks[hereBlock].getInstruction(hereOffset).address;
  const int targetOffset = target%cm.getBlockSize();
  const long int targetBlock = ((target-targetOffset)/cm.getBlockSize());

  const bool cached = (cm.Blocks[targetBlock].getC(pnum)>0);

  if(cached)
  {
    cm.Blocks[targetBlock].setC(pnum, C);
  }
  else
  {
    //the processor idles until the bus has served its request
    pe->incIdle();
    bus->addProcToQueue(pnum, target, REQUEST);
  }

  pe->Cache->setCurrAddress(target);
}

//************************************************************
//Print a summary of the simulated system to stdout.
//
//  CM     - central memory; supplies the memory size (Mb) and block size
//  pe     - a processing element; supplies the cache size (Kb)
//  numpes - number of processors to report
void printSystem(CMemory CM, CProcElement pe, int numpes)
{
  //one call, adjacent literals: the emitted text is the same as
  //printing each line separately
  printf("System Setup:\n"
         "  Central Memory Size:   %d Mb\n"
         "  Number of Processors:  %d\n"
         "  Cache Size:            %d Kb\n"
         "  Block Size:            %d Bytes\n"
         "\n",
         CM.getMemSizeMB(),
         numpes,
         pe.getCacheSize(),
         CM.getBlockSize());
}


//************************************************************
//Give every processing element its starting block.
//
//  pes    - array of processing elements to set up
//  cm     - central memory the starting blocks are taken from
//  numpes - number of elements in pes
//
//The first element starts at block index cm.getNumDataBlocks(); each
//following element starts 5 blocks further on. For every element the
//block is added to its cache, the cache's current address is set to the
//first address of that block, and the busy-cycle count is set to 1.
//
//NOTE(review): no check is made here that the block index stays inside
//cm.Blocks - confirm the memory is large enough for numpes elements.
void InitializeTask(CProcElement* pes, CMemory cm, int numpes)
{
  int startBlock = cm.getNumDataBlocks();

  for(int pe=0; pe<numpes; ++pe, startBlock+=5)
  {
    CProcElement& target = pes[pe];

    target.Cache->addBlock(cm.Blocks[startBlock], pe, 0);
    target.Cache->setCurrAddress(startBlock*cm.getBlockSize());
    target.setBusyCycles(1); //one cycle to read the instr from cache
  }
}
//************************************************************
/*

//************************************************************
//return the next address that the pe will use relative to 
//the address it is processing now
long int getNextAddress(long int curraddr)
{
  int rnum = rand()%100;
  long int newaddress = 0;

  if(rnum<=(NEXT_ADDR_PROB*100))
    return (curraddr+1);
  else
  {
    rnum=((rand()%OFFSET_RANGE)+MEM_OFFSET);

    if(rand()%2==0)
      newaddress = (curraddr+rnum);
    else
      newaddress = (curraddr-rnum);

    //make sure the value is in CM, if not, make it circular
    if(newaddress>CM_SIZE)
      return newaddress-CM_SIZE;
    else if(newaddress<0)
      return CM_SIZE+newaddress;
    else
      return newaddress;
  }

  //should never reach here...
  return 0;
}
//************************************************************

*/