// MPI Initialization
// Start MPI, then query the number of processes in MPI_COMM_WORLD and this
// process's rank within it.
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &num_proc);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// Number of processes that take part in the computation; only ranks below
// this value enter the work section guarded by `rank < numWorkProcs`.
numWorkProcs = CalcNumBlocks(size, num_proc, blockSize);
// Number of blocks along one side of the block grid; used as the row width
// when ranks are mapped to (row, column) grid positions.
// NOTE(review): integer division -- presumably size is a multiple of
// blockSize and blockSize is non-zero; confirm against the caller.
numBlocksWidth=size/blockSize;
// Buffer Initialization
// Backing storage for the buffered sends (MPI_Bsend) used by the block
// exchange. buffer_size is a count of ints, whereas MPI_Buffer_attach expects
// the buffer size in BYTES, so the count is converted before attaching;
// passing the raw count would register only a fraction of the allocation.
// NOTE(review): every pending MPI_Bsend also consumes MPI_BSEND_OVERHEAD
// bytes; the 2*size*size+size sizing formula is kept as-is -- confirm it
// leaves room for that overhead on the smallest configurations, and that
// buffer_size*sizeof(int) stays within int range for the largest ones.
int buffer_size = 2*size*size+size;
int *buffer = new int[buffer_size];
MPI_Buffer_attach(buffer,buffer_size*(int)sizeof(int));
if (rank < numWorkProcs)
{
// Memory Allocation
// Per-process working blocks: blockA and blockB receive this rank's pieces of
// A and B from the scatters below; blockC is the third operand handed to
// MultMatrix and is what gets gathered into C at the end.
// NOTE(review): the second argument of MatrixInitialization presumably selects
// the fill mode (0 here, 1 for the input matrices on the root) -- confirm
// against its definition.
Matrix blockC = MatrixInitialization(blockSize,0);
Matrix blockA = MatrixInitialization(blockSize,0);
Matrix blockB = MatrixInitialization(blockSize,0);
// Only rank 0 builds the full size x size matrices and starts the timer.
if (rank==0)
{
A=MatrixInitialization(size,1);
B=MatrixInitialization(size,1);
C=MatrixInitialization(size,0);
// The timer starts before Repack, so the timed region includes repacking.
starttime = MPI_Wtime();
// NOTE(review): Repack presumably reorders each matrix so that the elements
// of one block are contiguous, which is what MPI_Scatter needs in order to
// hand out one block per rank -- confirm. If Repack returns a new allocation,
// the previous A/B storage is not released here.
A=Repack(A,size,blockSize);
B=Repack(B,size,blockSize);
}
// Hold every rank of MPI_Comm_Work until the root has finished preparing A and B.
MPI_Barrier(MPI_Comm_Work);
// Distribute the blocks: rank 0 sends consecutive chunks of
// blockSize*blockSize ints of A (then B) to the ranks of MPI_Comm_Work in rank
// order; each rank stores its chunk in blockA (blockB).
// NOTE(review): `rank` was obtained from MPI_COMM_WORLD but is compared with
// the root of a collective on MPI_Comm_Work -- this assumes ranks in
// MPI_Comm_Work coincide with world ranks; confirm where MPI_Comm_Work is created.
MPI_Scatter(A, blockSize*blockSize,MPI_INT,blockA,blockSize*blockSize,MPI_INT,0,MPI_Comm_Work);
MPI_Scatter(B, blockSize*blockSize,MPI_INT,blockB,blockSize*blockSize,MPI_INT,0,MPI_Comm_Work);
// Position of this rank in the block grid: ranks are numbered row by row,
// numBlocksWidth ranks per row.
int row_num = rank/numBlocksWidth;
int col_num = rank%numBlocksWidth;
// Initial alignment of A (tag 0): the block in grid row r moves r columns to
// the left within its row, wrapping around past column 0. Row 0 keeps its
// blocks.
if (row_num!=0)
{
    const int destA = (col_num < row_num)
        ? rank+numBlocksWidth-row_num   // wraps around the left edge
        : rank-row_num;
    MPI_Bsend(blockA,blockSize*blockSize,MPI_INT,destA,0,MPI_Comm_Work);
}
// Initial alignment of B (tag 1): the block in grid column c moves c rows up
// within its column, wrapping around past row 0. Column 0 keeps its blocks.
if (col_num!=0)
{
    const int destB = (row_num < col_num)
        ? rank+(numBlocksWidth-col_num)*numBlocksWidth   // wraps around the top edge
        : rank-numBlocksWidth*col_num;
    MPI_Bsend(blockB,blockSize*blockSize,MPI_INT,destB,1,MPI_Comm_Work);
}
// Collect the aligned blocks. A rank outside grid row 0 sent its A block
// away (tag 0) and therefore receives a replacement; likewise a rank outside
// grid column 0 receives a replacement B block (tag 1). When both apply, A is
// received before B.
if (row_num!=0)
{
    MPI_Recv(blockA,blockSize*blockSize,MPI_INT,MPI_ANY_SOURCE,0,MPI_Comm_Work,&status);
}
if (col_num!=0)
{
    MPI_Recv(blockB,blockSize*blockSize,MPI_INT,MPI_ANY_SOURCE,1,MPI_Comm_Work,&status);
}
// No rank leaves this barrier until every rank has completed its alignment
// receives. The shift rounds that follow reuse tags 0/1 with MPI_ANY_SOURCE,
// so this keeps alignment messages from being matched against shift messages.
MPI_Barrier(MPI_Comm_Work);
// Multiplication
MultMatrix (blockA,blockB,blockC,blockSize);
for (int i=0; i < numBlocksWidth-1; i++)
{
if (col_num==0)
MPI_Bsend (blockA,blockSize*blockSize,MPI_INT,(row_num+1)*numBlocksWidth-1,0,MPI_Comm_Work);
else
MPI_Bsend (blockA,blockSize*blockSize,MPI_INT,rank-1,0,MPI_Comm_Work);
if (rank < numBlocksWidth)
MPI_Bsend (blockB,blockSize*blockSize,MPI_INT,rank+(numBlocksWidth-1)*numBlocksWidth,1,MPI_Comm_Work);
else
MPI_Bsend (blockB,blockSize*blockSize,MPI_INT,rank-numBlocksWidth,1,MPI_Comm_Work);
// Receive blocks
MPI_Recv(blockA,blockSize*blockSize,MPI_INT,MPI_ANY_SOURCE,0,MPI_Comm_Work,&status);
MPI_Recv(blockB,blockSize*blockSize,MPI_INT,MPI_ANY_SOURCE,1,MPI_Comm_Work,&status);
MultMatrix (blockA,blockB,blockC,blockSize);
}
// Assembly of the resulting matrix
// Rank 0 collects one blockSize*blockSize chunk of ints from every rank of
// MPI_Comm_Work into C, stored in rank order.
MPI_Gather (blockC,blockSize*blockSize,MPI_INT,C,blockSize*blockSize,MPI_INT,0,MPI_Comm_Work);
// Wait for all ranks before the root post-processes the result and stops the timer.
MPI_Barrier(MPI_Comm_Work);
// Root only: convert the gathered data back and record the elapsed time.
if (rank==0)
{
// NOTE(review): BackRepack presumably undoes the block-contiguous layout
// produced by Repack so that C is an ordinary size x size matrix -- confirm.
C=BackRepack(C,size,blockSize);
// The timer stops after BackRepack, so time1 includes the back-conversion.
endtime = MPI_Wtime();
time1=endtime-starttime;
}