#include "stdio.h"
#include "stdlib.h"
#include "time.h"
#include "math.h"
#include "mpi.h"
void initProcess(double** pMatrixA, double** pMatrixB, double** pMatrixC, int size, double** pProcARows, double** pProcBRows, double** pProcCRows, int** pRowNums,
int** pRowInds, int procNum, int procRank);
void deInitProcess(double* matrixA, double* matrixB, double* matrixC, double* procARows, double* procBRows, double* procCRows, int* rowNums, int* rowInds, int procRank);
void distributeData(double* matrixA, double* matrixB, int size, double* procARows, double* procBRows, int* rowNums, int procNum, int procRank);
void parallelMult(double* procARows, double* procBRows, double* procCRows, int size, int* rowNums, int* rowInds, int procNum, int procRank);
void replicateData(double* matrixC, int size, double* procCRows, int* rowNums, int procNum, int procRank);
void serialMult(double* matrixA, double* matrixB, double* matrixC, int size);
int compareMatrix(double* m1, double* m2, int size);
double randD();
int main(int argc, char* argv[])
{
int procNum = 0, procRank = 0;
int size = 0;
double* matrixA = NULL;
double* matrixB = NULL;
double* matrixC = NULL;
double* testC = NULL;
double* procARows = NULL;
double* procBRows = NULL;
double* procCRows = NULL;
int* rowNums = NULL;
int* rowInds = NULL;
double t1 = 0.0, t2 = 0.0, serialDur = 0.0, parallelDur = 0.0;
int i = 0, j = 0;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &procNum);
MPI_Comm_rank(MPI_COMM_WORLD, &procRank);
sscanf(argv[1], "%d", &size);
initProcess(&matrixA, &matrixB, &matrixC, size, &procARows, &procBRows, &procCRows, &rowNums, &rowInds, procNum, procRank);
distributeData(matrixA, matrixB, size, procARows, procBRows, rowNums, procNum, procRank);
MPI_Barrier(MPI_COMM_WORLD);
t1 = MPI_Wtime();
parallelMult(procARows, procBRows, procCRows, size, rowNums, rowInds, procNum, procRank);
MPI_Barrier(MPI_COMM_WORLD);
t2 = MPI_Wtime();
parallelDur = t2 - t1;
replicateData(matrixC, size, procCRows, rowNums, procNum, procRank);
if (procRank == 0)
{
testC = (double*) malloc(size * size * sizeof(double));
for (i = 0; i < size; ++i)
{
for (j = 0; j < size; ++j)
{
testC[i * size + j] = 0.0;
}
}
t1 = MPI_Wtime();
serialMult(matrixA, matrixB, testC, size);
t2 = MPI_Wtime();
serialDur = t2 - t1;
printf("*******************Results*******************\n");
if (compareMatrix(matrixC, testC, size))
printf("\tResults are idential\n");
else
printf("\tResults are NOT idential\n");
printf("\tParallel total time : %f\n", parallelDur);
printf("\tSerial total time : %f\n", serialDur);
printf("\tSpeedup : %f\n", serialDur / parallelDur);
printf("*********************End*********************\n");
free(testC);
}
deInitProcess(matrixA, matrixB, matrixC, procARows, procBRows, procCRows, rowNums, rowInds, procRank);
MPI_Finalize();
return 0;
}
void initProcess(double** pMatrixA, double** pMatrixB, double** pMatrixC, int size, double** pProcARows, double** pProcBRows, double** pProcCRows,
int** pRowNums, int** pRowInds, int procNum, int procRank)
{
double* matrixA = NULL;
double* matrixB = NULL;
double* matrixC = NULL;
double* procARows = NULL;
double* procBRows = NULL;
double* procCRows = NULL;
int* rowNums = NULL;
int* rowInds = NULL;
int i = 0, j = 0, temp = 0;
rowNums = (int*) malloc(procNum * sizeof(int));
rowInds = (int*) malloc(procNum * sizeof(int));
temp = size % procNum;
rowNums[0] = size / procNum;
if (temp > 0)
++rowNums[0];
else
temp = 1;
rowInds[0] = 0;
for (i = 1; i < temp; ++i)
{
rowNums[i] = size / procNum + 1;
rowInds[i] = rowInds[i - 1] + rowNums[i - 1];
}
for (i = temp; i < procNum; ++i)
{
rowNums[i] = size / procNum;
rowInds[i] = rowInds[i - 1] + rowNums[i - 1];
}
procARows = (double*) malloc(rowNums[0] * size * sizeof(double));
procBRows = (double*) malloc(rowNums[0] * size * sizeof(double));
procCRows = (double*) malloc(rowNums[0] * size * sizeof(double));
for (i = 0; i < rowNums[0]; ++i)
{
for (j = 0; j < size; ++j)
{
procCRows[i * size + j] = 0.0;
}
}
if (procRank == 0)
{
matrixA = (double*) malloc(size * size * sizeof(double));
matrixB = (double*) malloc(size * size * sizeof(double));
matrixC = (double*) malloc(size * size * sizeof(double));
srand(time(NULL));
for (i = 0; i < size; ++i)
{
for (j = 0; j < size; ++j)
{
matrixA[i * size + j] = 5.0 - randD() * 10.0;
matrixB[i * size + j] = 5.0 - randD() * 10.0;
matrixC[i * size + j] = 0.0;
}
}
}
*pMatrixA = matrixA;
*pMatrixB = matrixB;
*pMatrixC = matrixC;
*pProcARows = procARows;
*pProcBRows = procBRows;
*pProcCRows = procCRows;
*pRowNums = rowNums;
*pRowInds = rowInds;
}
void deInitProcess(double* matrixA, double* matrixB, double* matrixC, double* procARows, double* procBRows, double* procCRows, int* rowNums, int* rowInds, int procRank)
{
if (procRank == 0)
{
free(matrixA);
free(matrixB);
free(matrixC);
}
free(rowNums);
free(rowInds);
free(procARows);
free(procBRows);
free(procCRows);
}
void distributeData(double* matrixA, double* matrixB, int size, double* procARows, double* procBRows, int* rowNums, int procNum, int procRank)
{
int* sendNums = NULL;
int* sendInds = NULL;
int i = 0;
sendNums = (int*) malloc(procNum * sizeof(int));
sendInds = (int*) malloc(procNum * sizeof(int));
sendNums[0] = rowNums[0] * size;
sendInds[0] = 0;
for (i = 1; i < procNum; ++i)
{
sendNums[i] = rowNums[i] * size;
sendInds[i] = sendInds[i - 1] + sendNums[i - 1];
}
MPI_Scatterv(matrixA, sendNums, sendInds, MPI_DOUBLE, procARows, rowNums[procRank] * size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Scatterv(matrixB, sendNums, sendInds, MPI_DOUBLE, procBRows, rowNums[procRank] * size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
free(sendNums);
free(sendInds);
}
void parallelMult(double* procARows, double* procBRows, double* procCRows, int size, int* rowNums, int* rowInds, int procNum, int procRank)
{
int i = 0, j = 0, k = 0, p = 0, t = 0;
MPI_Status status;
for (p = 0; p < procNum - 1; ++p)
{
t = (procRank + p) % procNum;
for (i = 0; i < rowNums[procRank]; ++i)
{
for (j = 0; j < rowNums[t]; ++j)
{
for (k = 0; k < size; ++k)
{
procCRows[i * size + k] += procARows[i * size + j + rowInds[t]] * procBRows[k + j * size];
}
}
}
MPI_Sendrecv_replace(procBRows, rowNums[0] * size, MPI_DOUBLE, ((procRank == 0) ? (procNum - 1) : (procRank - 1)), 0, (procRank + 1) % procNum, 0, MPI_COMM_WORLD, &status);
}
t = (procRank + procNum - 1) % procNum;
for (i = 0; i < rowNums[procRank]; ++i)
{
for (j = 0; j < rowNums[t]; ++j)
{
for (k = 0; k < size; ++k)
{
procCRows[i * size + k] += procARows[i * size + j + rowInds[t]] * procBRows[k + j * size];
}
}
}
}
void replicateData(double* matrixC, int size, double* procCRows, int* rowNums, int procNum, int procRank)
{
int* recvNums = NULL;
int* recvInds = NULL;
int i = 0;
recvNums = (int*) malloc(procNum * sizeof(int));
recvInds = (int*) malloc(procNum * sizeof(int));
recvNums[0] = rowNums[0] * size;
recvInds[0] = 0;
for (i = 1; i < procNum; ++i)
{
recvNums[i] = rowNums[i] * size;
recvInds[i] = recvInds[i - 1] + recvNums[i - 1];
}
MPI_Gatherv(procCRows, rowNums[procRank] * size, MPI_DOUBLE, matrixC, recvNums, recvInds, MPI_DOUBLE, 0, MPI_COMM_WORLD);
free(recvNums);
free(recvInds);
}
void serialMult(double* matrixA, double* matrixB, double* matrixC, int size)
{
int i = 0, j = 0, k = 0;
for (i = 0; i < size; ++i)
{
for (j = 0; j < size; ++j)
{
for (k = 0; k < size; ++k)
{
matrixC[i * size + k] += matrixA[i * size + j] * matrixB[j * size + k];
}
}
}
}
double randD()
{
return rand() / (double) RAND_MAX;
}
int compareMatrix(double* m1, double* m2, int size)
{
int i = 0, j = 0;
for (i = 0; i < size; ++i)
{
for (j = 0; j < size; ++j)
{
if (fabs(m1[i * size + j] - m2[i * size + j]) > 0.0000001)
return 0;
}
}
return 1;
}