Реализация второго алгоритма

#include "stdio.h"
#include "stdlib.h"
#include "time.h"
#include "math.h"
#include "mpi.h"

void initProcess(double** pMatrixA, double** pMatrixB, double** pMatrixC, int size, double** pProcARows, double** pProcBRows, double** pProcCRows, int** pRowNums,
                 int** pRowInds, int procNum, int procRank);
void deInitProcess(double* matrixA, double* matrixB, double* matrixC, double* procARows, double* procBRows, double* procCRows, int* rowNums, int* rowInds, int procRank);
void distributeData(double* matrixA, double* matrixB, int size, double* procARows, double* procBRows, int* rowNums, int procNum, int procRank);
void parallelMult(double* procARows, double* procBRows, double* procCRows, int size, int* rowNums, int* rowInds, int procNum, int procRank);
void replicateData(double* matrixC, int size, double* procCRows, int* rowNums, int procNum, int procRank);
void serialMult(double* matrixA, double* matrixB, double* matrixC, int size);
int compareMatrix(double* m1, double* m2, int size);
double randD();

int main(int argc, char* argv[])
{
    int procNum = 0, procRank = 0;
    int size = 0;
    double* matrixA = NULL;
    double* matrixB = NULL;
    double* matrixC = NULL;
    double* testC = NULL;
    double* procARows = NULL;
    double* procBRows = NULL;
    double* procCRows = NULL;
    int* rowNums = NULL;
    int* rowInds = NULL;
    double t1 = 0.0, t2 = 0.0, serialDur = 0.0, parallelDur = 0.0;
    int i = 0, j = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &procNum);
    MPI_Comm_rank(MPI_COMM_WORLD, &procRank);

    sscanf(argv[1], "%d", &size);

    initProcess(&matrixA, &matrixB, &matrixC, size, &procARows, &procBRows, &procCRows, &rowNums, &rowInds, procNum, procRank);

    distributeData(matrixA, matrixB, size, procARows, procBRows, rowNums, procNum, procRank);

    MPI_Barrier(MPI_COMM_WORLD);
    t1 = MPI_Wtime();

    parallelMult(procARows, procBRows, procCRows, size, rowNums, rowInds, procNum, procRank);

    MPI_Barrier(MPI_COMM_WORLD);
    t2 = MPI_Wtime();
    parallelDur = t2 - t1;

    replicateData(matrixC, size, procCRows, rowNums, procNum, procRank);

    if (procRank == 0)
    {
        testC = (double*) malloc(size * size * sizeof(double));

        for (i = 0; i < size; ++i)
        {
            for (j = 0; j < size; ++j)
            {
                testC[i * size + j] = 0.0;
            }
        }

        t1 = MPI_Wtime();

        serialMult(matrixA, matrixB, testC, size);

        t2 = MPI_Wtime();
        serialDur = t2 - t1;

        printf("*******************Results*******************\n");

        if (compareMatrix(matrixC, testC, size))
            printf("\tResults are idential\n");
        else
            printf("\tResults are NOT idential\n");

        printf("\tParallel total time : %f\n", parallelDur);
        printf("\tSerial total time : %f\n", serialDur);
        printf("\tSpeedup : %f\n", serialDur / parallelDur);

        printf("*********************End*********************\n");

        free(testC);
    }

    deInitProcess(matrixA, matrixB, matrixC, procARows, procBRows, procCRows, rowNums, rowInds, procRank);
    MPI_Finalize();
    return 0;
}

void initProcess(double** pMatrixA, double** pMatrixB, double** pMatrixC, int size, double** pProcARows, double** pProcBRows, double** pProcCRows,
                 int** pRowNums, int** pRowInds, int procNum, int procRank)
{
    double* matrixA = NULL;
    double* matrixB = NULL;
    double* matrixC = NULL;
    double* procARows = NULL;
    double* procBRows = NULL;
    double* procCRows = NULL;
    int* rowNums = NULL;
    int* rowInds = NULL;
    int i = 0, j = 0, temp = 0;

    rowNums = (int*) malloc(procNum * sizeof(int));
    rowInds = (int*) malloc(procNum * sizeof(int));
    temp = size % procNum;
    rowNums[0] = size / procNum;
    if (temp > 0)
        ++rowNums[0];
    else
        temp = 1;
    rowInds[0] = 0;
    for (i = 1; i < temp; ++i)
    {
        rowNums[i] = size / procNum + 1;
        rowInds[i] = rowInds[i - 1] + rowNums[i - 1];
    }
    for (i = temp; i < procNum; ++i)
    {
        rowNums[i] = size / procNum;
        rowInds[i] = rowInds[i - 1] + rowNums[i - 1];
    }

    procARows = (double*) malloc(rowNums[0] * size * sizeof(double));
    procBRows = (double*) malloc(rowNums[0] * size * sizeof(double));
    procCRows = (double*) malloc(rowNums[0] * size * sizeof(double));

    for (i = 0; i < rowNums[0]; ++i)
    {
        for (j = 0; j < size; ++j)
        {
            procCRows[i * size + j] = 0.0;
        }
    }

    if (procRank == 0)
    {
        matrixA = (double*) malloc(size * size * sizeof(double));
        matrixB = (double*) malloc(size * size * sizeof(double));
        matrixC = (double*) malloc(size * size * sizeof(double));

        srand(time(NULL));
        for (i = 0; i < size; ++i)
        {
            for (j = 0; j < size; ++j)
            {
                matrixA[i * size + j] = 5.0 - randD() * 10.0;
                matrixB[i * size + j] = 5.0 - randD() * 10.0;
                matrixC[i * size + j] = 0.0;
            }
        }
    }

    *pMatrixA = matrixA;
    *pMatrixB = matrixB;
    *pMatrixC = matrixC;
    *pProcARows = procARows;
    *pProcBRows = procBRows;
    *pProcCRows = procCRows;
    *pRowNums = rowNums;
    *pRowInds = rowInds;
}

void deInitProcess(double* matrixA, double* matrixB, double* matrixC, double* procARows, double* procBRows, double* procCRows, int* rowNums, int* rowInds, int procRank)
{
    if (procRank == 0)
    {
        free(matrixA);
        free(matrixB);
        free(matrixC);
    }
    free(rowNums);
    free(rowInds);
    free(procARows);
    free(procBRows);
    free(procCRows);
}

void distributeData(double* matrixA, double* matrixB, int size, double* procARows, double* procBRows, int* rowNums, int procNum, int procRank)
{
    int* sendNums = NULL;
    int* sendInds = NULL;
    int i = 0;

    sendNums = (int*) malloc(procNum * sizeof(int));
    sendInds = (int*) malloc(procNum * sizeof(int));
    sendNums[0] = rowNums[0] * size;
    sendInds[0] = 0;
    for (i = 1; i < procNum; ++i)
    {
        sendNums[i] = rowNums[i] * size;
        sendInds[i] = sendInds[i - 1] + sendNums[i - 1];
    }

    MPI_Scatterv(matrixA, sendNums, sendInds, MPI_DOUBLE, procARows, rowNums[procRank] * size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Scatterv(matrixB, sendNums, sendInds, MPI_DOUBLE, procBRows, rowNums[procRank] * size, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    free(sendNums);
    free(sendInds);
}

void parallelMult(double* procARows, double* procBRows, double* procCRows, int size, int* rowNums, int* rowInds, int procNum, int procRank)
{
    int i = 0, j = 0, k = 0, p = 0, t = 0;
    MPI_Status status;

    for (p = 0; p < procNum - 1; ++p)
    {
        t = (procRank + p) % procNum;
        for (i = 0; i < rowNums[procRank]; ++i)
        {
            for (j = 0; j < rowNums[t]; ++j)
            {
                for (k = 0; k < size; ++k)
                {
                    procCRows[i * size + k] += procARows[i * size + j + rowInds[t]] * procBRows[k + j * size];
                }
            }
        }
        MPI_Sendrecv_replace(procBRows, rowNums[0] * size, MPI_DOUBLE, ((procRank == 0) ? (procNum - 1) : (procRank - 1)), 0, (procRank + 1) % procNum, 0, MPI_COMM_WORLD, &status);
    }
    t = (procRank + procNum - 1) % procNum;
    for (i = 0; i < rowNums[procRank]; ++i)
    {
        for (j = 0; j < rowNums[t]; ++j)
        {
            for (k = 0; k < size; ++k)
            {
                procCRows[i * size + k] += procARows[i * size + j + rowInds[t]] * procBRows[k + j * size];
            }
        }
    }
}

void replicateData(double* matrixC, int size, double* procCRows, int* rowNums, int procNum, int procRank)
{
    int* recvNums = NULL;
    int* recvInds = NULL;
    int i = 0;

    recvNums = (int*) malloc(procNum * sizeof(int));
    recvInds = (int*) malloc(procNum * sizeof(int));
    recvNums[0] = rowNums[0] * size;
    recvInds[0] = 0;
    for (i = 1; i < procNum; ++i)
    {
        recvNums[i] = rowNums[i] * size;
        recvInds[i] = recvInds[i - 1] + recvNums[i - 1];
    }

    MPI_Gatherv(procCRows, rowNums[procRank] * size, MPI_DOUBLE, matrixC, recvNums, recvInds, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    free(recvNums);
    free(recvInds);
}

void serialMult(double* matrixA, double* matrixB, double* matrixC, int size)
{
    int i = 0, j = 0, k = 0;

    for (i = 0; i < size; ++i)
    {
        for (j = 0; j < size; ++j)
        {
            for (k = 0; k < size; ++k)
            {
                matrixC[i * size + k] += matrixA[i * size + j] * matrixB[j * size + k];
            }
        }
    }
}

double randD()
{
    return rand() / (double) RAND_MAX;
}

int compareMatrix(double* m1, double* m2, int size)
{
    int i = 0, j = 0;

    for (i = 0; i < size; ++i)
    {
        for (j = 0; j < size; ++j)
        {
            if (fabs(m1[i * size + j] - m2[i * size + j]) > 0.0000001)
                return 0;
        }
    }
    return 1;
}
Реализация второго алгоритма

Новости