Реализация алгоритма

#include "stdafx.h"
#include <mpi.h>
#include <stdio.h>
#include <conio.h>
#include <time.h>
#include <stdlib.h>
#include <iostream>
#include <math.h>
 
using namespace std;
 
 
// Выделение памяти под матрицы
void MallocMatrix(double*& matrix, int n)
{
    matrix = (double*)malloc(n*n*sizeof(double));
}
 
void CallocMatrix(double*& matrix, int n)
{
    matrix = (double*)calloc(n*n,sizeof(double));
}
 
// Случайная генерация матрицы
void RandomMatrixGen(double*& matrix, int n)
{
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            matrix[i*n+j] = rand()%20-10;
}
 
// Последовательный алгоритм умножения
double* MMul(double* imatrixa, double* imatrixb, int n, double& time)
{
    double* imatrixresult = 0;
    CallocMatrix(imatrixresult,n);
    double d1 = MPI_Wtime();
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            for (int k = 0; k < n; k++)
                imatrixresult[i*n+j]+=imatrixa[i*n+k]*imatrixb[k*n+j];
    double d2 = MPI_Wtime();
    time = d2-d1;
    return imatrixresult;
}
 
// Вывод матрицы
void PrintMatrix(double* matrix, int n)
{
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
            printf("%f ",matrix[i*n+j]);
        printf("\n");
    }
}
 
// Сравнение матриц
bool MatrixEqual(double* matrixa, double* matrixb, int n)
{
    bool equal = true;
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            if (abs(matrixa[i*n+j]-matrixb[i*n+j])>0.00001)
                equal =false;
    return equal;
}
 
// Параллельный алгоритм ленточного умножения матриц
double* LinearMMul(int rank, int size, double* matrixa, double* matrixb, int n)
{
    MPI_Status stat;
 
    int* sendcount = (int*)malloc(size*sizeof(int));
    int* diffbetw = (int*)malloc(size*sizeof(int));
 
    int d = n/size;
    int* addarr = (int*)calloc(size,sizeof(int)); // Массив добавок к массиву с количеством отправляемых элементов
    int limit = n - d*size; 
    for (int i = 0; i < limit; i++)
        addarr[i]++;
 
    int sum = 0; // Переменная для определения начала очередной строки
    for (int i = 0; i < size; i++)
    {
        sendcount[i]=(d+addarr[i])*n;
        if (i!=0)
        {
            sum+=addarr[i-1]*n;
            diffbetw[i]=i*d*n+sum;
        }
        else 
            diffbetw[i]=i*d*n;
    }
    double* dtempa = (double*)malloc(n*(d+1)*sizeof(double));
    double* dtempb = (double*)malloc(n*(d+1)*sizeof(double));
    double* dtempres = (double*)calloc(n*(d+1),sizeof(double));
    double* resmatrix = 0;
    MallocMatrix(resmatrix,n);
    // Рассылка строк процессам
    MPI_Scatterv(matrixa,sendcount,diffbetw,MPI_DOUBLE,dtempa,(d+1)*n,MPI_DOUBLE,0,MPI_COMM_WORLD);
    MPI_Scatterv(matrixb,sendcount,diffbetw,MPI_DOUBLE,dtempb,(d+1)*n,MPI_DOUBLE,0,MPI_COMM_WORLD);
 
    int count = sendcount[rank]/n;  // Количество строк у текущего процесса
    int curcount = count; // Переменная будет хранить количество полученных строк матрицы
 
    int prevcount = 0; 
    for (int i = 1; i <= rank; i++)
        prevcount+=sendcount[i-1]/n; // Смещение в строке для перемножения частей строк
 
    int currank = rank; // Переменная будет хранить ранг процесса, от которого были получены строки
    for (int z = 0; z < size; z++)
    {
        // Просто перемножение матриц
        for (int i = 0; i < count; i++)
            for (int j = 0; j < curcount; j++)
                for (int k = 0; k < n; k++)
                    dtempres[i*n+k]+=dtempa[i*n+j+prevcount]*dtempb[j*n+k];
 
        if (z!=size-1)
        {
            // Обмен строками
            int to = (rank + 1) % size;
            int from = rank-1;
            if (from < 0)
                from = size - 1;
            MPI_Sendrecv_replace(dtempb,(d+1)*n,MPI_DOUBLE,from,0,to,0,MPI_COMM_WORLD,&stat);
 
            // Вычисление смещения и количества полученных строк
            prevcount = (prevcount+curcount)%n;
            currank = rank+z+1;
            if (currank>=size)
                currank %= size;
            curcount = sendcount[currank]/n;
        }
    }
    // Объединение результаов
    MPI_Gatherv(dtempres,sendcount[rank],MPI_DOUBLE,resmatrix,sendcount,diffbetw,MPI_DOUBLE,0,MPI_COMM_WORLD);
 
    free(sendcount);
    free(diffbetw);
    free(addarr);
    free(dtempa);
    free(dtempb);
    free(dtempres);
    return resmatrix;
}

// Алгоритм Фокса
double* BlockMMul(int rank, int size, double* matrixa, double* matrixb, int n)
{
    int sqrtsize = int(sqrt((double)size)); // Количество блоков в "строке"
    if (sqrtsize*sqrtsize!=size)
        return 0;
 
    int blocksize = int(ceil((double)n/sqrtsize)); 
    int* sendcount = (int*)malloc(size*sizeof(int));
    int* diffbetw = (int*)malloc(size*sizeof(int));
 
    double* imatrixa = 0;
    double* imatrixb = 0;
    double* itempa = (double*)malloc(blocksize*blocksize*sizeof(double));
    double* itempb = (double*)malloc(blocksize*blocksize*sizeof(double));
    double* itempagetted = (double*)malloc(blocksize*blocksize*sizeof(double));
    double* resmatrix = 0;
 
    int msize = blocksize*sqrtsize; // Новый размер матрицы
    if (rank == 0)
    {
        // Создание увеличенных матриц
        imatrixa = (double*)calloc(msize*msize,sizeof(double));
        imatrixb = (double*)calloc(msize*msize,sizeof(double));
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
            {
                imatrixa[i*msize+j]=matrixa[i*n+j];
                imatrixb[i*msize+j]=matrixb[i*n+j];
            }
        MallocMatrix(resmatrix,msize);
    }
    for (int i = 0; i < size; i++)
    {
        sendcount[i] = 1;
        diffbetw[i]=blocksize*(i%sqrtsize)+i/sqrtsize*blocksize*msize;
    }
 
    int blen[2], disp[2],column[2];
    MPI_Datatype blocktype, tosend;
    int sizeofdouble;
    // Создание блочного типа
    MPI_Type_vector(blocksize,blocksize,msize,MPI_DOUBLE,&blocktype);
    MPI_Type_commit(&blocktype);
    MPI_Type_extent(MPI_DOUBLE, &sizeofdouble);
    blen[0]  = 1;
    blen[1]  = 1;
    disp[0]  = 0;
    disp[1]  = sizeofdouble;
    column[0]= blocktype;
    column[1] = MPI_UB;
    MPI_Type_struct(2, blen, disp, column, &tosend);
    MPI_Type_commit(&tosend);
 
    // Рассылка блоков
    MPI_Scatterv(imatrixa,sendcount,diffbetw,tosend,itempa,blocksize*blocksize,MPI_DOUBLE,0,MPI_COMM_WORLD);
    MPI_Scatterv(imatrixb,sendcount,diffbetw,tosend,itempb,blocksize*blocksize,MPI_DOUBLE,0,MPI_COMM_WORLD);
 
    double* resblock = (double*)calloc(blocksize*blocksize,sizeof(double));
 
    // Создание строчного коммуникатора из декартового
    MPI_Comm comm, strcom;
    int dimensions[2], periods[2], vcoods[2];
    dimensions[0]=sqrtsize;
    dimensions[1]=sqrtsize;
    periods[0]=1;
    periods[1]=1;
    vcoods[0]=0;
    vcoods[1]=1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, periods, 1, &comm);
    MPI_Cart_sub(comm,vcoods,&strcom);
 
 
    MPI_Status stat;
    int from, to;
    int bcast_root;
 
    for (int z = 0; z < sqrtsize; z++)
    {
        // Перемножение блоков
        bcast_root = (rank/sqrtsize + z) % sqrtsize; 
        if (bcast_root == rank%sqrtsize)
        { 
            MPI_Bcast(itempa, blocksize*blocksize, MPI_DOUBLE, bcast_root, strcom); 
            for (int i = 0; i < blocksize; i++)
                for (int j = 0; j < blocksize; j++)
                    for (int k = 0; k < blocksize; k++)
                        resblock[i*blocksize+j]+=itempa[i*blocksize+k]*itempb[k*blocksize+j];
        } 
        else 
        { 
 
            MPI_Bcast(itempagetted, blocksize*blocksize, MPI_DOUBLE, bcast_root, strcom); 
            for (int i = 0; i < blocksize; i++)
                for (int j = 0; j < blocksize; j++)
                    for (int k = 0; k < blocksize; k++)
                        resblock[i*blocksize+j]+=itempagetted[i*blocksize+k]*itempb[k*blocksize+j];
        }
 
        if (z!=sqrtsize-1)
        {
            from = rank-sqrtsize;
            if (from<0)
                from = sqrtsize*sqrtsize-sqrtsize+rank%sqrtsize; //-from%sqrtsize;
            to = rank+sqrtsize;
            if (to>=size)
                to = to%size;
 
            // Обмен блоками
            MPI_Sendrecv_replace(itempb,blocksize*blocksize,MPI_DOUBLE,from,0,to,0,MPI_COMM_WORLD,&stat);
        }
    }
    // Объединение результатов
    MPI_Gatherv(resblock,blocksize*blocksize,MPI_DOUBLE,resmatrix,sendcount,diffbetw,tosend,0,MPI_COMM_WORLD);
 
    // Создание уменьшенной матрицы 
 
    double* cmatrix = 0;
    if (rank==0)
    {
        cmatrix = (double*)malloc(n*n*sizeof(double));
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                cmatrix[i*n+j]=resmatrix[i*msize+j];
    }
 
    free(sendcount);
    free(diffbetw);
    free(itempa);
    free(itempb);
    free(itempagetted);
    if(rank==0)
    {
        free(imatrixa);
        free(imatrixb);
    }
 
    free(resblock);
    free(resmatrix);
    MPI_Type_free(&blocktype);
    MPI_Type_free(&tosend);
    MPI_Comm_free(&comm);
    MPI_Comm_free(&strcom);
 
    return cmatrix;
}

Реализация

Новости