// Program code 10.1 Release 1.2 on 30.12.2005
// Hyper Quick Sort Algorithm 
  int ProcRank;             // Rank of current processor
  int ProcNum;              // Number of processors
int main(int argc, char *argv[]) {
  double *pProcData;     // Data block on current processor 
  int     ProcDataSize;  // Size of the data block

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
  MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);

  // Data initialization and data distribution among the processes
  DataInitialization ( &pProcData, &ProcDataSize);  

  // Parallel data sorting 
  ParallelHyperQuickSort ( pProcData, ProcDataSize );

  // Process termination
  ProcessTermination ( pProcData, ProcDataSize );

  MPI_Finalize();
}


// Data initialization and data distribution among the processes 
void DataInitialization( double **pProcData, double *ProcDataSize){
  int DataSize; // Initial Size of array
  double *pData;
  // Memory allocation and data definition
  if ( ProcRank == 0 ) {
    DataSize = GetDataSize();
    *pData = new double[DataSize];
    DataGeneration ( pData, DataSize);
  }

  // Data distribution among the processes
  DataDistribution ( *pProcData, DataSize, *ProcDataSize);
}

// Data distribution among the processes
void DataDistribution (double *pProcData, int DataSize, int *ProcDataSize) {
  if ( rank == 0 ) { 
    *ProcDataSize = DataSize / ProcNum;
  }
  MPI_Bcast ( ProcDataSize, 1, MPI_INT, 0, MPI_COMM_WORLD );
  *ProcData = new double[*ProcDataSize];

  MPI_Scatter ( &pData[(ProcRank)*(*ProcDataSize)], *ProcDataSize, 
    MPI_DOUBLE, *ProcData, *ProcDataSize, MPI_DOUBLE, 0, MPI_COMM_WORLD );
}

void ParallelHyperQuickSort ( double *pProcData, int ProcDataSize) { 
  MPI_Status status;
  int CommProcRank;   // Rank of the processor, that communicates with current processor
  double *pData,      // Part of data block, that is supposed to be left on current processor
         *pSendData,  // Part of data block, that will be passed to the CommProcRank processor
         *pRecvData,  // Part of data block, that will be received from the CommProcRank processor
         *pMergeData; // Data block, that will be placed on current process after mergeing
  int     DataSize, SendDataSize, RecvDataSize, MergeDataSize;
  int HypercubeDim = (int)ceil(log(ProcNum)/log(2)); // Number of hypercube dimensions
  int Mask = ProcNum; 
  double Pivot; 

  // Initial local sort on each processor
  LocalDataSort ( pProcData, ProcDataSize ); 

  // Iterations of HyperQuickSort algorithm
  for ( int i = HypercubeDim; i > 0; i-- ) {

    // Definition of pivot element and it's broadcast 
    PivotDistribution (pProcData,ProcDataSize,HypercubeDim,Mask,i,&Pivot);
    Mask = Mask >>  1;

    // Definition of the block border
    int pos = GetProcDataDivisionPos (pProcData, ProcDataSize, Pivot);

    // Dividing block into parts
    if ( ( (rank&Mask) >> (i-1) ) == 0 ) { // higher bit = 0 
      pSendData = & pProcData[pos+1];
      SendDataSize = ProcDataSize - pos – 1;
      if ( SendDataSize < 0 ) SendDataSize = 0;
      CommProcRank = ProcRank + Mask
      pData = & pProcData[0];
      DataSize = pos + 1; 
    }
    else { // lower bit = 1
      pSendData = & pProcData[0];
      SendDataSize = pos + 1;
      if ( SendDataSize > ProcDataSize ) SendDataSize = pos;
      CommProcRank = ProcRank – Mask
      pData = & pProcData[pos+1];
      DataSize = ProcDataSize - pos - 1; 
      if ( DataSize < 0 ) DataSize = 0;
    }
    // Sending the sizes of the data blocks parts
    MPI_Sendrecv(&SendDataSize, 1, MPI_INT, CommProcRank, 0, 
      &RecvDataSize, 1, MPI_INT, CommProcRank, 0, MPI_COMM_WORLD, &status);

    // Sending the parts of data blocks
    pRecvData = new double[RecvDataSize];
    MPI_Sendrecv(pSendData, SendDataSize, MPI_DOUBLE,
      CommProcRank, 0, pRecvData, RecvDataSize, MPI_DOUBLE,
      CommProcRank, 0, MPI_COMM_WORLD, &status);
    
    // Merging the parts
    MergeDataSize = DataSize + RecvDataSize;
    pMergeData = new double[MergeDataSize];
    DataMerge(pMergeData, pMergeData, pData, DataSize, 
              pRecvData, RecvDataSize); 
    delete [] pProcData;
    delete [] pRecvData;
    pProcData = pMergeData;
    ProcDataSize = MergeDataSize; 
  }
}