// Program 12.4 Release 1.3 on 30.08.2006
// The Parallel Gauss-Jacobi Method (outer loop has been parallelized)
  bool IsPrintEnabled;
  double *u;

int main(int argc, char *argv[])
{
  if (argc < 2)
  {
    printf("USAGE: GJ_Par.exe N\n");
    return 1;
  }
  if (argc == 2)
    IsPrintEnabled = 0;
  else
    IsPrintEnabled = atoi(argv[2]);

  int N = atoi(argv[1]);

  GJ_Par(N);  

  return 0;
}

int GJ_Par(int N)
{
  int i, j;
  clock_t begin,end;
  
  double temp;
  double dmax;
  double dm, d; 
  int Step; 
  omp_lock_t dmax_lock;
  
  printf("The Gauss - Jacobi algorithm. Parallel version.\r\n");
  
  u = CreateMatrix(N);
  un = CreateMatrix(N);
  if (u == NULL || un == NULL)
    return 1;
  
  Step = 0; 
  
  printf("Matrix size: %d\r\n", N);
  begin = clock();
  omp_init_lock(&dmax_lock);
  
  
  // main loop
  
  do
  {
    dmax = 0;
#pragma omp parallel for shared(u, un, N, dmax) private(i,j,temp,d,dm)
    for (i = 1; i < N - 1; i++)
    {
      dm = 0;
      for(j = 1; j < N - 1; j++)
      {
        temp = u[N * i + j];
        un[N * i + j] = 0.25 * (u[N * i + j + 1] + u[N * i + j - 1] + 
          u[N * (i + 1) + j] + u[N * (i - 1) + j]);
        d = fabs(un[N * i + j] - temp);
        if (dm < d)
          dm = d;
      }    
      
      omp_set_lock(&dmax_lock);
      if ( dmax < dm ) 
        dmax = dm;
      omp_unset_lock(&dmax_lock);
      
    }
    // change the matrixes
    double * utemp = u;
    u = un;
    un = utemp;
    
    Step++;
  }
  while (dmax > EPS);
  
  // end of main loop
  
  
  end = clock();
  
  long tck = end - begin;
  
  printf("Results:\r\n- current eps = %f\r\n- required eps = %f\r\n- matrix Size = %d\r\n- iterations = %d\r\n- time = %f\r\n",
    dmax, EPS, N, Step, ((double)tck)/CLOCKS_PER_SEC);

  if (IsPrintEnabled)
    PrintMatrix(u, N);
  
  DeleteMatrix(u);
  DeleteMatrix(un);
  
  return 0;
}