/* nag_sparse_nsym_precon_bdilu (f11dfc) Example Program.
 *
 * Copyright 2014 Numerical Algorithms Group.
 *
 * Mark 24, 2013.
 */

#include <nag.h>
#include <nagf11.h>
#include <nag_stdlib.h>


static void overlap(Integer *n, Integer *nnz, Integer *irow, Integer *icol,
                    Integer *nb, Integer *istb, Integer *indb, Integer *lindb,
                    Integer *nover, Integer *iwork);

int main(void) {
   
   /* Scalars */
   double  dtolg;
   Integer i,j,k,la,lfillg,lindb,liwork,minval,mb,n,nb,nnz,nnzc,nover;
   Integer exit_status = 0, maxval_ret = 9999;
   Nag_SparseNsym_Piv  pstrag;
   Nag_SparseNsym_Fact milug;

   /* Arrays */
   char    nag_enum_arg[40];
   double  *a = 0, *dtol = 0;
   Integer *icol = 0, *idiag = 0, *indb = 0, *ipivp = 0, *ipivq = 0, *irow = 0;
   Integer *istb = 0, *istr = 0, *iwork = 0, *lfill = 0, *npivm = 0;
   Nag_SparseNsym_Piv  *pstrat;
   Nag_SparseNsym_Fact *milu;

   /* Nag Types */
   NagError            fail;
 
   /* Print example header */
   printf("nag_sparse_nsym_precon_bdilu (f11dfc) Example Program Results\n\n");

   /* Skip heading in data file */
   scanf("%*[^\n] ");

   /* Get the matrix order and number of non-zero entries. */
   scanf("%ld %*[^\n]", &n);
   scanf("%ld %*[^\n]", &nnz);

   la     = 20 * nnz;
   lindb  = 3 * n;
   liwork = 9 * n + 3;

   /* Allocate arrays */
   a     = NAG_ALLOC( la, double );
   irow  = NAG_ALLOC( la, Integer);
   icol  = NAG_ALLOC( la, Integer);

   idiag = NAG_ALLOC( lindb,   Integer);
   indb  = NAG_ALLOC( lindb,   Integer);
   ipivp = NAG_ALLOC( lindb,   Integer);
   ipivq = NAG_ALLOC( lindb,   Integer);
   istr  = NAG_ALLOC( lindb+1, Integer);

   iwork = NAG_ALLOC( liwork, Integer);

   if ( (!a) || (!irow) || (!icol) || (!idiag) || (!indb) || (!ipivp) || 
        (!ipivq) || (!istr) || (!iwork) ) {
      printf("Allocation failure!\n");
      exit_status = -1;
   }  
   
   /* Initialise arrays */
   for ( i = 0; i < la; i++ ) {
      a[i]    = 0.0;
      irow[i] = 0;
      icol[i] = 0;
   }

   for( i = 0; i < lindb; i++ ) {
      indb[i]  = 0;
      ipivp[i] = 0;
      ipivq[i] = 0;
      istr[i]  = 0;
      idiag[i] = 0;
   }
   istr[lindb] = 0;

   for( i = 0; i < liwork; i++ ) {
      iwork[i] = 0;
   }

   /* Read the matrix A */
   for ( i = 0; i < nnz; i++) {
      scanf("%lf %ld %"NAG_IFMT, &a[i], &irow[i], &icol[i] );
   }
   scanf("%*[^\n] ");

   /* Read algorithmic parameters */
   scanf("%ld %lf %*[^\n]",  &lfillg, &dtolg);

   /* nag_enum_name_to_value (x04nac): Converts NAG enum member name to value */
   scanf("%39s %*[^\n]", nag_enum_arg);
   pstrag = (Nag_SparseNsym_Piv) nag_enum_name_to_value( nag_enum_arg );

   /* nag_enum_name_to_value (x04nac): Converts NAG enum member name to value */
   scanf("%39s %*[^\n]", nag_enum_arg);
   milug = (Nag_SparseNsym_Fact) nag_enum_name_to_value( nag_enum_arg );

   /* Read algorithmic parameters */
   scanf("%ld %ld %*[^\n]", &nb, &nover);

   /* Allocate arrays */
   dtol  = NAG_ALLOC( nb,   double );
   istb  = NAG_ALLOC( nb+1, Integer);
   lfill = NAG_ALLOC( nb,   Integer);
   npivm = NAG_ALLOC( nb,   Integer);

   pstrat = (Nag_SparseNsym_Piv *) NAG_ALLOC( nb, Nag_SparseNsym_Piv);
   milu   = (Nag_SparseNsym_Fact *) NAG_ALLOC( nb, Nag_SparseNsym_Fact);

   if ( (!dtol) || (!istb) || (!lfill) || (!npivm) || (!pstrat) || (!milu) ) {
      printf("Allocation failure!\n");
      exit_status = -1;
   }

   /* Initialise arrays */
   for( i = 0; i < nb; i++ ) {
      dtol[i]   = 0.0;
      istb[i]   = 0;
      lfill[i]  = 0;
      npivm[i]  = 0;

      pstrat[i] = 0;
      milu[i]   = 0;
   }
   istb[nb] = 0;

   /* Define diagonal block indices.
    * In this example use blocks of MB consecutive rows and initialise
    * assuming no overlap.
    */
   mb = (n + nb - 1)/nb;
   for ( k = 0; k < nb; k++) {
      istb[k] = k * mb + 1;
   }
   istb[nb] = n + 1;

   for ( i = 0; i < n; i++) {
      indb[i] = i + 1;
   }

   /* Modify INDB and ISTB to account for overlap. */
   overlap(&n, &nnz, irow, icol, &nb, istb, indb, &lindb, &nover, iwork);
   
   /* Output matrix and blocking details */
   printf(" Original Matrix\n");
   printf(" n     = %4ld\n", n);
   printf(" nnz   = %4ld\n", nnz);
   printf(" nb    = %4ld\n", nb);

   for ( k = 0; k < nb; k++ ) {
     printf(" Block = %4ld,%12s = %4ld,", k+1, "order",
            istb[k+1] - istb[k]);
      minval = indb[istb[k]-1];
      for ( j = istb[k]; j < istb[k+1]-1; j++) {
         minval=MIN( minval, indb[j] );
      }
      printf("%13s = %4ld\n", "start row", minval);
   }
   printf("\n");

   /* Set algorithmic parameters for each block from global values */
   for (k = 0; k < nb; k++) {
      lfill[k]  =  lfillg;
      dtol[k]   =  dtolg;
      pstrat[k] =  pstrag;
      milu[k]   =  milug;
   }

   /* Initialise fail */
   INIT_FAIL(fail);
   
   /* Calculate factorization
    * 
    * nag_sparse_nsym_precon_bdilu (f11dfc). Calculates incomplete LU
    * factorization of local or overlapping diagonal blocks, mostly used
    * as incomplete LU preconditioner for real sparse matrix.
    */ 
   nag_sparse_nsym_precon_bdilu(n, nnz, a, la, irow, icol, nb, istb, indb,
                                lindb, lfill, dtol, pstrat, milu, ipivp,
                                ipivq, istr, idiag, &nnzc, npivm, &fail);

   if( fail.code != NE_NOERROR ) {
      printf("Error from nag_sparse_nsym_precon_bdilu (f11dfc).\n%s\n\n",
             fail.message);
      exit(-2);
   }
   
   /* Output details of the factorization */
   printf(" Factorization\n");
   printf(" nnzc  = %4ld\n\n", nnzc);
   printf(" Elements of factorization\n");
   printf("        i    j           c(i,j)   Index\n");

   for ( k = 0; k < nb; k++) {
      printf("  C_%1ld  --------------------------------\n", k+1);

      /* Elements of the k-th block */
      for ( i = istr[istb[k]-1]-1; i < istr[istb[k+1]-1]-1; i++) {
         printf("%9ld %4ld %16.5e %7ld\n",
                irow[i], icol[i], a[i], i+1);
      }
   }

   k = 0;
   maxval_ret = npivm[k];
   for (k = 1 ; k < nb; k++ ) {
      maxval_ret = MAX( maxval_ret, npivm[k] );
   }

   printf("\n Details of factorized blocks\n");
   if ( maxval_ret > 0) {

      /* Including pivoting details. */
      printf("  k   i    istr(i)   idiag(I)    indb(i)  ipivp(i)  ipivq(i)\n");
      for ( k = 0; k < nb; k++) {
         i = istb[k] - 1;
         printf("%3ld %3ld %10ld ",k+1, i+1, istr[i]);
         printf("%10ld %10ld %10ld %10ld\n",
                idiag[i], indb[i], ipivp[i], ipivq[i]);
 
         for ( i = istb[k]; i < istb[k+1] - 1; i++ ) {
            printf("%3ld %10ld %10ld ",
                   i+1, istr[i], idiag[i]);
            printf("%10ld %10ld %10ld\n",
                   indb[i], ipivp[i], ipivq[i]);
         }
         printf(" -----------------------------------------------------\n");
      }
   }
   else {
      
      /* No pivoting on any block. */
      printf("  k   i     istr(i)   idiag(i)    indb(i)\n");   
      for ( k = 0; k < nb; k++) {
         i = istb[k] - 1;
         printf("%3ld %3ld %10ld ",k+1,i+1,istr[i]); 
         printf("%10ld %10ld\n", idiag[i], indb[i]);

         for ( i = istb[k]; i < istb[k+1] - 1; i++) {
            printf("%7ld %10ld %10ld %10ld\n",
                   i+1, istr[i], idiag[i], indb[i]);
         }
         printf(" ---------------------------------------\n");
      }      
   }
   
   NAG_FREE(a);
   NAG_FREE(irow);
   NAG_FREE(icol);
   NAG_FREE(idiag);
   NAG_FREE(indb);
   NAG_FREE(ipivp);
   NAG_FREE(ipivq);
   NAG_FREE(istr);
   NAG_FREE(dtol);
   NAG_FREE(istb);
   NAG_FREE(lfill);
   NAG_FREE(npivm);
   NAG_FREE(pstrat);
   NAG_FREE(milu);
   NAG_FREE(iwork);

   return exit_status;
}

/* ************************************************************************** */

static void overlap(Integer *n, Integer *nnz, Integer *irow, Integer *icol,
                    Integer *nb, Integer *istb, Integer *indb, Integer *lindb,
                    Integer *nover, Integer *iwork) {
   /* Purpose
    * =======
    *
    * This routine takes a set of row indices INDB defining the diagonal blocks
    * to be used in nag_sparse_nsym_precon_bdilu (f11dfc) to define a block
    * Jacobi or additive Schwarz preconditioner, and expands them to allow for
    * NOVER levels of overlap.
    *
    * The pointer array ISTB is also updated accordingly, so that the returned
    * values of ISTB and INDB can be passed to 
    * nag_sparse_nsym_precon_bdilu (f11dfc) to define overlapping diagonal 
    * blocks.
    *
    * ----------------------------------------------------------------------- */

   /* Scalars */
   Integer i, ik, ind, iover, j, k, l, n21, nadd, row;

   /* Find the number of nonzero elements in each row of the matrix A, and start
    * address of each row. Store the start addresses in iwork(n,...,2*n-1).
    */

   for ( i = 0; i < (*n); i++) {
      iwork[i] =  0;
   }

   for ( i = 0; i < (*nnz); i++) {
      iwork[irow[i]-1] = iwork[irow[i]-1] + 1;
   }
   iwork[ (*n) ] = 1;

   for ( i = 0; i < (*n); i++) {
      iwork[(*n)+i+1] = iwork[(*n)+i] + iwork[i];
   }

   /* Loop over blocks. */
   for ( k = 0; k < (*nb); k++) {

      /* Initialize marker array. */
      for (j = 0; j < (*n); j++) {
         iwork[j] =  0;
      }

      /* Mark the rows already in block K in the workspace array. */
      for ( l = istb[k]; l < istb[k+1]; l++) {
         iwork[indb[l-1]-1] = 1;
      }

      /* Loop over levels of overlap. */
      for ( iover = 1; iover <= (*nover); iover++) {
 
         /* Initialize counter of new row indices to be added. */
         ind = 0;

         /* Loop over the rows currently in the diagonal block. */
         for ( l = istb[k]; l < istb[k+1]; l++ ) {
            row = indb[l-1];

            /* Loop over non-zero elements in row ROW. */
            for ( i = iwork[(*n)+row-1]; i < iwork[(*n)+row]; i++ ) {

               /* If the column index of the nonzero element is not in the
                * existing set for this block, store it to be added later, and
                * mark it in the marker array.
                */
               if ( iwork[icol[i-1]-1] == 0 ) {
                  iwork[icol[i-1]-1] = 1;
                  iwork[2*(*n)+1+ind] = icol[i-1];
                  ind = ind + 1;
               }
            }
         }

         /* Shift the indices in INDB and add the new entries for block K.
          * Change ISTB accordingly.
          */
         nadd = ind;
         if ( istb[(*nb)]+nadd-1 > (*lindb) ) {
            printf("**** lindb too small, lindb = %ld ****\n", *lindb);
            exit(-1);
         }
 
         for ( i = istb[(*nb)] - 1; i >= istb[k+1]; i-- ) {
            indb[i+nadd-1] = indb[i-1];
         }

         n21 = 2 * (*n) + 1;
         ik = istb[k+1] - 1;

         for ( j = 0; j < nadd; j++ ) {
            indb[ik + j] =  iwork[n21 + j];
         }

         for ( j = k+1; j < (*nb)+1; j++) {
            istb[j] =  istb[j] + nadd;
         }
      }
   }
   return;
}