Actual source code: ex19.c
  2: static char help[] = "Solve a 2D 5-point stencil in parallel with Kokkos batched KSP and ASM solvers.\n\
  3: Input parameters include:\n\
  4:   -n                : number of mesh points in x direction\n\
  5:   -m                : number of mesh points in y direction\n\
  6:   -num_local_blocks : number of local sub domains for block jacobi\n\n";
  8: /*
  9:   Include "petscksp.h" so that we can use KSP solvers.
 10: */
 11: #include <petscksp.h>
 12: #include <petscmat.h>
 14: int main(int argc, char **args)
 15: {
 16:   Vec         x, b, u; /* approx solution, RHS, exact solution */
 17:   Mat         A, Pmat; /* linear system matrix */
 18:   KSP         ksp;     /* linear solver context */
 19:   PetscReal   norm;    /* norm of solution error */
 20:   PetscInt    i, j, Ii, J, Istart, Iend, n = 7, m = 8, its, nblocks = 2;
 21:   PetscBool   flg;
 22:   PetscScalar v;
 23:   PetscMPIInt size, rank;
 24:   IS         *is_loc = NULL;
 25:   PC          pc;
 27:   PetscFunctionBeginUser;
 28:   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
 29:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
 30:   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
 31:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 32:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 33:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_local_blocks", &nblocks, NULL));
 34:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 35:          Compute the matrix and right-hand-side vector that define
 36:          the linear system, Ax = b.
 37:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 38:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 39:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
 40:   PetscCall(MatSetFromOptions(A));
 41:   PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
 42:   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL));
 43:   // MatDuplicate keeps the zero
 44:   PetscCall(MatCreate(PETSC_COMM_WORLD, &Pmat));
 45:   PetscCall(MatSetSizes(Pmat, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
 46:   PetscCall(MatSetFromOptions(Pmat));
 47:   PetscCall(MatSeqAIJSetPreallocation(Pmat, 5, NULL));
 48:   PetscCall(MatMPIAIJSetPreallocation(Pmat, 5, NULL, 3, NULL));
 49:   /*
 50:      Currently, all PETSc parallel matrix formats are partitioned by
 51:      contiguous chunks of rows across the processors.  Determine which
 52:      rows of the matrix are locally owned.
 53:   */
 54:   PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
 55:   /*
 56:      Set matrix elements for the 2-D, five-point stencil.
 57:    */
 58:   for (Ii = Istart; Ii < Iend; Ii++) {
 59:     v = -1.0;
 60:     i = Ii / n;
 61:     j = Ii - i * n;
 62:     if (i > 0) {
 63:       J = Ii - n;
 64:       PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
 65:     }
 66:     if (i < m - 1) {
 67:       J = Ii + n;
 68:       PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
 69:     }
 70:     if (j > 0) {
 71:       J = Ii - 1;
 72:       PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
 73:     }
 74:     if (j < n - 1) {
 75:       J = Ii + 1;
 76:       PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
 77:     }
 78:     v = 4.0;
 79:     PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
 80:   }
 81:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
 82:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
 83:   /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
 84:   PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
 85:   PetscCall(MatCreateVecs(A, &u, &b));
 86:   PetscCall(MatCreateVecs(A, &x, NULL));
 87:   /*
 88:      Set exact solution; then compute right-hand-side vector.
 89:      By default we use an exact solution of a vector with all
 90:      elements of 1.0;
 91:   */
 92:   PetscCall(VecSet(u, 1.0));
 93:   PetscCall(MatMult(A, u, b));
 94:   /*
 95:      View the exact solution vector if desired
 96:   */
 97:   flg = PETSC_FALSE;
 98:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
 99:   if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));
100:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
101:                 Setup ASM solver and batched KSP solver data
102:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
103:   PetscCall(PCASMCreateSubdomains(A, nblocks, &is_loc));
104:   {
105:     MatScalar *AA;
106:     PetscInt  *AJ, maxcols = 0, ncols;
107:     for (PetscInt row = Istart; row < Iend; row++) {
108:       PetscCall(MatGetRow(A, row, &ncols, NULL, NULL));
109:       if (ncols > maxcols) maxcols = ncols;
110:       PetscCall(MatRestoreRow(A, row, &ncols, NULL, NULL));
111:     }
112:     PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
113:     /* make explicit block matrix for batch solver */
114:     //if (rank==1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] nblocks = %d\n", rank, nblocks));
115:     for (PetscInt bid = 0; bid < nblocks; bid++) {
116:       IS blk_is = is_loc[bid];
117:       //if (rank==1) PetscCall(ISView(blk_is, PETSC_VIEWER_STDOUT_SELF));
118:       const PetscInt *subdom, *cols;
119:       PetscInt        n, ncol_row, jj;
120:       PetscCall(ISGetIndices(blk_is, &subdom));
121:       PetscCall(ISGetSize(blk_is, &n));
122:       //if (rank==1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "\t[%d] n[%d] = %d\n",rank,bid,n));
123:       for (PetscInt ii = 0; ii < n; ii++) {
124:         const MatScalar *vals;
125:         //if (rank==1) PetscCall(PetscPrintf(PETSC_COMM_SELF, "\t\t[%d] subdom[%d] = %d\n",rank,ii,subdom[ii]));
126:         PetscInt rowB = subdom[ii]; // global
127:         PetscCall(MatGetRow(A, rowB, &ncols, &cols, &vals));
128:         for (jj = ncol_row = 0; jj < ncols; jj++) {
129:           PetscInt idx, colj = cols[jj];
130:           PetscCall(ISLocate(blk_is, colj, &idx));
131:           if (idx >= 0) {
132:             AJ[ncol_row] = cols[jj];
133:             AA[ncol_row] = vals[jj];
134:             ncol_row++;
135:           }
136:         }
137:         PetscCall(MatRestoreRow(A, rowB, &ncols, &cols, &vals));
138:         PetscCall(MatSetValues(Pmat, 1, &rowB, ncol_row, AJ, AA, INSERT_VALUES));
139:       }
140:       PetscCall(ISRestoreIndices(blk_is, &subdom));
141:     }
142:     PetscCall(PetscFree2(AA, AJ));
143:     PetscCall(MatAssemblyBegin(Pmat, MAT_FINAL_ASSEMBLY));
144:     PetscCall(MatAssemblyEnd(Pmat, MAT_FINAL_ASSEMBLY));
145:     PetscCall(MatViewFromOptions(Pmat, NULL, "-view_c"));
146:   }
147:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
148:                 Create the linear solver and set various options
149:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
150:   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
151:   PetscCall(KSPSetOperators(ksp, A, Pmat));
152:   PetscCall(KSPSetFromOptions(ksp));
153:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
154:     Setup ASM solver
155:     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
156:   PetscCall(KSPGetPC(ksp, &pc));
157:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
158:   if (flg && nblocks > 0) { PetscCall(PCASMSetLocalSubdomains(pc, nblocks, is_loc, NULL)); }
159:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
160:                       Solve the linear system
161:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
162:   PetscCall(KSPSolve(ksp, b, x));
163:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
164:                       Check the solution and clean up
165:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
166:   PetscCall(VecAXPY(x, -1.0, u));
167:   PetscCall(VecNorm(x, NORM_2, &norm));
168:   PetscCall(KSPGetIterationNumber(ksp, &its));
169:   /*
170:      Print convergence information.
171:   */
172:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));
173:   /*
174:     cleanup
175:   */
176:   PetscCall(KSPDestroy(&ksp));
177:   if (is_loc) {
178:     if (0) {
179:       for (PetscInt bid = 0; bid < nblocks; bid++) PetscCall(ISDestroy(&is_loc[bid]));
180:       PetscCall(PetscFree(is_loc));
181:     } else {
182:       PetscCall(PCASMDestroySubdomains(nblocks, is_loc, NULL));
183:     }
184:   }
185:   PetscCall(VecDestroy(&u));
186:   PetscCall(VecDestroy(&x));
187:   PetscCall(VecDestroy(&b));
188:   PetscCall(MatDestroy(&A));
189:   PetscCall(MatDestroy(&Pmat));
190:   PetscCall(PetscFinalize());
191:   return 0;
192: }
194: /*TEST
195:   build:
196:     requires: parmetis kokkos_kernels
197:   testset:
198:     args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 4
199:     nsize: 4
200:     output_file: output/ex19_0.out
201:     test:
202:       suffix: batch
203:       args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_rtol 1e-1 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos
204:     test:
205:       suffix: asm
206:       args: -ksp_type cg -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3
208:  TEST*/