Actual source code: mmbaij.c
  2: /*
  3:    Support for the parallel BAIJ matrix vector multiply
  4: */
  5: #include <../src/mat/impls/baij/mpi/mpibaij.h>
  6: #include <petsc/private/isimpl.h>
  8: PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
  9: {
 10:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
 11:   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
 12:   PetscInt     i, j, *aj = B->j, ec = 0, *garray;
 13:   PetscInt     bs = mat->rmap->bs, *stmp;
 14:   IS           from, to;
 15:   Vec          gvec;
 16: #if defined(PETSC_USE_CTABLE)
 17:   PetscHMapI    gid1_lid1 = NULL;
 18:   PetscHashIter tpos;
 19:   PetscInt      gid, lid;
 20: #else
 21:   PetscInt Nbs = baij->Nbs, *indices;
 22: #endif
 24:   PetscFunctionBegin;
 25: #if defined(PETSC_USE_CTABLE)
 26:   /* use a table - Mark Adams */
 27:   PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
 28:   for (i = 0; i < B->mbs; i++) {
 29:     for (j = 0; j < B->ilen[i]; j++) {
 30:       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
 31:       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
 32:       if (!data) {
 33:         /* one based table */
 34:         PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
 35:       }
 36:     }
 37:   }
 38:   /* form array of columns we need */
 39:   PetscCall(PetscMalloc1(ec, &garray));
 40:   PetscHashIterBegin(gid1_lid1, tpos);
 41:   while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
 42:     PetscHashIterGetKey(gid1_lid1, tpos, gid);
 43:     PetscHashIterGetVal(gid1_lid1, tpos, lid);
 44:     PetscHashIterNext(gid1_lid1, tpos);
 45:     gid--;
 46:     lid--;
 47:     garray[lid] = gid;
 48:   }
 49:   PetscCall(PetscSortInt(ec, garray));
 50:   PetscCall(PetscHMapIClear(gid1_lid1));
 51:   for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
 52:   /* compact out the extra columns in B */
 53:   for (i = 0; i < B->mbs; i++) {
 54:     for (j = 0; j < B->ilen[i]; j++) {
 55:       PetscInt gid1 = aj[B->i[i] + j] + 1;
 56:       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
 57:       lid--;
 58:       aj[B->i[i] + j] = lid;
 59:     }
 60:   }
 61:   B->nbs = ec;
 62:   PetscCall(PetscLayoutDestroy(&baij->B->cmap));
 63:   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
 64:   PetscCall(PetscHMapIDestroy(&gid1_lid1));
 65: #else
 66:   /* Make an array as long as the number of columns */
 67:   /* mark those columns that are in baij->B */
 68:   PetscCall(PetscCalloc1(Nbs, &indices));
 69:   for (i = 0; i < B->mbs; i++) {
 70:     for (j = 0; j < B->ilen[i]; j++) {
 71:       if (!indices[aj[B->i[i] + j]]) ec++;
 72:       indices[aj[B->i[i] + j]] = 1;
 73:     }
 74:   }
 76:   /* form array of columns we need */
 77:   PetscCall(PetscMalloc1(ec, &garray));
 78:   ec = 0;
 79:   for (i = 0; i < Nbs; i++) {
 80:     if (indices[i]) garray[ec++] = i;
 81:   }
 83:   /* make indices now point into garray */
 84:   for (i = 0; i < ec; i++) indices[garray[i]] = i;
 86:   /* compact out the extra columns in B */
 87:   for (i = 0; i < B->mbs; i++) {
 88:     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
 89:   }
 90:   B->nbs = ec;
 91:   PetscCall(PetscLayoutDestroy(&baij->B->cmap));
 92:   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
 93:   PetscCall(PetscFree(indices));
 94: #endif
 96:   /* create local vector that is used to scatter into */
 97:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));
 99:   /* create two temporary index sets for building scatter-gather */
100:   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
102:   PetscCall(PetscMalloc1(ec, &stmp));
103:   for (i = 0; i < ec; i++) stmp[i] = i;
104:   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));
106:   /* create temporary global vector to generate scatter context */
107:   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
109:   PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
110:   PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
112:   baij->garray = garray;
114:   PetscCall(ISDestroy(&from));
115:   PetscCall(ISDestroy(&to));
116:   PetscCall(VecDestroy(&gvec));
117:   PetscFunctionReturn(PETSC_SUCCESS);
118: }
120: /*
121:      Takes the local part of an already assembled MPIBAIJ matrix
122:    and disassembles it. This is to allow new nonzeros into the matrix
123:    that require more communication in the matrix vector multiply.
124:    Thus certain data-structures must be rebuilt.
126:    Kind of slow! But that's what application programmers get when
127:    they are sloppy.
128: */
129: PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
130: {
131:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
132:   Mat          B    = baij->B, Bnew;
133:   Mat_SeqBAIJ *Bbaij;
134:   PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
135:   PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
136:   MatScalar   *a, *atmp;
138:   PetscFunctionBegin;
139:   /* free stuff related to matrix-vec multiply */
140:   PetscCall(VecDestroy(&baij->lvec));
141:   PetscCall(VecScatterDestroy(&baij->Mvctx));
142:   if (baij->colmap) {
143: #if defined(PETSC_USE_CTABLE)
144:     PetscCall(PetscHMapIDestroy(&baij->colmap));
145: #else
146:     PetscCall(PetscFree(baij->colmap));
147: #endif
148:   }
150:   /* make sure that B is assembled so we can access its values */
151:   if (B) {
152:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
153:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
154:     Bbaij = (Mat_SeqBAIJ *)B->data;
155:     mbs   = Bbaij->mbs;
156:     a     = Bbaij->a;
158:     /* invent new B and copy stuff over */
159:     PetscCall(PetscMalloc1(mbs, &nz));
160:     for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
161:     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
162:     PetscCall(MatSetSizes(Bnew, m, n, m, n));
163:     PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
164:     PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
165:     /*
166:      Ensure that B's nonzerostate is monotonically increasing.
167:      Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
168:      */
169:     Bnew->nonzerostate = B->nonzerostate;
170:     if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
171:       ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
172:     }
174:     PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
175:     for (i = 0; i < mbs; i++) {
176:       for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
177:         col  = garray[Bbaij->j[j]];
178:         atmp = a + j * bs2;
179:         PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
180:       }
181:     }
182:     PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));
184:     PetscCall(PetscFree(nz));
185:     PetscCall(MatDestroy(&B));
187:     baij->B = Bnew;
188:   }
189:   PetscCall(PetscFree(baij->garray));
190:   A->was_assembled = PETSC_FALSE;
191:   A->assembled     = PETSC_FALSE;
192:   PetscFunctionReturn(PETSC_SUCCESS);
193: }
195: /*      ugly stuff added for Glenn someday we should fix this up */
197: static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
198: static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
200: PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
201: {
202:   Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
203:   Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
204:   PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
205:   PetscInt    *r_rmapd, *r_rmapo;
207:   PetscFunctionBegin;
208:   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
209:   PetscCall(MatGetSize(ina->A, NULL, &n));
210:   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
211:   nt = 0;
212:   for (i = 0; i < inA->rmap->mapping->n; i++) {
213:     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
214:       nt++;
215:       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
216:     }
217:   }
218:   PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
219:   PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
220:   for (i = 0; i < inA->rmap->mapping->n; i++) {
221:     if (r_rmapd[i]) {
222:       for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
223:     }
224:   }
225:   PetscCall(PetscFree(r_rmapd));
226:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
228:   PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
229:   for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
230:   no = inA->rmap->mapping->n - nt;
231:   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
232:   nt = 0;
233:   for (i = 0; i < inA->rmap->mapping->n; i++) {
234:     if (lindices[inA->rmap->mapping->indices[i]]) {
235:       nt++;
236:       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
237:     }
238:   }
239:   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
240:   PetscCall(PetscFree(lindices));
241:   PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
242:   for (i = 0; i < inA->rmap->mapping->n; i++) {
243:     if (r_rmapo[i]) {
244:       for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
245:     }
246:   }
247:   PetscCall(PetscFree(r_rmapo));
248:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
249:   PetscFunctionReturn(PETSC_SUCCESS);
250: }
252: PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
253: {
254:   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
256:   PetscFunctionBegin;
257:   PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
258:   PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
262: {
263:   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
264:   PetscInt           n, i;
265:   PetscScalar       *d, *o;
266:   const PetscScalar *s;
268:   PetscFunctionBegin;
269:   if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));
271:   PetscCall(VecGetArrayRead(scale, &s));
273:   PetscCall(VecGetLocalSize(uglydd, &n));
274:   PetscCall(VecGetArray(uglydd, &d));
275:   for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
276:   PetscCall(VecRestoreArray(uglydd, &d));
277:   /* column scale "diagonal" portion of local matrix */
278:   PetscCall(MatDiagonalScale(a->A, NULL, uglydd));
280:   PetscCall(VecGetLocalSize(uglyoo, &n));
281:   PetscCall(VecGetArray(uglyoo, &o));
282:   for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
283:   PetscCall(VecRestoreArrayRead(scale, &s));
284:   PetscCall(VecRestoreArray(uglyoo, &o));
285:   /* column scale "off-diagonal" portion of local matrix */
286:   PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
287:   PetscFunctionReturn(PETSC_SUCCESS);
288: }