Actual source code: snesngmres.c
  1: /* Defines the basic SNES object */
  2: #include <../src/snes/impls/ngmres/snesngmres.h>
  3: #include <petscblaslapack.h>
  6: /*MC
  7:   SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio.
  9:    Level: beginner
 11:    "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio,
 12:    SIAM Journal on Scientific Computing, 21(5), 2000.
 14: .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types)
 15: M*/
 19: PetscErrorCode SNESReset_NGMRES(SNES snes)
 20: {
 21:   SNES_NGMRES   *ngmres = (SNES_NGMRES*) snes->data;
 25:   VecDestroyVecs(ngmres->msize, &ngmres->rdot);
 26:   VecDestroyVecs(ngmres->msize, &ngmres->xdot);
 27:   return(0);
 28: }
 32: PetscErrorCode SNESDestroy_NGMRES(SNES snes)
 33: {
 37:   SNESReset_NGMRES(snes);
 38:   if (snes->work) {VecDestroyVecs(snes->nwork, &snes->work);}
 39:   if (snes->data) {
 40:     SNES_NGMRES * ngmres = (SNES_NGMRES *)snes->data;
 41:     PetscFree5(ngmres->h, ngmres->beta, ngmres->xi, ngmres->r_norms, ngmres->q);
 42:     PetscFree(ngmres->s);
 43: #if PETSC_USE_COMPLEX
 44:     PetscFree(ngmres->rwork);
 45: #endif
 46:     PetscFree(ngmres->work);
 47:   }
 48:   PetscFree(snes->data);
 49:   return(0);
 50: }
 54: PetscErrorCode SNESSetUp_NGMRES(SNES snes)
 55: {
 56:   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
 57:   PetscInt msize,hsize;
 61:   msize         = ngmres->msize;  /* restart size */
 62:   hsize         = msize * msize;
 65:   /* explicit least squares minimization solve */
 66:   PetscMalloc5(hsize,PetscScalar,&ngmres->h,
 67:                       msize,PetscScalar,&ngmres->beta,
 68:                       msize,PetscScalar,&ngmres->xi,
 69:                       msize,PetscReal,&ngmres->r_norms,
 70:                       hsize,PetscScalar,&ngmres->q);
 71:   ngmres->nrhs = 1;
 72:   ngmres->lda = msize;
 73:   ngmres->ldb = msize;
 74:   PetscMalloc(msize*sizeof(PetscScalar),&ngmres->s);
 75: 
 76:   PetscMemzero(ngmres->h,hsize*sizeof(PetscScalar));
 77:   PetscMemzero(ngmres->q,hsize*sizeof(PetscScalar));
 78:   PetscMemzero(ngmres->xi,msize*sizeof(PetscScalar));
 79:   PetscMemzero(ngmres->beta,msize*sizeof(PetscScalar));
 80: 
 81:   ngmres->lwork = 12*msize;
 82: #if PETSC_USE_COMPLEX
 83:   PetscMalloc(sizeof(PetscReal)*ngmres->lwork,&ngmres->rwork);
 84: #endif
 85:   PetscMalloc(sizeof(PetscScalar)*ngmres->lwork,&ngmres->work);
 87:   VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->xdot);
 88:   VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->rdot);
 89:   SNESDefaultGetWork(snes, 2);
 90:   return(0);
 91: }
 95: PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes)
 96: {
 97:   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
 99: 
101:   PetscOptionsHead("SNES NGMRES options");
102:   PetscOptionsInt("-snes_ngmres_m", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);
103:   PetscOptionsInt("-snes_ngmres_restart", "Maximum iterations before restart.", "SNES", ngmres->k_rmax, &ngmres->k_rmax, PETSC_NULL);
104:   PetscOptionsBool("-snes_ngmres_debug", "Debugging output for NGMRES", "SNES", ngmres->debug, &ngmres->debug, PETSC_NULL);
105:   PetscOptionsReal("-snes_ngmres_gammaA", "Residual selection constant", "SNES", ngmres->gammaA, &ngmres->gammaA, PETSC_NULL);
106:   PetscOptionsReal("-snes_ngmres_gammaC", "Residual restart constant", "SNES", ngmres->gammaC, &ngmres->gammaC, PETSC_NULL);
107:   PetscOptionsReal("-snes_ngmres_epsilonB", "Difference selection constant", "SNES", ngmres->epsilonB, &ngmres->epsilonB, PETSC_NULL);
108:   PetscOptionsReal("-snes_ngmres_deltaB", "Difference residual selection constant", "SNES", ngmres->deltaB, &ngmres->deltaB, PETSC_NULL);
109:   PetscOptionsTail();
110:   if ((ngmres->gammaA > ngmres->gammaC) && (ngmres->gammaC > 2.)) ngmres->gammaC = ngmres->gammaA;
111:   return(0);
112: }
116: PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer)
117: {
118:   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
119:   PetscBool      iascii;
123:   PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);
124:   if (iascii) {
125:     PetscViewerASCIIPrintf(viewer, "  Size of space %d\n", ngmres->msize);
126:     PetscViewerASCIIPrintf(viewer, "  Maximum iterations before restart %d\n", ngmres->k_rmax);
127:   }
128:   return(0);
129: }
135: PetscErrorCode SNESSolve_NGMRES(SNES snes)
136: {
137:   SNES           pc;
138:   SNES_NGMRES   *ngmres = (SNES_NGMRES *) snes->data;
140: 
141: 
142:   /* present solution, residual, and preconditioned residual */
143:   Vec            x, r, b, d;
144:   Vec            x_A, r_A;
146:   /* previous iterations to construct the subspace */
147:   Vec            *rdot = ngmres->rdot;
148:   Vec            *xdot = ngmres->xdot;
150:   /* coefficients and RHS to the minimization problem */
151:   PetscScalar    *beta = ngmres->beta;
152:   PetscScalar    *xi = ngmres->xi;
153:   PetscReal      r_norm, r_A_norm;
154:   PetscReal      nu;
155:   PetscScalar    alph_total = 0.;
156:   PetscScalar    qentry;
157:   PetscInt       i, j, k, k_restart, l, ivec;
159:   /* solution selection data */
160:   PetscBool selectA, selectRestart;
161:   PetscReal d_norm, d_min_norm, d_cur_norm;
162:   PetscReal r_min_norm;
167:   /* variable initialization */
168:   snes->reason  = SNES_CONVERGED_ITERATING;
169:   x             = snes->vec_sol;
170:   r             = snes->vec_func;
171:   b             = snes->vec_rhs;
172:   x_A           = snes->vec_sol_update;
173:   r_A           = snes->work[0];
174:   d             = snes->work[1];
175:   VecDuplicate(x,&r);
177:   SNESGetPC(snes, &pc);
178:   PetscObjectTakeAccess(snes);
179:   snes->iter = 0;
180:   snes->norm = 0.;
181:   PetscObjectGrantAccess(snes);
183:   /* initialization */
185:   /* r = F(x) */
186:   SNESComputeFunction(snes, x, r);
187:   if (snes->domainerror) {
188:     snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN;
189:     return(0);
190:   }
192:   /* nu = (r, r) */
193:   VecNorm(r, NORM_2, &r_norm);
194:   r_min_norm = r_norm;
195:   nu = r_norm*r_norm;
196:   if (PetscIsInfOrNanReal(r_norm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm");
198:   /* q_{00} = nu  */
199:   Q(0,0) = nu;
200:   ngmres->r_norms[0] = r_norm;
201:   /* rdot[0] = r */
202:   VecCopy(x, xdot[0]);
203:   VecCopy(r, rdot[0]);
205:   PetscObjectTakeAccess(snes);
206:   snes->norm = r_norm;
207:   PetscObjectGrantAccess(snes);
208:   SNESLogConvHistory(snes, r_norm, 0);
209:   SNESMonitor(snes, 0, r_norm);
210:   (*snes->ops->converged)(snes,0,0.0,0.0,r_norm,&snes->reason,snes->cnvP);
211:   if (snes->reason) return(0);
213:   k_restart = 1;
214:   l = 1;
215:   for (k=1; k<snes->max_its; k++) {
217:     /* select which vector of the stored subspace will be updated */
218:     ivec = k_restart % ngmres->msize; /* replace the last used part of the subspace */
221:     /* Computation of x^M */
222:     SNESSolve(pc, b, x);
223:     /* r = F(x) */
224:     SNESComputeFunction(snes, x, r);
225:     VecNorm(r, NORM_2, &r_norm);
226:     /* nu = (r, r) */
227:     ngmres->r_norms[ivec] = r_norm;
228:     nu = r_norm*r_norm;
229:     if (r_min_norm > r_norm) r_min_norm = r_norm;  /* the minimum norm is now of r^M */
231:     /* construct the right hand side and xi factors */
232:     for (i = 0; i < l; i++) {
233:       VecDot(rdot[i], r, &xi[i]);
234:       beta[i] = nu - xi[i];
235:     }
237:     /* construct h */
238:     for (j = 0; j < l; j++) {
239:       for (i = 0; i < l; i++) {
240:         H(i, j) = Q(i, j) - xi[i] - xi[j] + nu;
241:       }
242:     }
243: #ifdef PETSC_MISSING_LAPACK_GELSS
244:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "NGMRES with LS requires the LAPACK GELSS routine.");
245: #else
246:     ngmres->m = PetscBLASIntCast(l);
247:     ngmres->n = PetscBLASIntCast(l);
248:     ngmres->info = PetscBLASIntCast(0);
249:     ngmres->rcond = -1.;
250:     ngmres->nrhs = 1;
251: #ifdef PETSC_USE_COMPLEX
252:     LAPACKgelss_(&ngmres->m,
253:                  &ngmres->n,
254:                  &ngmres->nrhs,
255:                  ngmres->h,
256:                  &ngmres->lda,
257:                  ngmres->beta,
258:                  &ngmres->ldb,
259:                  ngmres->s,
260:                  &ngmres->rcond,
261:                  &ngmres->rank,
262:                  ngmres->work,
263:                  &ngmres->lwork,
264:                  ngmres->rwork,
265:                  &ngmres->info);
266: #else
267:     LAPACKgelss_(&ngmres->m,
268:                  &ngmres->n,
269:                  &ngmres->nrhs,
270:                  ngmres->h,
271:                  &ngmres->lda,
272:                  ngmres->beta,
273:                  &ngmres->ldb,
274:                  ngmres->s,
275:                  &ngmres->rcond,
276:                  &ngmres->rank,
277:                  ngmres->work,
278:                  &ngmres->lwork,
279:                  &ngmres->info);
280: #endif
281:     if (ngmres->info < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to GELSS");
282:     if (ngmres->info > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SVD failed to converge");
283: #endif
285:     alph_total = 0.;
286:     for (i = 0; i < l; i++) {
287:       alph_total += beta[i];
288:     }
289:     VecCopy(x, x_A);
290:     VecScale(x_A, 1. - alph_total);
292:     for(i=0;i<l;i++){
293:       ierr= VecAXPY(x_A, beta[i], xdot[i]);
294:     }
295:     SNESComputeFunction(snes, x_A, r_A);
296:     VecNorm(r_A, NORM_2, &r_A_norm);
298:     selectA = PETSC_TRUE;
299:     /* Conditions for choosing the accelerated answer */
301:     /* Criterion A -- the norm of the function isn't increased above the minimum by too much */
302:     if (r_A_norm >= ngmres->gammaA*r_min_norm) {
303:       selectA = PETSC_FALSE;
304:     }
305: 
306:     /* Criterion B -- the choice of x^A isn't too close to some other choice */
307:     ierr=VecCopy(x_A,d);
308:     ierr=VecAXPY(d,-1,x);
309:     ierr=VecNorm(d,NORM_2,&d_norm);
310:     d_min_norm = -1.0;
311:     for(i=0;i<l;i++) {
312:       ierr=VecCopy(x_A,d);
313:       ierr=VecAXPY(d,-1,xdot[i]);
314:       ierr=VecNorm(d,NORM_2,&d_cur_norm);
315:       if((d_cur_norm < d_min_norm) || (d_min_norm < 0.0)) d_min_norm = d_cur_norm;
316:     }
317:     if (ngmres->epsilonB*d_norm<d_min_norm || sqrt(r_norm)<ngmres->deltaB*sqrt(r_min_norm)) {
318:     } else {
319:       selectA=PETSC_FALSE;
320:     }
323:     if (selectA) {
324:       if (ngmres->debug)
325:         PetscPrintf(PETSC_COMM_WORLD, "picked r_A, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm);
326:       /* copy it over */
327:       r_norm = r_A_norm;
328:       nu = r_norm*r_norm;
329:       VecCopy(r_A, r);
330:       VecCopy(x_A, x);
331:     } else {
332:       if(ngmres->debug)
333:         PetscPrintf(PETSC_COMM_WORLD, "picked r_M, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm);
334:     }
336:     selectRestart = PETSC_FALSE;
337: 
338:     /* maximum iteration criterion */
339:     if (k_restart > ngmres->k_rmax) {
340:       selectRestart = PETSC_TRUE;
341:     }
343:     /* difference stagnation restart */
344:     if         ((ngmres->epsilonB*d_norm > d_min_norm) &&
345:          (sqrt(r_A_norm) > ngmres->deltaB*sqrt(r_min_norm))) {
346:       if (ngmres->debug)
347:         PetscPrintf(PETSC_COMM_WORLD, "difference restart: %e > %e\n", ngmres->epsilonB*d_norm, d_min_norm);
348:       selectRestart = PETSC_TRUE;
349:     }
350: 
351:     /* residual stagnation restart */
352:     if (sqrt(r_A_norm) > ngmres->gammaC*sqrt(r_min_norm)) {
353:       if (ngmres->debug)
354:         PetscPrintf(PETSC_COMM_WORLD, "residual restart: %e > %e\n", sqrt(r_A_norm), ngmres->gammaC*sqrt(r_min_norm));
355:       selectRestart = PETSC_TRUE;
356:     }
358:     if (selectRestart) {
359:       if (ngmres->debug)
360:         PetscPrintf(PETSC_COMM_WORLD, "Restarted at iteration %d\n", k_restart);
361:       k_restart = 1;
362:       l = 1;
363:       /* q_{00} = nu */
364:       ngmres->r_norms[0] = r_norm;
365:       nu = r_norm*r_norm;
366:       Q(0,0) = nu;
367:       /* rdot[0] = r */
368:       VecCopy(x, xdot[0]);
369:       VecCopy(r, rdot[0]);
370:     } else {
371:       /* select the current size of the subspace */
372:       if (l < ngmres->msize) {
373:         l++;
374:       }
375:       k_restart++;
376:       /* place the current entry in the list of previous entries */
377:       VecCopy(r, rdot[ivec]);
378:       VecCopy(x, xdot[ivec]);
379:       ngmres->r_norms[ivec] = r_norm;
380:       if (r_min_norm > r_norm) r_min_norm = r_norm;  /* the minimum norm is now of r^A */
381:       for (i = 0; i < l; i++) {
382:         VecDot(r, rdot[i], &qentry);
383:         Q(i, ivec) = qentry;
384:         Q(ivec, i) = qentry;
385:       }
386:     }
387: 
388:     SNESLogConvHistory(snes, r_norm, k);
389:     SNESMonitor(snes, k, r_norm);
391:     snes->iter =k;
392:     (*snes->ops->converged)(snes,snes->iter,0.0,0.0,r_norm,&snes->reason,snes->cnvP);
393:     if (snes->reason) return(0);
394:   }
395:   snes->reason = SNES_DIVERGED_MAX_IT;
396:   return(0);
397: }
404: PetscErrorCode SNESCreate_NGMRES(SNES snes)
405: {
406:   SNES_NGMRES   *ngmres;
410:   snes->ops->destroy        = SNESDestroy_NGMRES;
411:   snes->ops->setup          = SNESSetUp_NGMRES;
412:   snes->ops->setfromoptions = SNESSetFromOptions_NGMRES;
413:   snes->ops->view           = SNESView_NGMRES;
414:   snes->ops->solve          = SNESSolve_NGMRES;
415:   snes->ops->reset          = SNESReset_NGMRES;
417:   PetscNewLog(snes, SNES_NGMRES, &ngmres);
418:   snes->data = (void*) ngmres;
419:   ngmres->msize = 10;
420:   ngmres->debug = PETSC_FALSE;
422:   ngmres->gammaA = 2.;
423:   ngmres->gammaC = 2.;
424:   ngmres->deltaB = 0.9;
425:   ngmres->epsilonB = 0.1;
426:   ngmres->k_rmax = 200;
428:   SNESGetPC(snes, &snes->pc);
429:   return(0);
430: }