Actual source code: nepdefl.c
slepc-3.18.0 2022-10-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
17: if (X) *X = extop->X;
18: if (H) MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
19: return 0;
20: }
22: PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
23: {
24: Vec uu;
25: PetscInt ld,i;
26: PetscMPIInt np;
27: PetscReal norm;
29: BVGetColumn(extop->X,k,&uu);
30: ld = extop->szd+1;
31: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE);
32: BVRestoreColumn(extop->X,k,&uu);
33: BVNormColumn(extop->X,k,NORM_2,&norm);
34: BVScaleColumn(extop->X,k,1.0/norm);
35: MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);
36: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
37: extop->H[k*(ld+1)] = lambda;
38: return 0;
39: }
41: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
42: {
43: Mat A,H;
44: PetscInt ldh=extop->szd+1,ldds,k1=k+1;
45: PetscScalar *eigr,*eigi,*t,*Z;
46: Vec x;
48: NEPDeflationExtendInvariantPair(extop,u,lambda,k);
49: MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H);
50: MatDenseSetLDA(H,ldh);
51: PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t);
52: DSReset(ds);
53: DSSetType(ds,DSNHEP);
54: DSAllocate(ds,ldh);
55: DSGetLeadingDimension(ds,&ldds);
56: DSSetDimensions(ds,k1,0,k1);
57: DSGetMat(ds,DS_MAT_A,&A);
58: MatCopy(H,A,SAME_NONZERO_PATTERN);
59: DSRestoreMat(ds,DS_MAT_A,&A);
60: MatDestroy(&H);
61: DSSolve(ds,eigr,eigi);
62: DSVectors(ds,DS_MAT_X,&k,NULL);
63: DSGetArray(ds,DS_MAT_X,&Z);
64: BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds);
65: DSRestoreArray(ds,DS_MAT_X,&Z);
66: BVGetColumn(extop->X,k,&x);
67: NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE);
68: BVRestoreColumn(extop->X,k,&x);
69: PetscFree3(eigr,eigi,t);
70: return 0;
71: }
73: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
74: {
75: PetscMPIInt np,rk,count;
76: PetscScalar *array1,*array2;
77: PetscInt nloc;
79: if (extop->szd) {
80: MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk);
81: MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np);
82: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
83: if (v) {
84: VecGetArray(v,&array1);
85: VecGetArray(vex,&array2);
86: if (back) PetscArraycpy(array1,array2,nloc);
87: else PetscArraycpy(array2,array1,nloc);
88: VecRestoreArray(v,&array1);
89: VecRestoreArray(vex,&array2);
90: }
91: if (a) {
92: VecGetArray(vex,&array2);
93: if (back) {
94: PetscArraycpy(a,array2+nloc,extop->szd);
95: PetscMPIIntCast(extop->szd,&count);
96: MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
97: } else {
98: PetscArraycpy(array2+nloc,a,extop->szd);
99: PetscMPIIntCast(extop->szd,&count);
100: MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
101: }
102: VecRestoreArray(vex,&array2);
103: }
104: } else {
105: if (back) VecCopy(vex,v);
106: else VecCopy(v,vex);
107: }
108: return 0;
109: }
111: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
112: {
113: PetscInt nloc;
114: Vec u;
115: VecType type;
117: if (extop->szd) {
118: BVGetColumn(extop->nep->V,0,&u);
119: VecGetType(u,&type);
120: BVRestoreColumn(extop->nep->V,0,&u);
121: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
122: VecSetType(*v,type);
123: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
124: nloc += extop->szd;
125: VecSetSizes(*v,nloc,PETSC_DECIDE);
126: } else BVCreateVec(extop->nep->V,v);
127: return 0;
128: }
130: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
131: {
132: PetscInt nloc;
133: BVType type;
134: BVOrthogType otype;
135: BVOrthogRefineType oref;
136: PetscReal oeta;
137: BVOrthogBlockType oblock;
138: NEP nep=extop->nep;
140: if (extop->szd) {
141: BVGetSizes(nep->V,&nloc,NULL,NULL);
142: BVCreate(PetscObjectComm((PetscObject)nep),V);
143: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
144: BVGetType(nep->V,&type);
145: BVSetType(*V,type);
146: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
147: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
148: PetscObjectStateIncrease((PetscObject)*V);
149: } else BVDuplicateResize(nep->V,sz,V);
150: return 0;
151: }
153: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
154: {
155: PetscInt n,next,i;
156: PetscRandom rand;
157: PetscScalar *array;
158: PetscMPIInt nn,np;
160: BVGetRandomContext(extop->nep->V,&rand);
161: VecSetRandom(v,rand);
162: if (extop->szd) {
163: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
164: BVGetSizes(extop->nep->V,&n,NULL,NULL);
165: VecGetLocalSize(v,&next);
166: VecGetArray(v,&array);
167: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
168: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
169: PetscMPIIntCast(extop->n,&nn);
170: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v));
171: VecRestoreArray(v,&array);
172: }
173: return 0;
174: }
176: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
177: {
178: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
179: PetscScalar sone=1.0,zero=0.0;
180: PetscBLASInt ldh_,ldhj_,n_;
182: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
183: PetscArrayzero(Hj,i);
184: PetscBLASIntCast(ldhj+1,&ldh_);
185: PetscBLASIntCast(ldhj,&ldhj_);
186: PetscBLASIntCast(n,&n_);
187: if (idx<1) {
188: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
189: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
190: } else {
191: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
192: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
193: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
194: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
195: }
196: if (idx<0) {
197: idx = -idx;
198: for (k=1;k<idx;k++) {
199: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
200: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
201: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
202: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
203: }
204: }
205: return 0;
206: }
208: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
209: {
210: PetscInt i;
212: NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n);
213: extop->n++;
214: BVSetActiveColumns(extop->X,0,extop->n);
215: if (extop->n <= extop->szd) {
216: /* update XpX */
217: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
218: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
219: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
220: /* determine minimality index */
221: extop->midx = PetscMin(extop->max_midx,extop->n);
222: /* polynominal basis coefficients */
223: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
224: /* evaluate the polynomial basis in H */
225: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
226: }
227: return 0;
228: }
230: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
231: {
232: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
233: Mat A,B;
234: PetscScalar *array;
235: const PetscScalar *barray;
237: if (idx<0) {ini = 0; fin = extop->nep->nt;}
238: else {ini = idx; fin = idx+1;}
239: if (y) sz = hfjp?n+2:n+1;
240: else sz = hfjp?3*n:2*n;
241: ldh = extop->szd+1;
242: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
243: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
244: MatDenseGetArray(A,&array);
245: for (j=0;j<n;j++)
246: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
247: MatDenseRestoreArrayWrite(A,&array);
248: if (y) {
249: MatDenseGetArray(A,&array);
250: array[extop->n*(sz+1)] = lambda;
251: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
252: for (i=0;i<n;i++) array[n*sz+i] = y[i];
253: MatDenseRestoreArrayWrite(A,&array);
254: for (j=ini;j<fin;j++) {
255: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
256: MatDenseGetArrayRead(B,&barray);
257: for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
258: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
259: MatDenseRestoreArrayRead(B,&barray);
260: }
261: } else {
262: off = idx<0?ld*n:0;
263: MatDenseGetArray(A,&array);
264: for (k=0;k<n;k++) {
265: array[(n+k)*sz+k] = 1.0;
266: array[(n+k)*sz+n+k] = lambda;
267: }
268: if (hfjp) for (k=0;k<n;k++) {
269: array[(2*n+k)*sz+n+k] = 1.0;
270: array[(2*n+k)*sz+2*n+k] = lambda;
271: }
272: MatDenseRestoreArray(A,&array);
273: for (j=ini;j<fin;j++) {
274: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
275: MatDenseGetArrayRead(B,&barray);
276: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
277: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
278: MatDenseRestoreArrayRead(B,&barray);
279: }
280: }
281: MatDestroy(&A);
282: MatDestroy(&B);
283: return 0;
284: }
286: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
287: {
288: NEP_DEF_MATSHELL *matctx;
289: NEP_EXT_OP extop;
290: Vec x1,y1;
291: PetscScalar *yy,sone=1.0,zero=0.0;
292: const PetscScalar *xx;
293: PetscInt nloc,i;
294: PetscMPIInt np;
295: PetscBLASInt n_,one=1,szd_;
297: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
298: MatShellGetContext(M,&matctx);
299: extop = matctx->extop;
300: if (extop->ref) VecZeroEntries(y);
301: if (extop->szd) {
302: x1 = matctx->w[0]; y1 = matctx->w[1];
303: VecGetArrayRead(x,&xx);
304: VecPlaceArray(x1,xx);
305: VecGetArray(y,&yy);
306: VecPlaceArray(y1,yy);
307: MatMult(matctx->T,x1,y1);
308: if (!extop->ref && extop->n) {
309: VecGetLocalSize(x1,&nloc);
310: /* copy for avoiding warning of constant array xx */
311: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
312: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
313: BVDotVec(extop->X,x1,matctx->work);
314: PetscBLASIntCast(extop->n,&n_);
315: PetscBLASIntCast(extop->szd,&szd_);
316: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
317: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
318: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
319: }
320: VecResetArray(x1);
321: VecRestoreArrayRead(x,&xx);
322: VecResetArray(y1);
323: VecRestoreArray(y,&yy);
324: } else MatMult(matctx->T,x,y);
325: return 0;
326: }
328: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
329: {
330: NEP_DEF_MATSHELL *matctx;
332: MatShellGetContext(M,&matctx);
333: if (right) VecDuplicate(matctx->w[0],right);
334: if (left) VecDuplicate(matctx->w[0],left);
335: return 0;
336: }
338: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
339: {
340: NEP_DEF_MATSHELL *matctx;
342: MatShellGetContext(M,&matctx);
343: if (matctx->extop->szd) {
344: BVDestroy(&matctx->U);
345: PetscFree2(matctx->hfj,matctx->work);
346: PetscFree2(matctx->A,matctx->B);
347: VecDestroy(&matctx->w[0]);
348: VecDestroy(&matctx->w[1]);
349: }
350: if (matctx->P != matctx->T) MatDestroy(&matctx->P);
351: MatDestroy(&matctx->T);
352: PetscFree(matctx);
353: return 0;
354: }
356: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
357: {
358: PetscScalar p;
359: PetscInt i;
361: if (!jacobian) {
362: val[0] = 1.0;
363: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
364: } else {
365: val[0] = 0.0;
366: p = 1.0;
367: for (i=1;i<extop->n;i++) {
368: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
369: p *= (lambda-extop->bc[i-1]);
370: }
371: }
372: return 0;
373: }
375: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
376: {
377: NEP_DEF_MATSHELL *matctx,*matctxC;
378: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
379: Mat F,Mshell,Mcomp;
380: PetscBool ini=PETSC_FALSE;
381: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
382: PetscBLASInt n_,info,szd_;
384: if (!M) Mshell = jacobian?extop->MJ:extop->MF;
385: else Mshell = *M;
386: Mcomp = jacobian?extop->MF:extop->MJ;
387: if (!Mshell) {
388: ini = PETSC_TRUE;
389: PetscNew(&matctx);
390: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
391: nloc += szd; mloc += szd;
392: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
393: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation);
394: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation);
395: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation);
396: matctx->nep = extop->nep;
397: matctx->extop = extop;
398: if (!M) {
399: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
400: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
401: PetscObjectReference((PetscObject)matctx->T);
402: if (!jacobian) {
403: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
404: matctx->P = extop->nep->function_pre;
405: PetscObjectReference((PetscObject)matctx->P);
406: } else matctx->P = matctx->T;
407: }
408: } else {
409: matctx->jacob = jacobian;
410: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T);
411: *M = Mshell;
412: if (!jacobian) {
413: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P);
414: else matctx->P = matctx->T;
415: }
416: }
417: if (szd) {
418: BVCreateVec(extop->nep->V,matctx->w);
419: VecDuplicate(matctx->w[0],matctx->w+1);
420: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
421: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
422: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
423: }
424: } else MatShellGetContext(Mshell,&matctx);
425: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
426: if (ini || matctx->theta != lambda) {
427: if (jacobian) NEPComputeJacobian(extop->nep,lambda,matctx->T);
428: else NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P);
429: }
430: if (n) {
431: matctx->hfjset = PETSC_FALSE;
432: if (!extop->simpU) {
433: /* likely hfjp has been already computed */
434: if (Mcomp) {
435: MatShellGetContext(Mcomp,&matctxC);
436: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
437: PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt);
438: matctx->hfjset = PETSC_TRUE;
439: }
440: }
441: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
442: if (!matctx->hfjset) {
443: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
444: matctx->hfjset = PETSC_TRUE;
445: }
446: BVSetActiveColumns(matctx->U,0,n);
447: hf = jacobian?hfjp:hfj;
448: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
449: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
450: BVMultInPlace(matctx->U,F,0,n);
451: BVSetActiveColumns(extop->W,0,extop->n);
452: for (j=1;j<extop->nep->nt;j++) {
453: BVMatMult(extop->X,extop->nep->A[j],extop->W);
454: MatDensePlaceArray(F,hf+j*n*n);
455: BVMult(matctx->U,1.0,1.0,extop->W,F);
456: MatDenseResetArray(F);
457: }
458: MatDestroy(&F);
459: } else {
460: hfj = matctx->hfj;
461: BVSetActiveColumns(matctx->U,0,n);
462: BVMatMult(extop->X,matctx->T,matctx->U);
463: for (j=0;j<n;j++) {
464: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
465: hfj[j*(n+1)] += lambda;
466: }
467: PetscBLASIntCast(n,&n_);
468: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
469: PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
470: PetscFPTrapPop();
471: SlepcCheckLapackInfo("trtri",info);
472: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
473: BVMultInPlace(matctx->U,F,0,n);
474: if (jacobian) {
475: NEPDeflationComputeFunction(extop,lambda,NULL);
476: MatShellGetContext(extop->MF,&matctxC);
477: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
478: }
479: MatDestroy(&F);
480: }
481: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
482: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
483: A = matctx->A;
484: PetscArrayzero(A,szd*szd);
485: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
486: for (j=0;j<n;j++)
487: for (i=0;i<n;i++)
488: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
489: PetscBLASIntCast(n,&n_);
490: PetscBLASIntCast(szd,&szd_);
491: B = matctx->B;
492: PetscArrayzero(B,szd*szd);
493: for (i=1;i<extop->midx;i++) {
494: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
495: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
496: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
497: pts = hHprev; hHprev = hH; hH = pts;
498: }
499: PetscFree3(basisv,hH,hHprev);
500: }
501: matctx->theta = lambda;
502: matctx->n = extop->n;
503: }
504: return 0;
505: }
507: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
508: {
509: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
510: if (F) *F = extop->MF;
511: return 0;
512: }
514: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
515: {
516: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
517: if (J) *J = extop->MJ;
518: return 0;
519: }
521: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
522: {
523: NEP_DEF_MATSHELL *matctx;
524: NEP_DEF_FUN_SOLVE solve;
525: PetscInt i,j,n=extop->n;
526: Vec u,tu;
527: Mat F;
528: PetscScalar snone=-1.0,sone=1.0;
529: PetscBLASInt n_,szd_,ldh_,*p,info;
530: Mat Mshell;
532: solve = extop->solve;
533: if (lambda!=solve->theta || n!=solve->n) {
534: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
535: Mshell = (solve->sincf)?extop->MF:solve->T;
536: MatShellGetContext(Mshell,&matctx);
537: NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P);
538: if (!extop->ref && n) {
539: PetscBLASIntCast(n,&n_);
540: PetscBLASIntCast(extop->szd,&szd_);
541: PetscBLASIntCast(extop->szd+1,&ldh_);
542: if (!extop->simpU) {
543: BVSetActiveColumns(solve->T_1U,0,n);
544: for (i=0;i<n;i++) {
545: BVGetColumn(matctx->U,i,&u);
546: BVGetColumn(solve->T_1U,i,&tu);
547: KSPSolve(solve->ksp,u,tu);
548: BVRestoreColumn(solve->T_1U,i,&tu);
549: BVRestoreColumn(matctx->U,i,&u);
550: }
551: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
552: BVDot(solve->T_1U,extop->X,F);
553: MatDestroy(&F);
554: } else {
555: for (j=0;j<n;j++)
556: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
557: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
558: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
559: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
560: }
561: PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd);
562: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
563: PetscMalloc1(n,&p);
564: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
565: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
566: SlepcCheckLapackInfo("getrf",info);
567: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
568: SlepcCheckLapackInfo("getri",info);
569: PetscFPTrapPop();
570: PetscFree(p);
571: }
572: solve->theta = lambda;
573: solve->n = n;
574: }
575: return 0;
576: }
578: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
579: {
580: Vec b1,x1;
581: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
582: NEP_DEF_MATSHELL *matctx;
583: NEP_DEF_FUN_SOLVE solve=extop->solve;
584: PetscBLASInt one=1,szd_,n_,ldh_;
585: PetscInt nloc,i;
586: PetscMPIInt np,count;
588: if (extop->ref) VecZeroEntries(x);
589: if (extop->szd) {
590: x1 = solve->w[0]; b1 = solve->w[1];
591: VecGetArray(x,&xx);
592: VecPlaceArray(x1,xx);
593: VecGetArray(b,&bb);
594: VecPlaceArray(b1,bb);
595: } else {
596: b1 = b; x1 = x;
597: }
598: KSPSolve(extop->solve->ksp,b1,x1);
599: if (!extop->ref && extop->n && extop->szd) {
600: PetscBLASIntCast(extop->szd,&szd_);
601: PetscBLASIntCast(extop->n,&n_);
602: PetscBLASIntCast(extop->szd+1,&ldh_);
603: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
604: PetscMalloc2(extop->n,&b2,extop->n,&x2);
605: MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);
606: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
607: w = solve->work; w2 = solve->work+extop->n;
608: MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx);
609: PetscArraycpy(w2,b2,extop->n);
610: BVDotVec(extop->X,x1,w);
611: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
612: PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
613: if (extop->simpU) {
614: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
615: for (i=0;i<extop->n;i++) w[i] = x2[i];
616: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
617: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
618: BVMultVec(extop->X,-1.0,1.0,x1,w);
619: } else BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
620: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
621: PetscMPIIntCast(extop->n,&count);
622: MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));
623: }
624: if (extop->szd) {
625: VecResetArray(x1);
626: VecRestoreArray(x,&xx);
627: VecResetArray(b1);
628: VecRestoreArray(b,&bb);
629: if (!extop->ref && extop->n) PetscFree2(b2,x2);
630: }
631: return 0;
632: }
634: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
635: {
636: extop->ref = ref;
637: return 0;
638: }
640: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
641: {
642: PetscInt j;
643: NEP_DEF_FUN_SOLVE solve;
645: if (!extop) return 0;
646: PetscFree(extop->H);
647: BVDestroy(&extop->X);
648: if (extop->szd) {
649: VecDestroy(&extop->w);
650: PetscFree3(extop->Hj,extop->XpX,extop->bc);
651: BVDestroy(&extop->W);
652: }
653: MatDestroy(&extop->MF);
654: MatDestroy(&extop->MJ);
655: if (extop->solve) {
656: solve = extop->solve;
657: if (extop->szd) {
658: if (!extop->simpU) BVDestroy(&solve->T_1U);
659: PetscFree2(solve->M,solve->work);
660: VecDestroy(&solve->w[0]);
661: VecDestroy(&solve->w[1]);
662: }
663: if (!solve->sincf) MatDestroy(&solve->T);
664: PetscFree(extop->solve);
665: }
666: if (extop->proj) {
667: if (extop->szd) {
668: for (j=0;j<extop->nep->nt;j++) MatDestroy(&extop->proj->V1pApX[j]);
669: MatDestroy(&extop->proj->XpV1);
670: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
671: VecDestroy(&extop->proj->w);
672: BVDestroy(&extop->proj->V1);
673: }
674: PetscFree(extop->proj);
675: }
676: PetscFree(extop);
677: return 0;
678: }
680: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
681: {
682: NEP_EXT_OP op;
683: NEP_DEF_FUN_SOLVE solve;
684: PetscInt szd;
685: Vec x;
687: NEPDeflationReset(*extop);
688: PetscNew(&op);
689: *extop = op;
690: op->nep = nep;
691: op->n = 0;
692: op->szd = szd = sz-1;
693: op->max_midx = PetscMin(MAX_MINIDX,szd);
694: op->X = X;
695: if (!X) BVDuplicateResize(nep->V,sz,&op->X);
696: else PetscObjectReference((PetscObject)X);
697: PetscCalloc1(sz*sz,&(op)->H);
698: if (op->szd) {
699: BVGetColumn(op->X,0,&x);
700: VecDuplicate(x,&op->w);
701: BVRestoreColumn(op->X,0,&x);
702: op->simpU = PETSC_FALSE;
703: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
704: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
705: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
706: } else {
707: op->simpU = PETSC_TRUE;
708: }
709: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
710: BVDuplicateResize(op->X,op->szd,&op->W);
711: }
712: if (ksp) {
713: PetscNew(&solve);
714: op->solve = solve;
715: solve->ksp = ksp;
716: solve->sincf = sincfun;
717: solve->n = -1;
718: if (op->szd) {
719: if (!op->simpU) BVDuplicateResize(nep->V,szd,&solve->T_1U);
720: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
721: BVCreateVec(nep->V,&solve->w[0]);
722: VecDuplicate(solve->w[0],&solve->w[1]);
723: }
724: }
725: return 0;
726: }
728: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
729: {
730: Mat A,Ei;
731: PetscScalar *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
732: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
733: const PetscScalar *E;
734: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
735: PetscBLASInt inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
736: PetscMPIInt np;
737: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
738: NEP_EXT_OP extop=proj->extop;
739: NEP nep=extop->nep;
741: DSGetDimensions(ds,&nv,NULL,NULL,NULL);
742: DSGetLeadingDimension(ds,&ldds);
743: DSGetMat(ds,mat,&A);
744: MatZeroEntries(A);
745: /* mat = V1^*T(lambda)V1 */
746: for (i=0;i<nep->nt;i++) {
747: if (deriv) FNEvaluateDerivative(nep->f[i],lambda,&alpha);
748: else FNEvaluateFunction(nep->f[i],lambda,&alpha);
749: DSGetMat(ds,DSMatExtra[i],&Ei);
750: MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN);
751: DSRestoreMat(ds,DSMatExtra[i],&Ei);
752: }
753: DSRestoreMat(ds,mat,&A);
754: if (!extop->ref && extop->n) {
755: DSGetArray(ds,mat,&T);
756: n = extop->n;
757: szd = extop->szd;
758: PetscArrayzero(proj->work,proj->lwork);
759: PetscBLASIntCast(nv,&nv_);
760: PetscBLASIntCast(n,&n_);
761: PetscBLASIntCast(ldds,&ldds_);
762: PetscBLASIntCast(szd,&szd_);
763: PetscBLASIntCast(proj->dim,&dim_);
764: PetscBLASIntCast(extop->szd+1,&ldh_);
765: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
766: nwork += 2*proj->dim*proj->dim;
768: /* mat = mat + V1^*U(lambda)V2 */
769: for (i=0;i<nep->nt;i++) {
770: if (extop->simpU) {
771: if (deriv) FNEvaluateDerivative(nep->f[i],lambda,&alpha);
772: else FNEvaluateFunction(nep->f[i],lambda,&alpha);
773: ww = w1; w = w2;
774: PetscArraycpy(ww,proj->V2,szd*nv);
775: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
776: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
777: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
778: alpha = -alpha;
779: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
780: if (deriv) {
781: PetscBLASIntCast(szd*nv,&szdk);
782: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
783: PetscArraycpy(w,proj->V2,szd*nv);
784: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
785: alpha2 = -alpha2;
786: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
787: alpha2 = 1.0;
788: PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
789: PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
790: }
791: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
792: } else {
793: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
794: w = deriv?w2:w1; ww = deriv?w1:w2;
795: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
796: s = PetscSqrtReal(np);
797: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
798: }
799: MatDenseGetArrayRead(proj->V1pApX[i],&E);
800: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
801: MatDenseRestoreArrayRead(proj->V1pApX[i],&E);
802: }
804: /* mat = mat + V2^*A(lambda)V1 */
805: basisv = proj->work+nwork; nwork += szd;
806: hH = proj->work+nwork; nwork += szd*szd;
807: hHprev = proj->work+nwork; nwork += szd*szd;
808: AB = proj->work+nwork;
809: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
810: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
811: for (j=0;j<n;j++)
812: for (i=0;i<n;i++)
813: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
814: MatDenseGetArrayRead(proj->XpV1,&E);
815: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
816: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
817: MatDenseRestoreArrayRead(proj->XpV1,&E);
819: /* mat = mat + V2^*B(lambda)V2 */
820: PetscArrayzero(AB,szd*szd);
821: for (i=1;i<extop->midx;i++) {
822: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
823: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
824: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
825: pts = hHprev; hHprev = hH; hH = pts;
826: }
827: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
828: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
829: DSRestoreArray(ds,mat,&T);
830: }
831: return 0;
832: }
834: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
835: {
836: PetscInt k,j,n=extop->n,dim;
837: Vec v,ve;
838: BV V1;
839: Mat G;
840: NEP nep=extop->nep;
841: NEP_DEF_PROJECT proj;
843: NEPCheckSplit(extop->nep,1);
844: proj = extop->proj;
845: if (!proj) {
846: /* Initialize the projection data structure */
847: PetscNew(&proj);
848: extop->proj = proj;
849: proj->extop = extop;
850: BVGetSizes(Vext,NULL,NULL,&dim);
851: proj->dim = dim;
852: if (extop->szd) {
853: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
854: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
855: for (j=0;j<nep->nt;j++) MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
856: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
857: BVCreateVec(extop->X,&proj->w);
858: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
859: }
860: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
861: }
863: /* Split Vext in V1 and V2 */
864: if (extop->szd) {
865: for (j=j0;j<j1;j++) {
866: BVGetColumn(Vext,j,&ve);
867: BVGetColumn(proj->V1,j,&v);
868: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
869: BVRestoreColumn(proj->V1,j,&v);
870: BVRestoreColumn(Vext,j,&ve);
871: }
872: V1 = proj->V1;
873: } else V1 = Vext;
875: /* Compute matrices V1^* A_i V1 */
876: BVSetActiveColumns(V1,j0,j1);
877: for (k=0;k<nep->nt;k++) {
878: DSGetMat(ds,DSMatExtra[k],&G);
879: BVMatProject(V1,nep->A[k],V1,G);
880: DSRestoreMat(ds,DSMatExtra[k],&G);
881: }
883: if (extop->n) {
884: if (extop->szd) {
885: /* Compute matrices V1^* A_i X and V1^* X */
886: BVSetActiveColumns(extop->W,0,n);
887: for (k=0;k<nep->nt;k++) {
888: BVMatMult(extop->X,nep->A[k],extop->W);
889: BVDot(extop->W,V1,proj->V1pApX[k]);
890: }
891: BVDot(V1,extop->X,proj->XpV1);
892: }
893: }
894: return 0;
895: }