Actual source code: dsgnhep.c
slepc-3.18.0 2022-10-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: /*
15: 1) Patterns of A and B
16: DS_STATE_RAW: DS_STATE_INTERM/CONDENSED
17: 0 n-1 0 n-1
18: ------------- -------------
19: 0 |* * * * * *| 0 |* * * * * *|
20: |* * * * * *| | * * * * *|
21: |* * * * * *| | * * * *|
22: |* * * * * *| | * * * *|
23: |* * * * * *| | * *|
24: n-1 |* * * * * *| n-1 | *|
25: ------------- -------------
27: 2) Moreover, P and Q are assumed to be the identity in DS_STATE_INTERMEDIATE.
28: */
30: static PetscErrorCode CleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *X,PetscInt ldX,PetscScalar *Y,PetscInt ldY);
32: PetscErrorCode DSAllocate_GNHEP(DS ds,PetscInt ld)
33: {
34: DSAllocateMat_Private(ds,DS_MAT_A);
35: DSAllocateMat_Private(ds,DS_MAT_B);
36: DSAllocateMat_Private(ds,DS_MAT_Z);
37: DSAllocateMat_Private(ds,DS_MAT_Q);
38: PetscFree(ds->perm);
39: PetscMalloc1(ld,&ds->perm);
40: return 0;
41: }
43: PetscErrorCode DSView_GNHEP(DS ds,PetscViewer viewer)
44: {
45: PetscViewerFormat format;
47: PetscViewerGetFormat(viewer,&format);
48: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;
49: DSViewMat(ds,viewer,DS_MAT_A);
50: DSViewMat(ds,viewer,DS_MAT_B);
51: if (ds->state>DS_STATE_INTERMEDIATE) {
52: DSViewMat(ds,viewer,DS_MAT_Z);
53: DSViewMat(ds,viewer,DS_MAT_Q);
54: }
55: if (ds->omat[DS_MAT_X]) DSViewMat(ds,viewer,DS_MAT_X);
56: if (ds->omat[DS_MAT_Y]) DSViewMat(ds,viewer,DS_MAT_Y);
57: return 0;
58: }
60: static PetscErrorCode DSVectors_GNHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
61: {
62: PetscInt i;
63: PetscBLASInt n,ld,mout,info,*select,mm,inc=1,cols=1,zero=0;
64: PetscScalar *X,*Y,*XY,*Z,*Q,*A,*B,fone=1.0,fzero=0.0;
65: PetscReal norm,done=1.0;
66: PetscBool iscomplex = PETSC_FALSE;
67: const char *side;
69: PetscBLASIntCast(ds->n,&n);
70: PetscBLASIntCast(ds->ld,&ld);
71: if (left) {
72: X = NULL;
73: MatDenseGetArray(ds->omat[DS_MAT_Y],&Y);
74: side = "L";
75: } else {
76: MatDenseGetArray(ds->omat[DS_MAT_X],&X);
77: Y = NULL;
78: side = "R";
79: }
80: XY = left? Y: X;
81: DSAllocateWork_Private(ds,0,0,ld);
82: select = ds->iwork;
83: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
84: if (ds->state <= DS_STATE_INTERMEDIATE) {
85: DSSetIdentity(ds,DS_MAT_Q);
86: DSSetIdentity(ds,DS_MAT_Z);
87: }
88: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
89: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
90: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
91: MatDenseGetArray(ds->omat[DS_MAT_Z],&Z);
92: CleanDenseSchur(n,0,A,ld,B,ld,Q,ld,Z,ld);
93: if (ds->state < DS_STATE_CONDENSED) DSSetState(ds,DS_STATE_CONDENSED);
95: /* compute k-th eigenvector */
96: select[*k] = (PetscBLASInt)PETSC_TRUE;
97: #if defined(PETSC_USE_COMPLEX)
98: mm = 1;
99: DSAllocateWork_Private(ds,2*ld,2*ld,0);
100: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,"S",select,&n,A,&ld,B,&ld,Y+(*k)*ld,&ld,X+(*k)*ld,&ld,&mm,&mout,ds->work,ds->rwork,&info));
101: #else
102: if ((*k)<n-1 && (A[ld*(*k)+(*k)+1] != 0.0 || B[ld*(*k)+(*k)+1] != 0.0)) iscomplex = PETSC_TRUE;
103: mm = iscomplex? 2: 1;
104: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
105: DSAllocateWork_Private(ds,6*ld,0,0);
106: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,"S",select,&n,A,&ld,B,&ld,Y+(*k)*ld,&ld,X+(*k)*ld,&ld,&mm,&mout,ds->work,&info));
107: #endif
108: SlepcCheckLapackInfo("tgevc",info);
110: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
111: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
113: /* accumulate and normalize eigenvectors */
114: PetscArraycpy(ds->work,XY+(*k)*ld,mm*ld);
115: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&mm,&n,&fone,left?Z:Q,&ld,ds->work,&ld,&fzero,XY+(*k)*ld,&ld));
116: norm = BLASnrm2_(&n,XY+(*k)*ld,&inc);
117: #if !defined(PETSC_USE_COMPLEX)
118: if (iscomplex) {
119: norm = SlepcAbsEigenvalue(norm,BLASnrm2_(&n,XY+(*k+1)*ld,&inc));
120: cols = 2;
121: }
122: #endif
123: PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&norm,&done,&n,&cols,XY+(*k)*ld,&ld,&info));
124: SlepcCheckLapackInfo("lascl",info);
125: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
126: MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z);
128: /* set output arguments */
129: if (rnorm) {
130: if (iscomplex) *rnorm = SlepcAbsEigenvalue(XY[n-1+(*k)*ld],XY[n-1+(*k+1)*ld]);
131: else *rnorm = PetscAbsScalar(XY[n-1+(*k)*ld]);
132: }
133: if (iscomplex) (*k)++;
134: MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&XY);
135: return 0;
136: }
138: static PetscErrorCode DSVectors_GNHEP_Eigen_All(DS ds,PetscBool left)
139: {
140: PetscInt i;
141: PetscBLASInt n,ld,mout,info,inc = 1;
142: PetscBool iscomplex;
143: PetscScalar *X,*Y,*XY,*Q,*Z,*A,*B,tmp;
144: PetscReal norm;
145: const char *side,*back;
147: PetscBLASIntCast(ds->n,&n);
148: PetscBLASIntCast(ds->ld,&ld);
149: if (left) {
150: X = NULL;
151: MatDenseGetArray(ds->omat[DS_MAT_Y],&Y);
152: side = "L";
153: } else {
154: MatDenseGetArray(ds->omat[DS_MAT_X],&X);
155: Y = NULL;
156: side = "R";
157: }
158: XY = left? Y: X;
159: if (ds->state <= DS_STATE_INTERMEDIATE) {
160: DSSetIdentity(ds,DS_MAT_Q);
161: DSSetIdentity(ds,DS_MAT_Z);
162: }
163: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
164: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
165: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
166: MatDenseGetArray(ds->omat[DS_MAT_Z],&Z);
167: CleanDenseSchur(n,0,A,ld,B,ld,Q,ld,Z,ld);
168: if (ds->state>=DS_STATE_CONDENSED) {
169: /* DSSolve() has been called, backtransform with matrix Q */
170: back = "B";
171: PetscArraycpy(left?Y:X,left?Z:Q,ld*ld);
172: } else {
173: back = "A";
174: DSSetState(ds,DS_STATE_CONDENSED);
175: }
176: #if defined(PETSC_USE_COMPLEX)
177: DSAllocateWork_Private(ds,2*ld,2*ld,0);
178: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,back,NULL,&n,A,&ld,B,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
179: #else
180: DSAllocateWork_Private(ds,6*ld,0,0);
181: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,back,NULL,&n,A,&ld,B,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
182: #endif
183: SlepcCheckLapackInfo("tgevc",info);
184: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
185: MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z);
187: /* normalize eigenvectors */
188: for (i=0;i<n;i++) {
189: iscomplex = (i<n-1 && (A[i+1+i*ld]!=0.0 || B[i+1+i*ld]!=0.0))? PETSC_TRUE: PETSC_FALSE;
190: norm = BLASnrm2_(&n,XY+i*ld,&inc);
191: #if !defined(PETSC_USE_COMPLEX)
192: if (iscomplex) {
193: tmp = BLASnrm2_(&n,XY+(i+1)*ld,&inc);
194: norm = SlepcAbsEigenvalue(norm,tmp);
195: }
196: #endif
197: tmp = 1.0 / norm;
198: PetscCallBLAS("BLASscal",BLASscal_(&n,&tmp,XY+i*ld,&inc));
199: #if !defined(PETSC_USE_COMPLEX)
200: if (iscomplex) PetscCallBLAS("BLASscal",BLASscal_(&n,&tmp,XY+(i+1)*ld,&inc));
201: #endif
202: if (iscomplex) i++;
203: }
204: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
205: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
206: MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&XY);
207: return 0;
208: }
210: PetscErrorCode DSVectors_GNHEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
211: {
212: switch (mat) {
213: case DS_MAT_X:
214: case DS_MAT_Y:
215: if (k) DSVectors_GNHEP_Eigen_Some(ds,k,rnorm,mat == DS_MAT_Y?PETSC_TRUE:PETSC_FALSE);
216: else DSVectors_GNHEP_Eigen_All(ds,mat == DS_MAT_Y?PETSC_TRUE:PETSC_FALSE);
217: break;
218: default:
219: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
220: }
221: return 0;
222: }
224: static PetscErrorCode DSSort_GNHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
225: {
226: PetscInt i;
227: PetscBLASInt info,n,ld,mout,lwork,liwork,*iwork,*selection,zero_=0,true_=1;
228: PetscScalar *S,*T,*Q,*Z,*work,*beta;
230: if (!ds->sc) return 0;
231: PetscBLASIntCast(ds->n,&n);
232: PetscBLASIntCast(ds->ld,&ld);
233: #if !defined(PETSC_USE_COMPLEX)
234: lwork = 4*n+16;
235: #else
236: lwork = 1;
237: #endif
238: liwork = 1;
239: DSAllocateWork_Private(ds,lwork+2*n,0,liwork+n);
240: beta = ds->work;
241: work = ds->work + n;
242: lwork = ds->lwork - n;
243: selection = ds->iwork;
244: iwork = ds->iwork + n;
245: liwork = ds->liwork - n;
246: /* Compute the selected eigenvalue to be in the leading position */
247: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
248: PetscArrayzero(selection,n);
249: for (i=0; i<*k; i++) selection[ds->perm[i]] = 1;
250: MatDenseGetArray(ds->omat[DS_MAT_A],&S);
251: MatDenseGetArray(ds->omat[DS_MAT_B],&T);
252: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
253: MatDenseGetArray(ds->omat[DS_MAT_Z],&Z);
254: #if !defined(PETSC_USE_COMPLEX)
255: PetscCallBLAS("LAPACKtgsen",LAPACKtgsen_(&zero_,&true_,&true_,selection,&n,S,&ld,T,&ld,wr,wi,beta,Z,&ld,Q,&ld,&mout,NULL,NULL,NULL,work,&lwork,iwork,&liwork,&info));
256: #else
257: PetscCallBLAS("LAPACKtgsen",LAPACKtgsen_(&zero_,&true_,&true_,selection,&n,S,&ld,T,&ld,wr,beta,Z,&ld,Q,&ld,&mout,NULL,NULL,NULL,work,&lwork,iwork,&liwork,&info));
258: #endif
259: SlepcCheckLapackInfo("tgsen",info);
260: MatDenseRestoreArray(ds->omat[DS_MAT_A],&S);
261: MatDenseRestoreArray(ds->omat[DS_MAT_B],&T);
262: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
263: MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z);
264: *k = mout;
265: for (i=0;i<n;i++) {
266: if (beta[i]==0.0) wr[i] = (PetscRealPart(wr[i])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
267: else wr[i] /= beta[i];
268: #if !defined(PETSC_USE_COMPLEX)
269: if (beta[i]==0.0) wi[i] = (wi[i]>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
270: else wi[i] /= beta[i];
271: #endif
272: }
273: return 0;
274: }
276: static PetscErrorCode DSSort_GNHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
277: {
278: PetscScalar re;
279: PetscInt i,j,pos,result;
280: PetscBLASInt ifst,ilst,info,n,ld,one=1;
281: PetscScalar *S,*T,*Z,*Q;
282: #if !defined(PETSC_USE_COMPLEX)
283: PetscBLASInt lwork;
284: PetscScalar *work,a,safmin,scale1,scale2,im;
285: #endif
287: if (!ds->sc) return 0;
288: PetscBLASIntCast(ds->n,&n);
289: PetscBLASIntCast(ds->ld,&ld);
290: MatDenseGetArray(ds->omat[DS_MAT_A],&S);
291: MatDenseGetArray(ds->omat[DS_MAT_B],&T);
292: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
293: MatDenseGetArray(ds->omat[DS_MAT_Z],&Z);
294: #if !defined(PETSC_USE_COMPLEX)
295: lwork = -1;
296: PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&ld,NULL,&ld,NULL,&ld,NULL,&ld,NULL,&ld,&one,&one,&a,&lwork,&info));
297: SlepcCheckLapackInfo("tgexc",info);
298: safmin = LAPACKlamch_("S");
299: PetscBLASIntCast((PetscInt)a,&lwork);
300: DSAllocateWork_Private(ds,lwork,0,0);
301: work = ds->work;
302: #endif
303: /* selection sort */
304: for (i=ds->l;i<n-1;i++) {
305: re = wr[i];
306: #if !defined(PETSC_USE_COMPLEX)
307: im = wi[i];
308: #endif
309: pos = 0;
310: j = i+1; /* j points to the next eigenvalue */
311: #if !defined(PETSC_USE_COMPLEX)
312: if (im != 0) j=i+2;
313: #endif
314: /* find minimum eigenvalue */
315: for (;j<n;j++) {
316: #if !defined(PETSC_USE_COMPLEX)
317: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
318: #else
319: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
320: #endif
321: if (result > 0) {
322: re = wr[j];
323: #if !defined(PETSC_USE_COMPLEX)
324: im = wi[j];
325: #endif
326: pos = j;
327: }
328: #if !defined(PETSC_USE_COMPLEX)
329: if (wi[j] != 0) j++;
330: #endif
331: }
332: if (pos) {
333: /* interchange blocks */
334: PetscBLASIntCast(pos+1,&ifst);
335: PetscBLASIntCast(i+1,&ilst);
336: #if !defined(PETSC_USE_COMPLEX)
337: PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&n,S,&ld,T,&ld,Z,&ld,Q,&ld,&ifst,&ilst,work,&lwork,&info));
338: #else
339: PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&n,S,&ld,T,&ld,Z,&ld,Q,&ld,&ifst,&ilst,&info));
340: #endif
341: SlepcCheckLapackInfo("tgexc",info);
342: /* recover original eigenvalues from T and S matrices */
343: for (j=i;j<n;j++) {
344: #if !defined(PETSC_USE_COMPLEX)
345: if (j<n-1 && S[j*ld+j+1] != 0.0) {
346: /* complex conjugate eigenvalue */
347: PetscCallBLAS("LAPACKlag2",LAPACKlag2_(S+j*ld+j,&ld,T+j*ld+j,&ld,&safmin,&scale1,&scale2,&re,&a,&im));
348: wr[j] = re / scale1;
349: wi[j] = im / scale1;
350: wr[j+1] = a / scale2;
351: wi[j+1] = -wi[j];
352: j++;
353: } else
354: #endif
355: {
356: if (T[j*ld+j] == 0.0) wr[j] = (PetscRealPart(S[j*ld+j])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
357: else wr[j] = S[j*ld+j] / T[j*ld+j];
358: #if !defined(PETSC_USE_COMPLEX)
359: wi[j] = 0.0;
360: #endif
361: }
362: }
363: }
364: #if !defined(PETSC_USE_COMPLEX)
365: if (wi[i] != 0.0) i++;
366: #endif
367: }
368: MatDenseRestoreArray(ds->omat[DS_MAT_A],&S);
369: MatDenseRestoreArray(ds->omat[DS_MAT_B],&T);
370: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
371: MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z);
372: return 0;
373: }
375: PetscErrorCode DSSort_GNHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
376: {
377: if (!rr || wr == rr) DSSort_GNHEP_Total(ds,wr,wi);
378: else DSSort_GNHEP_Arbitrary(ds,wr,wi,rr,ri,k);
379: return 0;
380: }
382: PetscErrorCode DSUpdateExtraRow_GNHEP(DS ds)
383: {
384: PetscInt i;
385: PetscBLASInt n,ld,incx=1;
386: PetscScalar *A,*B,*x,*y,one=1.0,zero=0.0;
387: const PetscScalar *Q;
389: PetscBLASIntCast(ds->n,&n);
390: PetscBLASIntCast(ds->ld,&ld);
391: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
392: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
393: MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q);
394: DSAllocateWork_Private(ds,2*ld,0,0);
395: x = ds->work;
396: y = ds->work+ld;
397: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
398: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
399: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
400: for (i=0;i<n;i++) x[i] = PetscConj(B[n+i*ld]);
401: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
402: for (i=0;i<n;i++) B[n+i*ld] = PetscConj(y[i]);
403: ds->k = n;
404: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
405: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
406: MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q);
407: return 0;
408: }
410: /*
411: Write zeros from the column k to n in the lower triangular part of the
412: matrices S and T, and inside 2-by-2 diagonal blocks of T in order to
413: make (S,T) a valid Schur decompositon.
414: */
415: static PetscErrorCode CleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *X,PetscInt ldX,PetscScalar *Y,PetscInt ldY)
416: {
417: PetscInt i;
418: #if defined(PETSC_USE_COMPLEX)
419: PetscInt j;
420: PetscScalar s;
421: #else
422: PetscBLASInt ldS_,ldT_,n_i,n_i_2,one=1,n_,i_2,i_;
423: PetscScalar b11,b22,sr,cr,sl,cl;
424: #endif
426: #if defined(PETSC_USE_COMPLEX)
427: for (i=k; i<n; i++) {
428: /* Some functions need the diagonal elements in T be real */
429: if (T && PetscImaginaryPart(T[ldT*i+i]) != 0.0) {
430: s = PetscConj(T[ldT*i+i])/PetscAbsScalar(T[ldT*i+i]);
431: for (j=0;j<=i;j++) {
432: T[ldT*i+j] *= s;
433: S[ldS*i+j] *= s;
434: }
435: T[ldT*i+i] = PetscRealPart(T[ldT*i+i]);
436: if (X) for (j=0;j<n;j++) X[ldX*i+j] *= s;
437: }
438: j = i+1;
439: if (j<n) {
440: S[ldS*i+j] = 0.0;
441: if (T) T[ldT*i+j] = 0.0;
442: }
443: }
444: #else
445: PetscBLASIntCast(ldS,&ldS_);
446: PetscBLASIntCast(ldT,&ldT_);
447: PetscBLASIntCast(n,&n_);
448: for (i=k;i<n-1;i++) {
449: if (S[ldS*i+i+1] != 0.0) {
450: /* Check if T(i+1,i) and T(i,i+1) are zero */
451: if (T[ldT*(i+1)+i] != 0.0 || T[ldT*i+i+1] != 0.0) {
452: /* Check if T(i+1,i) and T(i,i+1) are negligible */
453: if (PetscAbs(T[ldT*(i+1)+i])+PetscAbs(T[ldT*i+i+1]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1]))*PETSC_MACHINE_EPSILON) {
454: T[ldT*i+i+1] = 0.0;
455: T[ldT*(i+1)+i] = 0.0;
456: } else {
457: /* If one of T(i+1,i) or T(i,i+1) is negligible, we make zero the other element */
458: if (PetscAbs(T[ldT*i+i+1]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1])+PetscAbs(T[ldT*(i+1)+i]))*PETSC_MACHINE_EPSILON) {
459: PetscCallBLAS("LAPACKlasv2",LAPACKlasv2_(&T[ldT*i+i],&T[ldT*(i+1)+i],&T[ldT*(i+1)+i+1],&b22,&b11,&sl,&cl,&sr,&cr));
460: } else if (PetscAbs(T[ldT*(i+1)+i]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1])+PetscAbs(T[ldT*i+i+1]))*PETSC_MACHINE_EPSILON) {
461: PetscCallBLAS("LAPACKlasv2",LAPACKlasv2_(&T[ldT*i+i],&T[ldT*i+i+1],&T[ldT*(i+1)+i+1],&b22,&b11,&sr,&cr,&sl,&cl));
462: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported format. Call DSSolve before this function");
463: PetscBLASIntCast(n-i,&n_i);
464: n_i_2 = n_i - 2;
465: PetscBLASIntCast(i+2,&i_2);
466: PetscBLASIntCast(i,&i_);
467: if (b11 < 0.0) {
468: cr = -cr; sr = -sr;
469: b11 = -b11; b22 = -b22;
470: }
471: PetscCallBLAS("BLASrot",BLASrot_(&n_i,&S[ldS*i+i],&ldS_,&S[ldS*i+i+1],&ldS_,&cl,&sl));
472: PetscCallBLAS("BLASrot",BLASrot_(&i_2,&S[ldS*i],&one,&S[ldS*(i+1)],&one,&cr,&sr));
473: PetscCallBLAS("BLASrot",BLASrot_(&n_i_2,&T[ldT*(i+2)+i],&ldT_,&T[ldT*(i+2)+i+1],&ldT_,&cl,&sl));
474: PetscCallBLAS("BLASrot",BLASrot_(&i_,&T[ldT*i],&one,&T[ldT*(i+1)],&one,&cr,&sr));
475: if (X) PetscCallBLAS("BLASrot",BLASrot_(&n_,&X[ldX*i],&one,&X[ldX*(i+1)],&one,&cr,&sr));
476: if (Y) PetscCallBLAS("BLASrot",BLASrot_(&n_,&Y[ldY*i],&one,&Y[ldY*(i+1)],&one,&cl,&sl));
477: T[ldT*i+i] = b11; T[ldT*i+i+1] = 0.0;
478: T[ldT*(i+1)+i] = 0.0; T[ldT*(i+1)+i+1] = b22;
479: }
480: }
481: i++;
482: }
483: }
484: #endif
485: return 0;
486: }
488: PetscErrorCode DSSolve_GNHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
489: {
490: PetscScalar *work,*beta,a;
491: PetscInt i;
492: PetscBLASInt lwork,info,n,ld,iaux;
493: PetscScalar *A,*B,*Z,*Q;
495: #if !defined(PETSC_USE_COMPLEX)
497: #endif
498: PetscBLASIntCast(ds->n,&n);
499: PetscBLASIntCast(ds->ld,&ld);
500: lwork = -1;
501: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
502: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
503: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
504: MatDenseGetArray(ds->omat[DS_MAT_Z],&Z);
505: #if !defined(PETSC_USE_COMPLEX)
506: PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,&info));
507: PetscBLASIntCast((PetscInt)a,&lwork);
508: DSAllocateWork_Private(ds,lwork+ld,0,0);
509: beta = ds->work;
510: work = beta+ds->n;
511: PetscBLASIntCast(ds->lwork-ds->n,&lwork);
512: PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,beta,Z,&ld,Q,&ld,work,&lwork,NULL,&info));
513: #else
514: PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,NULL,&info));
515: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
516: DSAllocateWork_Private(ds,lwork+ld,8*ld,0);
517: beta = ds->work;
518: work = beta+ds->n;
519: PetscBLASIntCast(ds->lwork-ds->n,&lwork);
520: PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,beta,Z,&ld,Q,&ld,work,&lwork,ds->rwork,NULL,&info));
521: #endif
522: SlepcCheckLapackInfo("gges",info);
523: for (i=0;i<n;i++) {
524: if (beta[i]==0.0) wr[i] = (PetscRealPart(wr[i])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
525: else wr[i] /= beta[i];
526: #if !defined(PETSC_USE_COMPLEX)
527: if (beta[i]==0.0) wi[i] = (wi[i]>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
528: else wi[i] /= beta[i];
529: #else
530: if (wi) wi[i] = 0.0;
531: #endif
532: }
533: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
534: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
535: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
536: MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z);
537: return 0;
538: }
540: PetscErrorCode DSSynchronize_GNHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
541: {
542: PetscInt ld=ds->ld,l=ds->l,k;
543: PetscMPIInt n,rank,off=0,size,ldn;
544: PetscScalar *A,*B,*Q,*Z;
546: k = 2*(ds->n-l)*ld;
547: if (ds->state>DS_STATE_RAW) k += 2*(ds->n-l)*ld;
548: if (eigr) k += (ds->n-l);
549: if (eigi) k += (ds->n-l);
550: DSAllocateWork_Private(ds,k,0,0);
551: PetscMPIIntCast(k*sizeof(PetscScalar),&size);
552: PetscMPIIntCast(ds->n-l,&n);
553: PetscMPIIntCast(ld*(ds->n-l),&ldn);
554: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
555: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
556: if (ds->state>DS_STATE_RAW) {
557: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
558: MatDenseGetArray(ds->omat[DS_MAT_Z],&Z);
559: }
560: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
561: if (!rank) {
562: MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
563: MPI_Pack(B+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
564: if (ds->state>DS_STATE_RAW) {
565: MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
566: MPI_Pack(Z+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
567: }
568: if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
569: #if !defined(PETSC_USE_COMPLEX)
570: if (eigi) MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
571: #endif
572: }
573: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
574: if (rank) {
575: MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
576: MPI_Unpack(ds->work,size,&off,B+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
577: if (ds->state>DS_STATE_RAW) {
578: MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
579: MPI_Unpack(ds->work,size,&off,Z+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
580: }
581: if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
582: #if !defined(PETSC_USE_COMPLEX)
583: if (eigi) MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
584: #endif
585: }
586: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
587: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
588: if (ds->state>DS_STATE_RAW) {
589: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
590: MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z);
591: }
592: return 0;
593: }
595: PetscErrorCode DSTruncate_GNHEP(DS ds,PetscInt n,PetscBool trim)
596: {
597: PetscInt i,ld=ds->ld,l=ds->l;
598: PetscScalar *A,*B;
600: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
601: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
602: #if defined(PETSC_USE_DEBUG)
603: /* make sure diagonal 2x2 block is not broken */
605: #endif
606: if (trim) {
607: if (ds->extrarow) { /* clean extra row */
608: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
609: for (i=l;i<ds->n;i++) B[ds->n+i*ld] = 0.0;
610: }
611: ds->l = 0;
612: ds->k = 0;
613: ds->n = n;
614: ds->t = ds->n; /* truncated length equal to the new dimension */
615: } else {
616: if (ds->extrarow && ds->k==ds->n) {
617: /* copy entries of extra row to the new position, then clean last row */
618: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
619: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
620: for (i=l;i<n;i++) B[n+i*ld] = B[ds->n+i*ld];
621: for (i=l;i<ds->n;i++) B[ds->n+i*ld] = 0.0;
622: }
623: ds->k = (ds->extrarow)? n: 0;
624: ds->t = ds->n; /* truncated length equal to previous dimension */
625: ds->n = n;
626: }
627: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
628: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
629: return 0;
630: }
632: /*MC
633: DSGNHEP - Dense Generalized Non-Hermitian Eigenvalue Problem.
635: Level: beginner
637: Notes:
638: The problem is expressed as A*X = B*X*Lambda, where (A,B) is the input
639: matrix pencil. Lambda is a diagonal matrix whose diagonal elements are the
640: arguments of DSSolve(). After solve, (A,B) is overwritten with the
641: generalized (real) Schur form (S,T) = (Z'*A*Q,Z'*B*Q), with the first
642: matrix being upper quasi-triangular and the second one triangular.
644: Used DS matrices:
645: + DS_MAT_A - first problem matrix
646: . DS_MAT_B - second problem matrix
647: . DS_MAT_Q - first orthogonal/unitary transformation that reduces to
648: generalized (real) Schur form
649: - DS_MAT_Z - second orthogonal/unitary transformation that reduces to
650: generalized (real) Schur form
652: Implemented methods:
653: . 0 - QZ iteration (_gges)
655: .seealso: DSCreate(), DSSetType(), DSType
656: M*/
657: SLEPC_EXTERN PetscErrorCode DSCreate_GNHEP(DS ds)
658: {
659: ds->ops->allocate = DSAllocate_GNHEP;
660: ds->ops->view = DSView_GNHEP;
661: ds->ops->vectors = DSVectors_GNHEP;
662: ds->ops->solve[0] = DSSolve_GNHEP;
663: ds->ops->sort = DSSort_GNHEP;
664: ds->ops->synchronize = DSSynchronize_GNHEP;
665: ds->ops->gettruncatesize = DSGetTruncateSize_Default;
666: ds->ops->truncate = DSTruncate_GNHEP;
667: ds->ops->update = DSUpdateExtraRow_GNHEP;
668: return 0;
669: }