Actual source code: dsgsvd.c
slepc-3.18.0 2022-10-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: #include <slepc/private/dsimpl.h>
11: #include <slepcblaslapack.h>
13: typedef struct {
14: PetscInt m; /* number of columns */
15: PetscInt p; /* number of rows of B */
16: PetscInt tm; /* number of rows of X after truncating */
17: PetscInt tp; /* number of rows of V after truncating */
18: } DS_GSVD;
20: PetscErrorCode DSAllocate_GSVD(DS ds,PetscInt ld)
21: {
22: DSAllocateMat_Private(ds,DS_MAT_A);
23: DSAllocateMat_Private(ds,DS_MAT_B);
24: DSAllocateMat_Private(ds,DS_MAT_X);
25: DSAllocateMat_Private(ds,DS_MAT_U);
26: DSAllocateMat_Private(ds,DS_MAT_V);
27: DSAllocateMat_Private(ds,DS_MAT_T);
28: DSAllocateMat_Private(ds,DS_MAT_D);
29: PetscFree(ds->perm);
30: PetscMalloc1(ld,&ds->perm);
31: return 0;
32: }
34: /*
35: In compact form, A is either in form (a) or (b):
37: (a) (b)
38: lower bidiagonal with upper arrow (n=m+1) square upper bidiagonal with upper arrow (n=m)
39: 0 l k m-1
40: ----------------------------------------- 0 l k m-1
41: |* . | -----------------------------------------
42: | * . | |* . |
43: | * . | | * . |
44: | * . | | * . |
45: l |. . . . o o | l |. . . o o |
46: | o o | | o o |
47: | o o | | o o |
48: | o o | | o o |
49: | o o | | o o |
50: | o o | | o o |
51: k |. . . . . . . . . . o | k |. . . . . . . . . o x |
52: | x x | | x x |
53: | x x | | x x |
54: | x x | | x x |
55: | x x | | x x |
56: | x x | | x x |
57: | x x | | x x |
58: | x x | | x x |
59: | x x | | x x |
60: | x x| | x x|
61: n-1 | x| n-1 | x|
62: ----------------------------------------- -----------------------------------------
64: and B is square bidiagonal with upper arrow (p=m)
66: 0 l k m-1
67: -----------------------------------------
68: |* . |
69: | * . |
70: | * . |
71: | * . |
72: l |. . . . o o |
73: | o o |
74: | o o |
75: | o o |
76: | o o |
77: | o o |
78: k |. . . . . . . . . . o x |
79: | x x |
80: | x x |
81: | x x |
82: | x x |
83: | x x |
84: | x x |
85: | x x |
86: | x x|
87: p-1 | x|
88: ----------------------------------------
89: */
90: PetscErrorCode DSView_GSVD(DS ds,PetscViewer viewer)
91: {
92: DS_GSVD *ctx = (DS_GSVD*)ds->data;
93: PetscViewerFormat format;
94: PetscInt i,j,r,k=ds->k,n=ds->n,m=ctx->m,p=ctx->p,rowsa,rowsb,colsa,colsb;
95: PetscReal *T,*S,value;
97: PetscViewerGetFormat(viewer,&format);
98: if (format == PETSC_VIEWER_ASCII_INFO) return 0;
99: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
100: PetscViewerASCIIPrintf(viewer,"number of columns: %" PetscInt_FMT "\n",m);
101: PetscViewerASCIIPrintf(viewer,"number of rows of B: %" PetscInt_FMT "\n",p);
102: return 0;
103: }
105: if (ds->compact) {
106: DSGetArrayReal(ds,DS_MAT_T,&T);
107: DSGetArrayReal(ds,DS_MAT_D,&S);
108: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
109: rowsa = n;
110: colsa = ds->extrarow? m+1: m;
111: rowsb = p;
112: colsb = ds->extrarow? m+1: m;
113: if (format == PETSC_VIEWER_ASCII_MATLAB) {
114: PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rowsa,colsa);
115: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n);
116: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
117: for (i=0;i<PetscMin(rowsa,colsa);i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+1,(double)T[i]);
118: for (i=0;i<k;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,k+1,(double)T[i+ds->ld]);
119: if (n>m) { /* A lower bidiagonal */
120: for (i=k;i<rowsa-1;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+2,i+1,(double)T[i+ds->ld]);
121: } else { /* A (square) upper bidiagonal */
122: for (i=k;i<colsa-1;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+2,(double)T[i+ds->ld]);
123: }
124: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
125: PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rowsb,colsb);
126: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n);
127: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
128: for (i=0;i<rowsb;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+1,(double)S[i]);
129: for (i=0;i<colsb-1;i++) {
130: r = PetscMax(i+2,ds->k+1);
131: PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,r,(double)T[i+2*ds->ld]);
132: }
133: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_D]);
134: } else {
135: PetscViewerASCIIPrintf(viewer,"Matrix %s =\n",DSMatName[DS_MAT_T]);
136: for (i=0;i<rowsa;i++) {
137: for (j=0;j<colsa;j++) {
138: if (i==j) value = T[i];
139: else if (i<ds->k && j==ds->k) value = T[i+ds->ld];
140: else if (n>m && i==j+1 && i>ds->k) value = T[j+ds->ld];
141: else if (n<=m && i+1==j && i>=ds->k) value = T[i+ds->ld];
142: else value = 0.0;
143: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
144: }
145: PetscViewerASCIIPrintf(viewer,"\n");
146: }
147: PetscViewerASCIIPrintf(viewer,"Matrix %s =\n",DSMatName[DS_MAT_D]);
148: for (i=0;i<rowsb;i++) {
149: for (j=0;j<colsb;j++) {
150: if (i==j) value = S[i];
151: else if (i<ds->k && j==ds->k) value = T[PetscMin(i,j)+2*ds->ld];
152: else if (i+1==j && i>=ds->k) value = T[i+2*ds->ld];
153: else value = 0.0;
154: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
155: }
156: PetscViewerASCIIPrintf(viewer,"\n");
157: }
158: }
159: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
160: PetscViewerFlush(viewer);
161: DSRestoreArrayReal(ds,DS_MAT_T,&T);
162: DSRestoreArrayReal(ds,DS_MAT_D,&S);
163: } else {
164: DSViewMat(ds,viewer,DS_MAT_A);
165: DSViewMat(ds,viewer,DS_MAT_B);
166: }
167: if (ds->state>DS_STATE_INTERMEDIATE) {
168: DSViewMat(ds,viewer,DS_MAT_X);
169: DSViewMat(ds,viewer,DS_MAT_U);
170: DSViewMat(ds,viewer,DS_MAT_V);
171: }
172: return 0;
173: }
175: PetscErrorCode DSVectors_GSVD(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
176: {
177: switch (mat) {
178: case DS_MAT_U:
179: case DS_MAT_V:
180: if (rnorm) *rnorm = 0.0;
181: break;
182: case DS_MAT_X:
183: break;
184: default:
185: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
186: }
187: return 0;
188: }
190: PetscErrorCode DSSort_GSVD(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
191: {
192: DS_GSVD *ctx = (DS_GSVD*)ds->data;
193: PetscInt t,l,ld=ds->ld,i,*perm,*perm2;
194: PetscReal *T=NULL,*D=NULL,*eig;
195: PetscScalar *A=NULL,*B=NULL;
196: PetscBool compact=ds->compact;
198: if (!ds->sc) return 0;
200: l = ds->l;
201: t = ds->t;
202: perm = ds->perm;
203: PetscMalloc2(t,&eig,t,&perm2);
204: if (compact) {
205: DSGetArrayReal(ds,DS_MAT_T,&T);
206: DSGetArrayReal(ds,DS_MAT_D,&D);
207: for (i=0;i<t;i++) eig[i] = (D[i]==0)?PETSC_INFINITY:T[i]/D[i];
208: } else {
209: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
210: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
211: for (i=0;i<t;i++) eig[i] = (B[i+i*ld]==0)?PETSC_INFINITY:PetscRealPart(A[i+i*ld])/PetscRealPart(B[i*(1+ld)]);
212: }
213: DSSortEigenvaluesReal_Private(ds,eig,perm);
214: PetscArraycpy(perm2,perm,t);
215: for (i=l;i<t;i++) wr[i] = eig[perm[i]];
216: if (compact) {
217: PetscArraycpy(eig,T,t);
218: for (i=l;i<t;i++) T[i] = eig[perm[i]];
219: PetscArraycpy(eig,D,t);
220: for (i=l;i<t;i++) D[i] = eig[perm[i]];
221: DSRestoreArrayReal(ds,DS_MAT_T,&T);
222: DSRestoreArrayReal(ds,DS_MAT_D,&D);
223: } else {
224: for (i=l;i<t;i++) eig[i] = PetscRealPart(A[i*(1+ld)]);
225: for (i=l;i<t;i++) A[i*(1+ld)] = eig[perm[i]];
226: for (i=l;i<t;i++) eig[i] = PetscRealPart(B[i*(1+ld)]);
227: for (i=l;i<t;i++) B[i*(1+ld)] = eig[perm[i]];
228: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
229: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
230: }
231: DSPermuteColumns_Private(ds,l,t,ds->n,DS_MAT_U,perm2);
232: PetscArraycpy(perm2,perm,t);
233: DSPermuteColumns_Private(ds,l,t,ctx->m,DS_MAT_X,perm2);
234: DSPermuteColumns_Private(ds,l,t,ctx->p,DS_MAT_V,perm);
235: PetscFree2(eig,perm2);
236: return 0;
237: }
239: PetscErrorCode DSUpdateExtraRow_GSVD(DS ds)
240: {
241: DS_GSVD *ctx = (DS_GSVD*)ds->data;
242: PetscInt i;
243: PetscBLASInt n=0,m=0,ld=0;
244: const PetscScalar *U,*V;
245: PetscReal *T,*e,*f,alpha,beta,betah;
249: PetscBLASIntCast(ds->n,&n);
250: PetscBLASIntCast(ctx->m,&m);
251: PetscBLASIntCast(ds->ld,&ld);
252: DSGetArrayReal(ds,DS_MAT_T,&T);
253: e = T+ld;
254: f = T+2*ld;
255: MatDenseGetArrayRead(ds->omat[DS_MAT_U],&U);
256: MatDenseGetArrayRead(ds->omat[DS_MAT_V],&V);
257: if (n<=m) { /* upper variant, A is square upper bidiagonal */
258: beta = e[m-1]; /* in compact, we assume all entries are zero except the last one */
259: betah = f[m-1];
260: for (i=0;i<m;i++) {
261: e[i] = PetscRealPart(beta*U[m-1+i*ld]);
262: f[i] = PetscRealPart(betah*V[m-1+i*ld]);
263: }
264: } else { /* lower variant, A is (m+1)xm lower bidiagonal */
265: alpha = T[m];
266: betah = f[m-1];
267: for (i=0;i<m;i++) {
268: e[i] = PetscRealPart(alpha*U[m+i*ld]);
269: f[i] = PetscRealPart(betah*V[m-1+i*ld]);
270: }
271: T[m] = PetscRealPart(alpha*U[m+m*ld]);
272: }
273: ds->k = m;
274: MatDenseRestoreArrayRead(ds->omat[DS_MAT_U],&U);
275: MatDenseRestoreArrayRead(ds->omat[DS_MAT_V],&V);
276: DSRestoreArrayReal(ds,DS_MAT_T,&T);
277: return 0;
278: }
280: PetscErrorCode DSTruncate_GSVD(DS ds,PetscInt n,PetscBool trim)
281: {
282: DS_GSVD *ctx = (DS_GSVD*)ds->data;
283: PetscScalar *U;
284: PetscReal *T;
285: PetscInt i,m=ctx->m,ld=ds->ld;
286: PetscBool lower=(ds->n>ctx->m)?PETSC_TRUE:PETSC_FALSE;
289: if (trim) {
290: ds->l = 0;
291: ds->k = 0;
292: ds->n = lower? n+1: n;
293: ctx->m = n;
294: ctx->p = n;
295: ds->t = ds->n; /* truncated length equal to the new dimension */
296: ctx->tm = ctx->m; /* must also keep the previous dimension of X */
297: ctx->tp = ctx->p; /* must also keep the previous dimension of V */
298: } else {
299: if (lower) {
300: /* move value of diagonal element of arrow (alpha) */
301: DSGetArrayReal(ds,DS_MAT_T,&T);
302: T[n] = T[m];
303: DSRestoreArrayReal(ds,DS_MAT_T,&T);
304: /* copy last column of U so that it updates the next initial vector of U1 */
305: MatDenseGetArray(ds->omat[DS_MAT_U],&U);
306: for (i=0;i<=m;i++) U[i+n*ld] = U[i+m*ld];
307: MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
308: }
309: ds->k = (ds->extrarow)? n: 0;
310: ds->t = ds->n; /* truncated length equal to previous dimension */
311: ctx->tm = ctx->m; /* must also keep the previous dimension of X */
312: ctx->tp = ctx->p; /* must also keep the previous dimension of V */
313: ds->n = lower? n+1: n;
314: ctx->m = n;
315: ctx->p = n;
316: }
317: return 0;
318: }
320: static PetscErrorCode DSSwitchFormat_GSVD(DS ds)
321: {
322: DS_GSVD *ctx = (DS_GSVD*)ds->data;
323: PetscReal *T,*D;
324: PetscScalar *A,*B;
325: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld,m=ctx->m;
328: /* switch from compact (arrow) to dense storage */
329: /* bidiagonal associated to B is stored in D and T+2*ld */
330: MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A);
331: MatDenseGetArrayWrite(ds->omat[DS_MAT_B],&B);
332: DSGetArrayReal(ds,DS_MAT_T,&T);
333: DSGetArrayReal(ds,DS_MAT_D,&D);
334: PetscArrayzero(A,ld*ld);
335: PetscArrayzero(B,ld*ld);
336: for (i=0;i<k;i++) {
337: A[i+i*ld] = T[i];
338: A[i+k*ld] = T[i+ld];
339: B[i+i*ld] = D[i];
340: B[i+k*ld] = T[i+2*ld];
341: }
342: /* B is upper bidiagonal */
343: B[k+k*ld] = D[k];
344: for (i=k+1;i<m;i++) {
345: B[i+i*ld] = D[i];
346: B[i-1+i*ld] = T[i-1+2*ld];
347: }
348: /* A can be upper (square) or lower bidiagonal */
349: for (i=k;i<m;i++) A[i+i*ld] = T[i];
350: if (n>m) for (i=k;i<m;i++) A[i+1+i*ld] = T[i+ld];
351: else for (i=k+1;i<m;i++) A[i-1+i*ld] = T[i-1+ld];
352: MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A);
353: MatDenseRestoreArrayWrite(ds->omat[DS_MAT_B],&B);
354: DSRestoreArrayReal(ds,DS_MAT_T,&T);
355: DSRestoreArrayReal(ds,DS_MAT_D,&D);
356: return 0;
357: }
359: /*
360: Compact format is used when [A;B] has orthonormal columns.
361: In this case R=I and the GSVD of (A,B) is the CS decomposition
362: */
363: PetscErrorCode DSSolve_GSVD(DS ds,PetscScalar *wr,PetscScalar *wi)
364: {
365: DS_GSVD *ctx = (DS_GSVD*)ds->data;
366: PetscInt i,j;
367: PetscBLASInt n1,m1,info,lc = 0,n = 0,m = 0,p = 0,p1,l,k,q,ld,off,lwork,r;
368: PetscScalar *A,*B,*X,*U,*V,sone=1.0,smone=-1.0;
369: PetscReal *alpha,*beta,*T,*D;
370: #if !defined(SLEPC_MISSING_LAPACK_GGSVD3)
371: PetscScalar a,dummy;
372: PetscReal rdummy;
373: PetscBLASInt idummy;
374: #endif
377: PetscBLASIntCast(ds->n,&m);
378: PetscBLASIntCast(ctx->m,&n);
379: PetscBLASIntCast(ctx->p,&p);
380: PetscBLASIntCast(ds->l,&lc);
382: /* In compact storage B is always nxn and A can be either nxn or (n+1)xn */
384: PetscBLASIntCast(ds->ld,&ld);
385: n1 = n-lc; /* n1 = size of leading block, excl. locked + size of trailing block */
386: m1 = m-lc;
387: p1 = p-lc;
388: off = lc+lc*ld;
389: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
390: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
391: MatDenseGetArray(ds->omat[DS_MAT_X],&X);
392: MatDenseGetArray(ds->omat[DS_MAT_U],&U);
393: MatDenseGetArray(ds->omat[DS_MAT_V],&V);
394: PetscArrayzero(X,ld*ld);
395: for (i=0;i<lc;i++) X[i+i*ld] = 1.0;
396: PetscArrayzero(U,ld*ld);
397: for (i=0;i<lc;i++) U[i+i*ld] = 1.0;
398: PetscArrayzero(V,ld*ld);
399: for (i=0;i<lc;i++) V[i+i*ld] = 1.0;
400: if (ds->compact) DSSwitchFormat_GSVD(ds);
402: #if !defined(SLEPC_MISSING_LAPACK_GGSVD3)
403: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
404: /* workspace query and memory allocation */
405: lwork = -1;
406: #if !defined (PETSC_USE_COMPLEX)
407: PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,&dummy,&ld,&dummy,&ld,&rdummy,&rdummy,&dummy,&ld,&dummy,&ld,&dummy,&ld,&a,&lwork,&idummy,&info));
408: PetscBLASIntCast((PetscInt)a,&lwork);
409: #else
410: PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,&dummy,&ld,&dummy,&ld,&rdummy,&rdummy,&dummy,&ld,&dummy,&ld,&dummy,&ld,&a,&lwork,&rdummy,&idummy,&info));
411: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
412: #endif
414: #if !defined (PETSC_USE_COMPLEX)
415: DSAllocateWork_Private(ds,lwork,2*ds->ld,ds->ld);
416: alpha = ds->rwork;
417: beta = ds->rwork+ds->ld;
418: PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,&lwork,ds->iwork,&info));
419: #else
420: DSAllocateWork_Private(ds,lwork,4*ds->ld,ds->ld);
421: alpha = ds->rwork+2*ds->ld;
422: beta = ds->rwork+3*ds->ld;
423: PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,&lwork,ds->rwork,ds->iwork,&info));
424: #endif
425: PetscFPTrapPop();
426: SlepcCheckLapackInfo("ggsvd3",info);
428: #else /* defined(SLEPC_MISSING_LAPACK_GGSVD3) */
430: lwork = PetscMax(PetscMax(3*n,m),p)+n;
431: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
432: #if !defined (PETSC_USE_COMPLEX)
433: DSAllocateWork_Private(ds,lwork,2*ds->ld,ds->ld);
434: alpha = ds->rwork;
435: beta = ds->rwork+ds->ld;
436: PetscCallBLAS("LAPACKggsvd",LAPACKggsvd_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,ds->iwork,&info));
437: #else
438: DSAllocateWork_Private(ds,lwork,4*ds->ld,ds->ld);
439: alpha = ds->rwork+2*ds->ld;
440: beta = ds->rwork+3*ds->ld;
441: PetscCallBLAS("LAPACKggsvd",LAPACKggsvd_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,ds->rwork,ds->iwork,&info));
442: #endif
443: PetscFPTrapPop();
444: SlepcCheckLapackInfo("ggsvd",info);
446: #endif
449: if (ds->compact) {
450: DSGetArrayReal(ds,DS_MAT_T,&T);
451: DSGetArrayReal(ds,DS_MAT_D,&D);
452: /* R is the identity matrix (except the sign) */
453: for (i=lc;i<n;i++) {
454: if (PetscRealPart(A[i+i*ld])<0.0) { /* scale column i */
455: for (j=lc;j<n;j++) X[j+i*ld] = -X[j+i*ld];
456: }
457: }
458: PetscArrayzero(T+ld,m-1);
459: PetscArrayzero(T+2*ld,n-1);
460: for (i=lc;i<n;i++) {
461: T[i] = alpha[i-lc];
462: D[i] = beta[i-lc];
463: if (D[i]==0.0) wr[i] = PETSC_INFINITY;
464: else wr[i] = T[i]/D[i];
465: }
466: ds->t = n;
467: DSRestoreArrayReal(ds,DS_MAT_D,&D);
468: DSRestoreArrayReal(ds,DS_MAT_T,&T);
469: } else {
470: /* X = X*inv(R) */
471: q = PetscMin(m,n);
472: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n,&q,&sone,A,&ld,X,&ld));
473: if (m<n) {
474: r = n-m;
475: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&r,&m,&sone,X,&ld,A,&ld,&smone,X+m*ld,&ld));
476: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n,&r,&sone,B+m*ld,&ld,X+m*ld,&ld));
477: }
478: if (k>0) {
479: for (i=k;i<PetscMin(m,k+l);i++) {
480: PetscArraycpy(X+(i-k)*ld,X+i*ld,ld);
481: PetscArraycpy(U+(i-k)*ld,U+i*ld,ld);
482: }
483: }
484: /* singular values */
485: PetscArrayzero(A,ld*ld);
486: PetscArrayzero(B,ld*ld);
487: for (j=k;j<PetscMin(m,k+l);j++) {
488: A[(j-k)*(1+ld)] = alpha[j];
489: B[(j-k)*(1+ld)] = beta[j];
490: wr[j-k] = alpha[j]/beta[j];
491: }
492: ds->t = PetscMin(m,k+l)-k; /* set number of computed values */
493: }
494: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
495: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
496: MatDenseRestoreArray(ds->omat[DS_MAT_X],&X);
497: MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
498: MatDenseRestoreArray(ds->omat[DS_MAT_V],&V);
499: return 0;
500: }
502: PetscErrorCode DSCond_GSVD(DS ds,PetscReal *cond)
503: {
504: DS_GSVD *ctx = (DS_GSVD*)ds->data;
505: PetscBLASInt lwork,lrwork=0,info,m,n,p,ld;
506: PetscScalar *A,*work;
507: const PetscScalar *M;
508: PetscReal *sigma,conda,condb;
509: #if defined(PETSC_USE_COMPLEX)
510: PetscReal *rwork;
511: #endif
513: PetscBLASIntCast(ds->n,&m);
514: PetscBLASIntCast(ctx->m,&n);
515: PetscBLASIntCast(ctx->p,&p);
516: PetscBLASIntCast(ds->ld,&ld);
517: lwork = 5*n;
518: #if defined(PETSC_USE_COMPLEX)
519: lrwork = 5*n;
520: #endif
521: DSAllocateWork_Private(ds,ld*n+lwork,n+lrwork,0);
522: A = ds->work;
523: work = ds->work+ld*n;
524: sigma = ds->rwork;
525: #if defined(PETSC_USE_COMPLEX)
526: rwork = ds->rwork+n;
527: #endif
528: if (ds->compact) DSSwitchFormat_GSVD(ds);
530: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
531: MatDenseGetArrayRead(ds->omat[DS_MAT_A],&M);
532: PetscArraycpy(A,M,ld*n);
533: MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&M);
534: #if defined(PETSC_USE_COMPLEX)
535: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,rwork,&info));
536: #else
537: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,&info));
538: #endif
539: SlepcCheckLapackInfo("gesvd",info);
540: conda = sigma[0]/sigma[PetscMin(m,n)-1];
542: MatDenseGetArrayRead(ds->omat[DS_MAT_B],&M);
543: PetscArraycpy(A,M,ld*n);
544: MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&M);
545: #if defined(PETSC_USE_COMPLEX)
546: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&p,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,rwork,&info));
547: #else
548: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&p,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,&info));
549: #endif
550: SlepcCheckLapackInfo("gesvd",info);
551: condb = sigma[0]/sigma[PetscMin(p,n)-1];
552: PetscFPTrapPop();
554: *cond = PetscMax(conda,condb);
555: return 0;
556: }
558: PetscErrorCode DSSynchronize_GSVD(DS ds,PetscScalar eigr[],PetscScalar eigi[])
559: {
560: DS_GSVD *ctx = (DS_GSVD*)ds->data;
561: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
562: PetscMPIInt m=ctx->m,rank,off=0,size,n,ldn,ld3;
563: PetscScalar *A,*U,*V,*X;
564: PetscReal *T;
566: if (ds->compact) kr = 3*ld;
567: else k = 2*(m-l)*ld;
568: if (ds->state>DS_STATE_RAW) k += 3*(m-l)*ld;
569: if (eigr) k += m-l;
570: DSAllocateWork_Private(ds,k+kr,0,0);
571: PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
572: PetscMPIIntCast(m-l,&n);
573: PetscMPIIntCast(ld*(m-l),&ldn);
574: PetscMPIIntCast(3*ld,&ld3);
575: if (ds->compact) DSGetArrayReal(ds,DS_MAT_T,&T);
576: else MatDenseGetArray(ds->omat[DS_MAT_A],&A);
577: if (ds->state>DS_STATE_RAW) {
578: MatDenseGetArray(ds->omat[DS_MAT_U],&U);
579: MatDenseGetArray(ds->omat[DS_MAT_V],&V);
580: MatDenseGetArray(ds->omat[DS_MAT_X],&X);
581: }
582: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
583: if (!rank) {
584: if (ds->compact) MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
585: else MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
586: if (ds->state>DS_STATE_RAW) {
587: MPI_Pack(U+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
588: MPI_Pack(V+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
589: MPI_Pack(X+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
590: }
591: if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
592: }
593: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
594: if (rank) {
595: if (ds->compact) MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
596: else MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
597: if (ds->state>DS_STATE_RAW) {
598: MPI_Unpack(ds->work,size,&off,U+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
599: MPI_Unpack(ds->work,size,&off,V+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
600: MPI_Unpack(ds->work,size,&off,X+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
601: }
602: if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
603: }
604: if (ds->compact) DSRestoreArrayReal(ds,DS_MAT_T,&T);
605: else MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
606: if (ds->state>DS_STATE_RAW) {
607: MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
608: MatDenseRestoreArray(ds->omat[DS_MAT_V],&V);
609: MatDenseRestoreArray(ds->omat[DS_MAT_X],&X);
610: }
611: return 0;
612: }
614: PetscErrorCode DSMatGetSize_GSVD(DS ds,DSMatType t,PetscInt *rows,PetscInt *cols)
615: {
616: DS_GSVD *ctx = (DS_GSVD*)ds->data;
619: switch (t) {
620: case DS_MAT_A:
621: *rows = ds->n;
622: *cols = ds->extrarow? ctx->m+1: ctx->m;
623: break;
624: case DS_MAT_B:
625: *rows = ctx->p;
626: *cols = ds->extrarow? ctx->m+1: ctx->m;
627: break;
628: case DS_MAT_T:
629: *rows = ds->n;
630: *cols = PetscDefined(USE_COMPLEX)? 2: 3;
631: break;
632: case DS_MAT_D:
633: *rows = ctx->p;
634: *cols = 1;
635: break;
636: case DS_MAT_U:
637: *rows = ds->state==DS_STATE_TRUNCATED? ds->t: ds->n;
638: *cols = ds->n;
639: break;
640: case DS_MAT_V:
641: *rows = ds->state==DS_STATE_TRUNCATED? ctx->tp: ctx->p;
642: *cols = ctx->p;
643: break;
644: case DS_MAT_X:
645: *rows = ds->state==DS_STATE_TRUNCATED? ctx->tm: ctx->m;
646: *cols = ctx->m;
647: break;
648: default:
649: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid t parameter");
650: }
651: return 0;
652: }
654: static PetscErrorCode DSGSVDSetDimensions_GSVD(DS ds,PetscInt m,PetscInt p)
655: {
656: DS_GSVD *ctx = (DS_GSVD*)ds->data;
658: DSCheckAlloc(ds,1);
659: if (m==PETSC_DECIDE || m==PETSC_DEFAULT) {
660: ctx->m = ds->ld;
661: } else {
663: ctx->m = m;
664: }
665: if (p==PETSC_DECIDE || p==PETSC_DEFAULT) {
666: ctx->p = ds->n;
667: } else {
669: ctx->p = p;
670: }
671: return 0;
672: }
674: /*@
675: DSGSVDSetDimensions - Sets the number of columns and rows for a DSGSVD.
677: Logically Collective on ds
679: Input Parameters:
680: + ds - the direct solver context
681: . m - the number of columns
682: - p - the number of rows for the second matrix (B)
684: Notes:
685: This call is complementary to DSSetDimensions(), to provide two dimensions
686: that are specific to this DS type. The number of rows for the first matrix (A)
687: is set by DSSetDimensions().
689: Level: intermediate
691: .seealso: DSGSVDGetDimensions(), DSSetDimensions()
692: @*/
693: PetscErrorCode DSGSVDSetDimensions(DS ds,PetscInt m,PetscInt p)
694: {
698: PetscTryMethod(ds,"DSGSVDSetDimensions_C",(DS,PetscInt,PetscInt),(ds,m,p));
699: return 0;
700: }
702: static PetscErrorCode DSGSVDGetDimensions_GSVD(DS ds,PetscInt *m,PetscInt *p)
703: {
704: DS_GSVD *ctx = (DS_GSVD*)ds->data;
706: if (m) *m = ctx->m;
707: if (p) *p = ctx->p;
708: return 0;
709: }
711: /*@
712: DSGSVDGetDimensions - Returns the number of columns and rows for a DSGSVD.
714: Not collective
716: Input Parameter:
717: . ds - the direct solver context
719: Output Parameters:
720: + m - the number of columns
721: - p - the number of rows for the second matrix (B)
723: Level: intermediate
725: .seealso: DSGSVDSetDimensions()
726: @*/
727: PetscErrorCode DSGSVDGetDimensions(DS ds,PetscInt *m,PetscInt *p)
728: {
730: PetscUseMethod(ds,"DSGSVDGetDimensions_C",(DS,PetscInt*,PetscInt*),(ds,m,p));
731: return 0;
732: }
734: PetscErrorCode DSDestroy_GSVD(DS ds)
735: {
736: PetscFree(ds->data);
737: PetscObjectComposeFunction((PetscObject)ds,"DSGSVDSetDimensions_C",NULL);
738: PetscObjectComposeFunction((PetscObject)ds,"DSGSVDGetDimensions_C",NULL);
739: return 0;
740: }
742: /*MC
743: DSGSVD - Dense Generalized Singular Value Decomposition.
745: Level: beginner
747: Notes:
748: The problem is expressed as A*X = U*C, B*X = V*S, where A and B are
749: matrices with the same number of columns, m, U and V are orthogonal
750: (unitary), and X is an mxm invertible matrix. The DS object does not
751: expose matrices C and S, instead the singular values sigma, which are
752: the ratios c_i/s_i, are returned in the arguments of DSSolve().
753: Note that the number of columns of the returned X, U, V may be smaller
754: in the case that some c_i or s_i are zero.
756: The number of rows of A (and U) is the value n passed with DSSetDimensions().
757: The number of columns m and the number of rows of B (and V) must be
758: set via DSGSVDSetDimensions().
760: Internally, LAPACK's representation is used, U'*A*Q = C*[0 R], V'*B*Q = S*[0 R],
761: where X = Q*inv(R) is computed at the end of DSSolve().
763: If the compact storage format is selected, then a simplified problem is
764: solved, where A and B are bidiagonal (possibly with an arrow), and [A;B]
765: is assumed to have orthonormal columns. We consider two cases: (1) A and B
766: are square mxm upper bidiagonal, and (2) A is lower bidiagonal with m+1
767: rows and B is square upper bidiagonal. In these cases, R=I so it
768: corresponds to the CS decomposition. The first matrix is stored in two
769: diagonals of DS_MAT_T, while the second matrix is stored in DS_MAT_D
770: and the remaining diagonal of DS_MAT_T.
772: Allowed arguments of DSVectors() are DS_MAT_U, DS_MAT_V and DS_MAT_X.
774: Used DS matrices:
775: + DS_MAT_A - first problem matrix
776: . DS_MAT_B - second problem matrix
777: . DS_MAT_T - first upper bidiagonal matrix (if compact storage is selected)
778: - DS_MAT_D - second upper bidiagonal matrix (if compact storage is selected)
780: Implemented methods:
781: . 0 - Lapack (_ggsvd3 if available, or _ggsvd)
783: .seealso: DSCreate(), DSSetType(), DSType, DSGSVDSetDimensions()
784: M*/
785: SLEPC_EXTERN PetscErrorCode DSCreate_GSVD(DS ds)
786: {
787: DS_GSVD *ctx;
789: PetscNew(&ctx);
790: ds->data = (void*)ctx;
792: ds->ops->allocate = DSAllocate_GSVD;
793: ds->ops->view = DSView_GSVD;
794: ds->ops->vectors = DSVectors_GSVD;
795: ds->ops->sort = DSSort_GSVD;
796: ds->ops->solve[0] = DSSolve_GSVD;
797: ds->ops->synchronize = DSSynchronize_GSVD;
798: ds->ops->truncate = DSTruncate_GSVD;
799: ds->ops->update = DSUpdateExtraRow_GSVD;
800: ds->ops->cond = DSCond_GSVD;
801: ds->ops->matgetsize = DSMatGetSize_GSVD;
802: ds->ops->destroy = DSDestroy_GSVD;
803: PetscObjectComposeFunction((PetscObject)ds,"DSGSVDSetDimensions_C",DSGSVDSetDimensions_GSVD);
804: PetscObjectComposeFunction((PetscObject)ds,"DSGSVDGetDimensions_C",DSGSVDGetDimensions_GSVD);
805: return 0;
806: }