Actual source code: relax.h
2: /*
3: This is included by sbaij.c to generate unsigned short and regular versions of these two functions
4: */
7: #if defined(USESHORT)
8: PetscErrorCode MatMult_SeqSBAIJ_1_Hermitian_ushort(Mat A,Vec xx,Vec zz)
9: #else
10: PetscErrorCode MatMult_SeqSBAIJ_1_Hermitian(Mat A,Vec xx,Vec zz)
11: #endif
12: {
13: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
14: const PetscScalar *x;
15: PetscScalar *z,x1,sum;
16: const MatScalar *v;
17: MatScalar vj;
18: PetscErrorCode ierr;
19: PetscInt mbs=a->mbs,i,j,nz;
20: const PetscInt *ai=a->i;
21: #if defined(USESHORT)
22: const unsigned short *ib=a->jshort;
23: unsigned short ibt;
24: #else
25: const PetscInt *ib=a->j;
26: PetscInt ibt;
27: #endif
30: VecSet(zz,0.0);
31: VecGetArray(xx,(PetscScalar**)&x);
32: VecGetArray(zz,&z);
34: v = a->a;
35: for (i=0; i<mbs; i++) {
36: nz = ai[i+1] - ai[i]; /* length of i_th row of A */
37: x1 = x[i];
38: sum = v[0]*x1; /* diagonal term */
39: for (j=1; j<nz; j++) {
40: ibt = ib[j];
41: vj = v[j];
42: sum += vj * x[ibt]; /* (strict upper triangular part of A)*x */
43: z[ibt] += PetscConj(v[j]) * x1; /* (strict lower triangular part of A)*x */
44: }
45: z[i] += sum;
46: v += nz;
47: ib += nz;
48: }
50: VecRestoreArray(xx,(PetscScalar**)&x);
51: VecRestoreArray(zz,&z);
52: PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);
53: return(0);
54: }
58: #if defined(USESHORT)
59: PetscErrorCode MatMult_SeqSBAIJ_1_ushort(Mat A,Vec xx,Vec zz)
60: #else
61: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
62: #endif
63: {
64: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
65: const PetscScalar *x;
66: PetscScalar *z,x1,sum;
67: const MatScalar *v;
68: MatScalar vj;
69: PetscErrorCode ierr;
70: PetscInt mbs=a->mbs,i,j,nz;
71: const PetscInt *ai=a->i;
72: #if defined(USESHORT)
73: const unsigned short *ib=a->jshort;
74: unsigned short ibt;
75: #else
76: const PetscInt *ib=a->j;
77: PetscInt ibt;
78: #endif
81: VecSet(zz,0.0);
82: VecGetArray(xx,(PetscScalar**)&x);
83: VecGetArray(zz,&z);
85: v = a->a;
86: for (i=0; i<mbs; i++) {
87: nz = ai[i+1] - ai[i]; /* length of i_th row of A */
88: x1 = x[i];
89: sum = v[0]*x1; /* diagonal term */
90: for (j=1; j<nz; j++) {
91: ibt = ib[j];
92: vj = v[j];
93: z[ibt] += vj * x1; /* (strict lower triangular part of A)*x */
94: sum += vj * x[ibt]; /* (strict upper triangular part of A)*x */
95: }
96: z[i] += sum;
97: v += nz;
98: ib += nz;
99: }
101: VecRestoreArray(xx,(PetscScalar**)&x);
102: VecRestoreArray(zz,&z);
103: PetscLogFlops(2.0*(2.0*a->nz - mbs) - mbs);
104: return(0);
105: }
109: #if defined(USESHORT)
110: PetscErrorCode MatSOR_SeqSBAIJ_ushort(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
111: #else
112: PetscErrorCode MatSOR_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
113: #endif
114: {
115: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
116: const MatScalar *aa=a->a,*v,*v1,*aidiag;
117: PetscScalar *x,*t,sum;
118: const PetscScalar *b;
119: MatScalar tmp;
120: PetscErrorCode ierr;
121: PetscInt m=a->mbs,bs=A->rmap->bs,j;
122: const PetscInt *ai=a->i;
123: #if defined(USESHORT)
124: const unsigned short *aj=a->jshort,*vj,*vj1;
125: #else
126: const PetscInt *aj=a->j,*vj,*vj1;
127: #endif
128: PetscInt nz,nz1,i;
131: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
133: its = its*lits;
134: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
136: if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
138: VecGetArray(xx,&x);
139: if (xx != bb) {
140: VecGetArray(bb,(PetscScalar**)&b);
141: } else {
142: b = x;
143: }
145: if (!a->idiagvalid) {
146: if (!a->idiag) {
147: PetscMalloc(m*sizeof(PetscScalar),&a->idiag);
148: }
149: for (i=0; i<a->mbs; i++) a->idiag[i] = 1.0/a->a[a->i[i]];
150: a->idiagvalid = PETSC_TRUE;
151: }
153: if (!a->sor_work) {
154: PetscMalloc(m*sizeof(PetscScalar),&a->sor_work);
155: }
156: t = a->sor_work;
158: aidiag = a->idiag;
160: if (flag == SOR_APPLY_UPPER) {
161: /* apply (U + D/omega) to the vector */
162: PetscScalar d;
163: for (i=0; i<m; i++) {
164: d = fshift + aa[ai[i]];
165: nz = ai[i+1] - ai[i] - 1;
166: vj = aj + ai[i] + 1;
167: v = aa + ai[i] + 1;
168: sum = b[i]*d/omega;
169: PetscSparseDensePlusDot(sum,b,v,vj,nz);
170: x[i] = sum;
171: }
172: PetscLogFlops(a->nz);
173: }
175: if (flag & SOR_ZERO_INITIAL_GUESS) {
176: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
177: PetscMemcpy(t,b,m*sizeof(PetscScalar));
179: v = aa + 1;
180: vj = aj + 1;
181: for (i=0; i<m; i++){
182: nz = ai[i+1] - ai[i] - 1;
183: tmp = - (x[i] = omega*t[i]*aidiag[i]);
184: for (j=0; j<nz; j++) {
185: t[vj[j]] += tmp*v[j];
186: }
187: v += nz + 1;
188: vj += nz + 1;
189: }
190: PetscLogFlops(2*a->nz);
191: }
193: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
194: int nz2;
195: if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){
196: #if defined(PETSC_USE_BACKWARD_LOOP)
197: v = aa + ai[m] - 1;
198: vj = aj + ai[m] - 1;
199: for (i=m-1; i>=0; i--){
200: sum = b[i];
201: nz = ai[i+1] - ai[i] - 1;
202: {PetscInt __i;for(__i=0;__i<nz;__i++) sum -= v[-__i] * x[vj[-__i]];}
203: #else
204: v = aa + ai[m-1] + 1;
205: vj = aj + ai[m-1] + 1;
206: nz = 0;
207: for (i=m-1; i>=0; i--){
208: sum = b[i];
209: nz2 = ai[i] - ai[i-1] - 1;
210: PETSC_Prefetch(v-nz2-1,0,1);
211: PETSC_Prefetch(vj-nz2-1,0,1);
212: PetscSparseDenseMinusDot(sum,x,v,vj,nz);
213: nz = nz2;
214: #endif
215: x[i] = omega*sum*aidiag[i];
216: v -= nz + 1;
217: vj -= nz + 1;
218: }
219: PetscLogFlops(2*a->nz);
220: } else {
221: v = aa + ai[m-1] + 1;
222: vj = aj + ai[m-1] + 1;
223: nz = 0;
224: for (i=m-1; i>=0; i--){
225: sum = t[i];
226: nz2 = ai[i] - ai[i-1] - 1;
227: PETSC_Prefetch(v-nz2-1,0,1);
228: PETSC_Prefetch(vj-nz2-1,0,1);
229: PetscSparseDenseMinusDot(sum,x,v,vj,nz);
230: x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
231: nz = nz2;
232: v -= nz + 1;
233: vj -= nz + 1;
234: }
235: PetscLogFlops(2*a->nz);
236: }
237: }
238: its--;
239: }
241: while (its--) {
242: /*
243: forward sweep:
244: for i=0,...,m-1:
245: sum[i] = (b[i] - U(i,:)x )/d[i];
246: x[i] = (1-omega)x[i] + omega*sum[i];
247: b = b - x[i]*U^T(i,:);
248:
249: */
250: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
251: PetscMemcpy(t,b,m*sizeof(PetscScalar));
253: for (i=0; i<m; i++){
254: v = aa + ai[i] + 1; v1=v;
255: vj = aj + ai[i] + 1; vj1=vj;
256: nz = ai[i+1] - ai[i] - 1; nz1=nz;
257: sum = t[i];
258: PetscLogFlops(4.0*nz-2);
259: while (nz1--) sum -= (*v1++)*x[*vj1++];
260: x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
261: while (nz--) t[*vj++] -= x[i]*(*v++);
262: }
263: }
264:
265: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
266: /*
267: backward sweep:
268: b = b - x[i]*U^T(i,:), i=0,...,n-2
269: for i=m-1,...,0:
270: sum[i] = (b[i] - U(i,:)x )/d[i];
271: x[i] = (1-omega)x[i] + omega*sum[i];
272: */
273: /* if there was a forward sweep done above then I thing the next two for loops are not needed */
274: PetscMemcpy(t,b,m*sizeof(PetscScalar));
275:
276: for (i=0; i<m-1; i++){ /* update rhs */
277: v = aa + ai[i] + 1;
278: vj = aj + ai[i] + 1;
279: nz = ai[i+1] - ai[i] - 1;
280: PetscLogFlops(2.0*nz-1);
281: while (nz--) t[*vj++] -= x[i]*(*v++);
282: }
283: for (i=m-1; i>=0; i--){
284: v = aa + ai[i] + 1;
285: vj = aj + ai[i] + 1;
286: nz = ai[i+1] - ai[i] - 1;
287: PetscLogFlops(2.0*nz-1);
288: sum = t[i];
289: while (nz--) sum -= x[*vj++]*(*v++);
290: x[i] = (1-omega)*x[i] + omega*sum*aidiag[i];
291: }
292: }
293: }
295: VecRestoreArray(xx,&x);
296: if (bb != xx) {
297: VecRestoreArray(bb,(PetscScalar**)&b);
298: }
299: return(0);
300: }