Actual source code: dgefa2.c
1: #define PETSCMAT_DLL
3: /*
4: Inverts 2 by 2 matrix using partial pivoting.
6: Used by the sparse factorization routines in
7: src/mat/impls/baij/seq
10: This is a combination of the Linpack routines
11: dgefa() and dgedi() specialized for a size of 2.
13: */
14: #include petscsys.h
18: PetscErrorCode Kernel_A_gets_inverse_A_2(MatScalar *a,PetscReal shift)
19: {
20: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[2],k3;
21: PetscInt k4,j3;
22: MatScalar *aa,*ax,*ay,work[4],stmp;
23: MatReal tmp,max;
25: /* gaussian elimination with partial pivoting */
28: /* Parameter adjustments */
29: a -= 3;
31: /*for (k = 1; k <= 1; ++k) {*/
32: k = 1;
33: kp1 = k + 1;
34: k3 = 2*k;
35: k4 = k3 + k;
36: /* find l = pivot index */
38: i__2 = 3 - k;
39: aa = &a[k4];
40: max = PetscAbsScalar(aa[0]);
41: l = 1;
42: for (ll=1; ll<i__2; ll++) {
43: tmp = PetscAbsScalar(aa[ll]);
44: if (tmp > max) { max = tmp; l = ll+1;}
45: }
46: l += k - 1;
47: ipvt[k-1] = l;
49: if (a[l + k3] == 0.0) {
50: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
51: }
53: /* interchange if necessary */
55: if (l != k) {
56: stmp = a[l + k3];
57: a[l + k3] = a[k4];
58: a[k4] = stmp;
59: }
61: /* compute multipliers */
63: stmp = -1. / a[k4];
64: i__2 = 2 - k;
65: aa = &a[1 + k4];
66: for (ll=0; ll<i__2; ll++) {
67: aa[ll] *= stmp;
68: }
70: /* row elimination with column indexing */
72: ax = &a[k4+1];
73: for (j = kp1; j <= 2; ++j) {
74: j3 = 2*j;
75: stmp = a[l + j3];
76: if (l != k) {
77: a[l + j3] = a[k + j3];
78: a[k + j3] = stmp;
79: }
81: i__3 = 2 - k;
82: ay = &a[1+k+j3];
83: for (ll=0; ll<i__3; ll++) {
84: ay[ll] += stmp*ax[ll];
85: }
86: }
87: /*}*/
88: ipvt[1] = 2;
89: if (a[6] == 0.0) {
90: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",1);
91: }
93: /*
94: Now form the inverse
95: */
97: /* compute inverse(u) */
99: for (k = 1; k <= 2; ++k) {
100: k3 = 2*k;
101: k4 = k3 + k;
102: a[k4] = 1.0 / a[k4];
103: stmp = -a[k4];
104: i__2 = k - 1;
105: aa = &a[k3 + 1];
106: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
107: kp1 = k + 1;
108: if (2 < kp1) continue;
109: ax = aa;
110: for (j = kp1; j <= 2; ++j) {
111: j3 = 2*j;
112: stmp = a[k + j3];
113: a[k + j3] = 0.0;
114: ay = &a[j3 + 1];
115: for (ll=0; ll<k; ll++) {
116: ay[ll] += stmp*ax[ll];
117: }
118: }
119: }
121: /* form inverse(u)*inverse(l) */
123: /*for (kb = 1; kb <= 1; ++kb) {*/
124:
125: k = 1;
126: k3 = 2*k;
127: kp1 = k + 1;
128: aa = a + k3;
129: for (i = kp1; i <= 2; ++i) {
130: work[i-1] = aa[i];
131: aa[i] = 0.0;
132: }
133: for (j = kp1; j <= 2; ++j) {
134: stmp = work[j-1];
135: ax = &a[2*j + 1];
136: ay = &a[k3 + 1];
137: ay[0] += stmp*ax[0];
138: ay[1] += stmp*ax[1];
139: }
140: l = ipvt[k-1];
141: if (l != k) {
142: ax = &a[k3 + 1];
143: ay = &a[2*l + 1];
144: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
145: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
146: }
147:
148: return(0);
149: }
153: PetscErrorCode Kernel_A_gets_inverse_A_9(MatScalar *a,PetscReal shift)
154: {
155: PetscInt i__2,i__3,kp1,j,k,l,ll,i,ipvt[9],kb,k3;
156: PetscInt k4,j3;
157: MatScalar *aa,*ax,*ay,work[81],stmp;
158: MatReal tmp,max;
160: /* gaussian elimination with partial pivoting */
163: /* Parameter adjustments */
164: a -= 10;
166: for (k = 1; k <= 8; ++k) {
167: kp1 = k + 1;
168: k3 = 9*k;
169: k4 = k3 + k;
170: /* find l = pivot index */
172: i__2 = 10 - k;
173: aa = &a[k4];
174: max = PetscAbsScalar(aa[0]);
175: l = 1;
176: for (ll=1; ll<i__2; ll++) {
177: tmp = PetscAbsScalar(aa[ll]);
178: if (tmp > max) { max = tmp; l = ll+1;}
179: }
180: l += k - 1;
181: ipvt[k-1] = l;
183: if (a[l + k3] == 0.0) {
184: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
185: }
187: /* interchange if necessary */
189: if (l != k) {
190: stmp = a[l + k3];
191: a[l + k3] = a[k4];
192: a[k4] = stmp;
193: }
195: /* compute multipliers */
197: stmp = -1. / a[k4];
198: i__2 = 9 - k;
199: aa = &a[1 + k4];
200: for (ll=0; ll<i__2; ll++) {
201: aa[ll] *= stmp;
202: }
204: /* row elimination with column indexing */
206: ax = &a[k4+1];
207: for (j = kp1; j <= 9; ++j) {
208: j3 = 9*j;
209: stmp = a[l + j3];
210: if (l != k) {
211: a[l + j3] = a[k + j3];
212: a[k + j3] = stmp;
213: }
215: i__3 = 9 - k;
216: ay = &a[1+k+j3];
217: for (ll=0; ll<i__3; ll++) {
218: ay[ll] += stmp*ax[ll];
219: }
220: }
221: }
222: ipvt[8] = 9;
223: if (a[90] == 0.0) {
224: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
225: }
227: /*
228: Now form the inverse
229: */
231: /* compute inverse(u) */
233: for (k = 1; k <= 9; ++k) {
234: k3 = 9*k;
235: k4 = k3 + k;
236: a[k4] = 1.0 / a[k4];
237: stmp = -a[k4];
238: i__2 = k - 1;
239: aa = &a[k3 + 1];
240: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
241: kp1 = k + 1;
242: if (9 < kp1) continue;
243: ax = aa;
244: for (j = kp1; j <= 9; ++j) {
245: j3 = 9*j;
246: stmp = a[k + j3];
247: a[k + j3] = 0.0;
248: ay = &a[j3 + 1];
249: for (ll=0; ll<k; ll++) {
250: ay[ll] += stmp*ax[ll];
251: }
252: }
253: }
255: /* form inverse(u)*inverse(l) */
257: for (kb = 1; kb <= 8; ++kb) {
258: k = 9 - kb;
259: k3 = 9*k;
260: kp1 = k + 1;
261: aa = a + k3;
262: for (i = kp1; i <= 9; ++i) {
263: work[i-1] = aa[i];
264: aa[i] = 0.0;
265: }
266: for (j = kp1; j <= 9; ++j) {
267: stmp = work[j-1];
268: ax = &a[9*j + 1];
269: ay = &a[k3 + 1];
270: ay[0] += stmp*ax[0];
271: ay[1] += stmp*ax[1];
272: ay[2] += stmp*ax[2];
273: ay[3] += stmp*ax[3];
274: ay[4] += stmp*ax[4];
275: ay[5] += stmp*ax[5];
276: ay[6] += stmp*ax[6];
277: ay[7] += stmp*ax[7];
278: ay[8] += stmp*ax[8];
279: }
280: l = ipvt[k-1];
281: if (l != k) {
282: ax = &a[k3 + 1];
283: ay = &a[9*l + 1];
284: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
285: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
286: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
287: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
288: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
289: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
290: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
291: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
292: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
293: }
294: }
295: return(0);
296: }
298: /*
299: Inverts 15 by 15 matrix using partial pivoting.
301: Used by the sparse factorization routines in
302: src/mat/impls/baij/seq
304: This is a combination of the Linpack routines
305: dgefa() and dgedi() specialized for a size of 15.
307: */
308: #include petsc.h
312: PetscErrorCode Kernel_A_gets_inverse_A_15(MatScalar *a,PetscInt *ipvt,MatScalar *work,PetscReal shift)
313: {
314: PetscInt i__2,i__3,kp1,j,k,l,ll,i,kb,k3;
315: PetscInt k4,j3;
316: MatScalar *aa,*ax,*ay,stmp;
317: MatReal tmp,max;
319: /* gaussian elimination with partial pivoting */
322: /* Parameter adjustments */
323: a -= 16;
325: for (k = 1; k <= 14; ++k) {
326: kp1 = k + 1;
327: k3 = 15*k;
328: k4 = k3 + k;
329: /* find l = pivot index */
331: i__2 = 16 - k;
332: aa = &a[k4];
333: max = PetscAbsScalar(aa[0]);
334: l = 1;
335: for (ll=1; ll<i__2; ll++) {
336: tmp = PetscAbsScalar(aa[ll]);
337: if (tmp > max) { max = tmp; l = ll+1;}
338: }
339: l += k - 1;
340: ipvt[k-1] = l;
342: if (a[l + k3] == 0.0) {
343: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1);
344: }
346: /* interchange if necessary */
348: if (l != k) {
349: stmp = a[l + k3];
350: a[l + k3] = a[k4];
351: a[k4] = stmp;
352: }
354: /* compute multipliers */
356: stmp = -1. / a[k4];
357: i__2 = 15 - k;
358: aa = &a[1 + k4];
359: for (ll=0; ll<i__2; ll++) {
360: aa[ll] *= stmp;
361: }
363: /* row elimination with column indexing */
365: ax = &a[k4+1];
366: for (j = kp1; j <= 15; ++j) {
367: j3 = 15*j;
368: stmp = a[l + j3];
369: if (l != k) {
370: a[l + j3] = a[k + j3];
371: a[k + j3] = stmp;
372: }
374: i__3 = 15 - k;
375: ay = &a[1+k+j3];
376: for (ll=0; ll<i__3; ll++) {
377: ay[ll] += stmp*ax[ll];
378: }
379: }
380: }
381: ipvt[14] = 15;
382: if (a[240] == 0.0) {
383: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",6);
384: }
386: /*
387: Now form the inverse
388: */
390: /* compute inverse(u) */
392: for (k = 1; k <= 15; ++k) {
393: k3 = 15*k;
394: k4 = k3 + k;
395: a[k4] = 1.0 / a[k4];
396: stmp = -a[k4];
397: i__2 = k - 1;
398: aa = &a[k3 + 1];
399: for (ll=0; ll<i__2; ll++) aa[ll] *= stmp;
400: kp1 = k + 1;
401: if (15 < kp1) continue;
402: ax = aa;
403: for (j = kp1; j <= 15; ++j) {
404: j3 = 15*j;
405: stmp = a[k + j3];
406: a[k + j3] = 0.0;
407: ay = &a[j3 + 1];
408: for (ll=0; ll<k; ll++) {
409: ay[ll] += stmp*ax[ll];
410: }
411: }
412: }
414: /* form inverse(u)*inverse(l) */
416: for (kb = 1; kb <= 14; ++kb) {
417: k = 15 - kb;
418: k3 = 15*k;
419: kp1 = k + 1;
420: aa = a + k3;
421: for (i = kp1; i <= 15; ++i) {
422: work[i-1] = aa[i];
423: aa[i] = 0.0;
424: }
425: for (j = kp1; j <= 15; ++j) {
426: stmp = work[j-1];
427: ax = &a[15*j + 1];
428: ay = &a[k3 + 1];
429: ay[0] += stmp*ax[0];
430: ay[1] += stmp*ax[1];
431: ay[2] += stmp*ax[2];
432: ay[3] += stmp*ax[3];
433: ay[4] += stmp*ax[4];
434: ay[5] += stmp*ax[5];
435: ay[6] += stmp*ax[6];
436: ay[7] += stmp*ax[7];
437: ay[8] += stmp*ax[8];
438: ay[9] += stmp*ax[9];
439: ay[10] += stmp*ax[10];
440: ay[11] += stmp*ax[11];
441: ay[12] += stmp*ax[12];
442: ay[13] += stmp*ax[13];
443: ay[14] += stmp*ax[14];
444: }
445: l = ipvt[k-1];
446: if (l != k) {
447: ax = &a[k3 + 1];
448: ay = &a[15*l + 1];
449: stmp = ax[0]; ax[0] = ay[0]; ay[0] = stmp;
450: stmp = ax[1]; ax[1] = ay[1]; ay[1] = stmp;
451: stmp = ax[2]; ax[2] = ay[2]; ay[2] = stmp;
452: stmp = ax[3]; ax[3] = ay[3]; ay[3] = stmp;
453: stmp = ax[4]; ax[4] = ay[4]; ay[4] = stmp;
454: stmp = ax[5]; ax[5] = ay[5]; ay[5] = stmp;
455: stmp = ax[6]; ax[6] = ay[6]; ay[6] = stmp;
456: stmp = ax[7]; ax[7] = ay[7]; ay[7] = stmp;
457: stmp = ax[8]; ax[8] = ay[8]; ay[8] = stmp;
458: stmp = ax[9]; ax[9] = ay[9]; ay[9] = stmp;
459: stmp = ax[10]; ax[10] = ay[10]; ay[10] = stmp;
460: stmp = ax[11]; ax[11] = ay[11]; ay[11] = stmp;
461: stmp = ax[12]; ax[12] = ay[12]; ay[12] = stmp;
462: stmp = ax[13]; ax[13] = ay[13]; ay[13] = stmp;
463: stmp = ax[14]; ax[14] = ay[14]; ay[14] = stmp;
464: }
465: }
466: return(0);
467: }