Actual source code: dvec2.c
1: #define PETSCVEC_DLL
3: /*
4: Defines some vector operation functions that are shared by
5: sequential and parallel vectors.
6: */
7: #include ../src/vec/vec/impls/dvecimpl.h
8: #include private/petscaxpy.h
10: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
11: #include "../src/vec/vec/impls/seq/ftn-kernels/fmdot.h"
14: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
15: {
16: Vec_Seq *xv = (Vec_Seq *)xin->data;
17: PetscErrorCode ierr;
18: PetscInt i,nv_rem,n = xin->map->n;
19: PetscScalar sum0,sum1,sum2,sum3;
20: const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
21: Vec *yy;
24: sum0 = 0.0;
25: sum1 = 0.0;
26: sum2 = 0.0;
28: i = nv;
29: nv_rem = nv&0x3;
30: yy = (Vec*)yin;
31: x = xv->array;
33: switch (nv_rem) {
34: case 3:
35: VecGetArray(yy[0],(PetscScalar**)&yy0);
36: VecGetArray(yy[1],(PetscScalar**)&yy1);
37: VecGetArray(yy[2],(PetscScalar**)&yy2);
38: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
39: VecRestoreArray(yy[0],(PetscScalar**)&yy0);
40: VecRestoreArray(yy[1],(PetscScalar**)&yy1);
41: VecRestoreArray(yy[2],(PetscScalar**)&yy2);
42: z[0] = sum0;
43: z[1] = sum1;
44: z[2] = sum2;
45: break;
46: case 2:
47: VecGetArray(yy[0],(PetscScalar**)&yy0);
48: VecGetArray(yy[1],(PetscScalar**)&yy1);
49: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
50: VecRestoreArray(yy[0],(PetscScalar**)&yy0);
51: VecRestoreArray(yy[1],(PetscScalar**)&yy1);
52: z[0] = sum0;
53: z[1] = sum1;
54: break;
55: case 1:
56: VecGetArray(yy[0],(PetscScalar**)&yy0);
57: fortranmdot1_(x,yy0,&n,&sum0);
58: VecRestoreArray(yy[0],(PetscScalar**)&yy0);
59: z[0] = sum0;
60: break;
61: case 0:
62: break;
63: }
64: z += nv_rem;
65: i -= nv_rem;
66: yy += nv_rem;
68: while (i >0) {
69: sum0 = 0.;
70: sum1 = 0.;
71: sum2 = 0.;
72: sum3 = 0.;
73: VecGetArray(yy[0],(PetscScalar**)&yy0);
74: VecGetArray(yy[1],(PetscScalar**)&yy1);
75: VecGetArray(yy[2],(PetscScalar**)&yy2);
76: VecGetArray(yy[3],(PetscScalar**)&yy3);
77: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
78: VecRestoreArray(yy[0],(PetscScalar**)&yy0);
79: VecRestoreArray(yy[1],(PetscScalar**)&yy1);
80: VecRestoreArray(yy[2],(PetscScalar**)&yy2);
81: VecRestoreArray(yy[3],(PetscScalar**)&yy3);
82: yy += 4;
83: z[0] = sum0;
84: z[1] = sum1;
85: z[2] = sum2;
86: z[3] = sum3;
87: z += 4;
88: i -= 4;
89: }
90: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
91: return(0);
92: }
94: #else
97: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
98: {
99: Vec_Seq *xv = (Vec_Seq *)xin->data;
100: PetscErrorCode ierr;
101: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
102: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3;
103: const PetscScalar * PETSC_RESTRICT yy0,* PETSC_RESTRICT yy1,* PETSC_RESTRICT yy2,*PETSC_RESTRICT yy3,* PETSC_RESTRICT x;
104: Vec *yy;
107: sum0 = 0.;
108: sum1 = 0.;
109: sum2 = 0.;
111: i = nv;
112: nv_rem = nv&0x3;
113: yy = (Vec *)yin;
114: j = n;
115: x = xv->array;
117: switch (nv_rem) {
118: case 3:
119: VecGetArray(yy[0],(PetscScalar **)&yy0);
120: VecGetArray(yy[1],(PetscScalar **)&yy1);
121: VecGetArray(yy[2],(PetscScalar **)&yy2);
122: switch (j_rem=j&0x3) {
123: case 3:
124: x2 = x[2];
125: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
126: sum2 += x2*PetscConj(yy2[2]);
127: case 2:
128: x1 = x[1];
129: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
130: sum2 += x1*PetscConj(yy2[1]);
131: case 1:
132: x0 = x[0];
133: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
134: sum2 += x0*PetscConj(yy2[0]);
135: case 0:
136: x += j_rem;
137: yy0 += j_rem;
138: yy1 += j_rem;
139: yy2 += j_rem;
140: j -= j_rem;
141: break;
142: }
143: while (j>0) {
144: x0 = x[0];
145: x1 = x[1];
146: x2 = x[2];
147: x3 = x[3];
148: x += 4;
149:
150: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
151: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
152: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
153: j -= 4;
154: }
155: z[0] = sum0;
156: z[1] = sum1;
157: z[2] = sum2;
158: VecRestoreArray(yy[0],(PetscScalar **)&yy0);
159: VecRestoreArray(yy[1],(PetscScalar **)&yy1);
160: VecRestoreArray(yy[2],(PetscScalar **)&yy2);
161: break;
162: case 2:
163: VecGetArray(yy[0],(PetscScalar **)&yy0);
164: VecGetArray(yy[1],(PetscScalar **)&yy1);
165: switch (j_rem=j&0x3) {
166: case 3:
167: x2 = x[2];
168: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
169: case 2:
170: x1 = x[1];
171: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
172: case 1:
173: x0 = x[0];
174: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
175: case 0:
176: x += j_rem;
177: yy0 += j_rem;
178: yy1 += j_rem;
179: j -= j_rem;
180: break;
181: }
182: while (j>0) {
183: x0 = x[0];
184: x1 = x[1];
185: x2 = x[2];
186: x3 = x[3];
187: x += 4;
188:
189: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
190: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
191: j -= 4;
192: }
193: z[0] = sum0;
194: z[1] = sum1;
195:
196: VecRestoreArray(yy[0],(PetscScalar **)&yy0);
197: VecRestoreArray(yy[1],(PetscScalar **)&yy1);
198: break;
199: case 1:
200: VecGetArray(yy[0],(PetscScalar **)&yy0);
201: switch (j_rem=j&0x3) {
202: case 3:
203: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
204: case 2:
205: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
206: case 1:
207: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
208: case 0:
209: x += j_rem;
210: yy0 += j_rem;
211: j -= j_rem;
212: break;
213: }
214: while (j>0) {
215: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
216: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
217: yy0+=4;
218: j -= 4; x+=4;
219: }
220: z[0] = sum0;
222: VecRestoreArray(yy[0],(PetscScalar **)&yy0);
223: break;
224: case 0:
225: break;
226: }
227: z += nv_rem;
228: i -= nv_rem;
229: yy += nv_rem;
231: while (i >0) {
232: sum0 = 0.;
233: sum1 = 0.;
234: sum2 = 0.;
235: sum3 = 0.;
236: VecGetArray(yy[0],(PetscScalar **)&yy0);
237: VecGetArray(yy[1],(PetscScalar **)&yy1);
238: VecGetArray(yy[2],(PetscScalar **)&yy2);
239: VecGetArray(yy[3],(PetscScalar **)&yy3);
241: j = n;
242: x = xv->array;
243: switch (j_rem=j&0x3) {
244: case 3:
245: x2 = x[2];
246: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
247: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
248: case 2:
249: x1 = x[1];
250: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
251: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
252: case 1:
253: x0 = x[0];
254: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
255: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
256: case 0:
257: x += j_rem;
258: yy0 += j_rem;
259: yy1 += j_rem;
260: yy2 += j_rem;
261: yy3 += j_rem;
262: j -= j_rem;
263: break;
264: }
265: while (j>0) {
266: x0 = x[0];
267: x1 = x[1];
268: x2 = x[2];
269: x3 = x[3];
270: x += 4;
271:
272: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
273: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
274: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
275: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
276: j -= 4;
277: }
278: z[0] = sum0;
279: z[1] = sum1;
280: z[2] = sum2;
281: z[3] = sum3;
282: z += 4;
283: i -= 4;
284: VecRestoreArray(yy[0],(PetscScalar **)&yy0);
285: VecRestoreArray(yy[1],(PetscScalar **)&yy1);
286: VecRestoreArray(yy[2],(PetscScalar **)&yy2);
287: VecRestoreArray(yy[3],(PetscScalar **)&yy3);
288: yy += 4;
289: }
290: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
291: return(0);
292: }
293: #endif
295: /* ----------------------------------------------------------------------------*/
298: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
299: {
300: Vec_Seq *xv = (Vec_Seq *)xin->data;
302: PetscInt n = xin->map->n,i,j,nv_rem,j_rem;
303: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
304: Vec *yy;
305:
308: sum0 = 0.;
309: sum1 = 0.;
310: sum2 = 0.;
312: i = nv;
313: nv_rem = nv&0x3;
314: yy = (Vec*)yin;
315: j = n;
316: x = xv->array;
318: switch (nv_rem) {
319: case 3:
320: VecGetArray(yy[0],&yy0);
321: VecGetArray(yy[1],&yy1);
322: VecGetArray(yy[2],&yy2);
323: switch (j_rem=j&0x3) {
324: case 3:
325: x2 = x[2];
326: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
327: sum2 += x2*yy2[2];
328: case 2:
329: x1 = x[1];
330: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
331: sum2 += x1*yy2[1];
332: case 1:
333: x0 = x[0];
334: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
335: sum2 += x0*yy2[0];
336: case 0:
337: x += j_rem;
338: yy0 += j_rem;
339: yy1 += j_rem;
340: yy2 += j_rem;
341: j -= j_rem;
342: break;
343: }
344: while (j>0) {
345: x0 = x[0];
346: x1 = x[1];
347: x2 = x[2];
348: x3 = x[3];
349: x += 4;
350:
351: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
352: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
353: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
354: j -= 4;
355: }
356: z[0] = sum0;
357: z[1] = sum1;
358: z[2] = sum2;
359: VecRestoreArray(yy[0],&yy0);
360: VecRestoreArray(yy[1],&yy1);
361: VecRestoreArray(yy[2],&yy2);
362: break;
363: case 2:
364: VecGetArray(yy[0],&yy0);
365: VecGetArray(yy[1],&yy1);
366: switch (j_rem=j&0x3) {
367: case 3:
368: x2 = x[2];
369: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
370: case 2:
371: x1 = x[1];
372: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
373: case 1:
374: x0 = x[0];
375: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
376: case 0:
377: x += j_rem;
378: yy0 += j_rem;
379: yy1 += j_rem;
380: j -= j_rem;
381: break;
382: }
383: while (j>0) {
384: x0 = x[0];
385: x1 = x[1];
386: x2 = x[2];
387: x3 = x[3];
388: x += 4;
389:
390: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
391: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
392: j -= 4;
393: }
394: z[0] = sum0;
395: z[1] = sum1;
396:
397: VecRestoreArray(yy[0],&yy0);
398: VecRestoreArray(yy[1],&yy1);
399: break;
400: case 1:
401: VecGetArray(yy[0],&yy0);
402: switch (j_rem=j&0x3) {
403: case 3:
404: x2 = x[2]; sum0 += x2*yy0[2];
405: case 2:
406: x1 = x[1]; sum0 += x1*yy0[1];
407: case 1:
408: x0 = x[0]; sum0 += x0*yy0[0];
409: case 0:
410: x += j_rem;
411: yy0 += j_rem;
412: j -= j_rem;
413: break;
414: }
415: while (j>0) {
416: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
417: j -= 4; x+=4;
418: }
419: z[0] = sum0;
421: VecRestoreArray(yy[0],&yy0);
422: break;
423: case 0:
424: break;
425: }
426: z += nv_rem;
427: i -= nv_rem;
428: yy += nv_rem;
430: while (i >0) {
431: sum0 = 0.;
432: sum1 = 0.;
433: sum2 = 0.;
434: sum3 = 0.;
435: VecGetArray(yy[0],&yy0);
436: VecGetArray(yy[1],&yy1);
437: VecGetArray(yy[2],&yy2);
438: VecGetArray(yy[3],&yy3);
440: j = n;
441: x = xv->array;
442: switch (j_rem=j&0x3) {
443: case 3:
444: x2 = x[2];
445: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
446: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
447: case 2:
448: x1 = x[1];
449: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
450: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
451: case 1:
452: x0 = x[0];
453: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
454: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
455: case 0:
456: x += j_rem;
457: yy0 += j_rem;
458: yy1 += j_rem;
459: yy2 += j_rem;
460: yy3 += j_rem;
461: j -= j_rem;
462: break;
463: }
464: while (j>0) {
465: x0 = x[0];
466: x1 = x[1];
467: x2 = x[2];
468: x3 = x[3];
469: x += 4;
470:
471: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
472: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
473: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
474: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
475: j -= 4;
476: }
477: z[0] = sum0;
478: z[1] = sum1;
479: z[2] = sum2;
480: z[3] = sum3;
481: z += 4;
482: i -= 4;
483: VecRestoreArray(yy[0],&yy0);
484: VecRestoreArray(yy[1],&yy1);
485: VecRestoreArray(yy[2],&yy2);
486: VecRestoreArray(yy[3],&yy3);
487: yy += 4;
488: }
489: PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
490: return(0);
491: }
492:
496: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
497: {
498: Vec_Seq *x = (Vec_Seq*)xin->data;
499: PetscInt i,j=0,n = xin->map->n;
500: PetscReal max,tmp;
501: PetscScalar *xx = x->array;
504: if (!n) {
505: max = PETSC_MIN;
506: j = -1;
507: } else {
508: #if defined(PETSC_USE_COMPLEX)
509: max = PetscRealPart(*xx++); j = 0;
510: #else
511: max = *xx++; j = 0;
512: #endif
513: for (i=1; i<n; i++) {
514: #if defined(PETSC_USE_COMPLEX)
515: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
516: #else
517: if ((tmp = *xx++) > max) { j = i; max = tmp; }
518: #endif
519: }
520: }
521: *z = max;
522: if (idx) *idx = j;
523: return(0);
524: }
528: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
529: {
530: Vec_Seq *x = (Vec_Seq*)xin->data;
531: PetscInt i,j=0,n = xin->map->n;
532: PetscReal min,tmp;
533: PetscScalar *xx = x->array;
536: if (!n) {
537: min = PETSC_MAX;
538: j = -1;
539: } else {
540: #if defined(PETSC_USE_COMPLEX)
541: min = PetscRealPart(*xx++); j = 0;
542: #else
543: min = *xx++; j = 0;
544: #endif
545: for (i=1; i<n; i++) {
546: #if defined(PETSC_USE_COMPLEX)
547: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
548: #else
549: if ((tmp = *xx++) < min) { j = i; min = tmp; }
550: #endif
551: }
552: }
553: *z = min;
554: if (idx) *idx = j;
555: return(0);
556: }
560: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
561: {
562: Vec_Seq *x = (Vec_Seq *)xin->data;
564: PetscInt i,n = xin->map->n;
565: PetscScalar *xx = x->array;
568: if (alpha == 0.0) {
569: PetscMemzero(xx,n*sizeof(PetscScalar));
570: } else {
571: for (i=0; i<n; i++) xx[i] = alpha;
572: }
573: return(0);
574: }
578: PetscErrorCode VecSetRandom_Seq(Vec xin,PetscRandom r)
579: {
581: PetscInt n = xin->map->n,i;
582: PetscScalar *xx;
585: VecGetArray(xin,&xx);
586: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
587: VecRestoreArray(xin,&xx);
588: return(0);
589: }
593: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
594: {
595: Vec_Seq *xdata = (Vec_Seq*)xin->data;
596: PetscErrorCode ierr;
597: PetscInt n = xin->map->n,j,j_rem;
598: const PetscScalar *yy0,*yy1,*yy2,*yy3;
599: PetscScalar *xx,alpha0,alpha1,alpha2,alpha3;
601: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
602: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
603: #endif
606: PetscLogFlops(nv*2.0*n);
608: xx = xdata->array;
609: switch (j_rem=nv&0x3) {
610: case 3:
611: VecGetArray(y[0],(PetscScalar**)&yy0);
612: VecGetArray(y[1],(PetscScalar**)&yy1);
613: VecGetArray(y[2],(PetscScalar**)&yy2);
614: alpha0 = alpha[0];
615: alpha1 = alpha[1];
616: alpha2 = alpha[2];
617: alpha += 3;
618: PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
619: VecRestoreArray(y[0],(PetscScalar**)&yy0);
620: VecRestoreArray(y[1],(PetscScalar**)&yy1);
621: VecRestoreArray(y[2],(PetscScalar**)&yy2);
622: y += 3;
623: break;
624: case 2:
625: VecGetArray(y[0],(PetscScalar**)&yy0);
626: VecGetArray(y[1],(PetscScalar**)&yy1);
627: alpha0 = alpha[0];
628: alpha1 = alpha[1];
629: alpha +=2;
630: PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
631: VecRestoreArray(y[0],(PetscScalar**)&yy0);
632: VecRestoreArray(y[1],(PetscScalar**)&yy1);
633: y +=2;
634: break;
635: case 1:
636: VecGetArray(y[0],(PetscScalar**)&yy0);
637: alpha0 = *alpha++;
638: PetscAXPY(xx,alpha0,yy0,n);
639: VecRestoreArray(y[0],(PetscScalar**)&yy0);
640: y +=1;
641: break;
642: }
643: for (j=j_rem; j<nv; j+=4) {
644: VecGetArray(y[0],(PetscScalar**)&yy0);
645: VecGetArray(y[1],(PetscScalar**)&yy1);
646: VecGetArray(y[2],(PetscScalar**)&yy2);
647: VecGetArray(y[3],(PetscScalar**)&yy3);
648: alpha0 = alpha[0];
649: alpha1 = alpha[1];
650: alpha2 = alpha[2];
651: alpha3 = alpha[3];
652: alpha += 4;
654: PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
655: VecRestoreArray(y[0],(PetscScalar**)&yy0);
656: VecRestoreArray(y[1],(PetscScalar**)&yy1);
657: VecRestoreArray(y[2],(PetscScalar**)&yy2);
658: VecRestoreArray(y[3],(PetscScalar**)&yy3);
659: y += 4;
660: }
661: return(0);
662: }
664: #include "../src/vec/vec/impls/seq/ftn-kernels/faypx.h"
667: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
668: {
669: Vec_Seq *y = (Vec_Seq *)yin->data;
670: PetscErrorCode ierr;
671: PetscInt n = yin->map->n;
672: PetscScalar *yy = y->array;
673: const PetscScalar *xx;
676: if (alpha == 0.0) {
677: VecCopy_Seq(xin,yin);
678: } else if (alpha == 1.0) {
679: VecAXPY_Seq(yin,alpha,xin);
680: } else if (alpha == -1.0) {
681: PetscInt i;
682: VecGetArray(xin,(PetscScalar**)&xx);
683: for (i=0; i<n; i++) {
684: yy[i] = xx[i] - yy[i];
685: }
686: VecRestoreArray(xin,(PetscScalar**)&xx);
687: PetscLogFlops(1.0*n);
688: } else {
689: VecGetArray(xin,(PetscScalar**)&xx);
690: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
691: {
692: PetscScalar oalpha = alpha;
693: fortranaypx_(&n,&oalpha,xx,yy);
694: }
695: #else
696: {
697: PetscInt i;
698: for (i=0; i<n; i++) {
699: yy[i] = xx[i] + alpha*yy[i];
700: }
701: }
702: #endif
703: VecRestoreArray(xin,(PetscScalar**)&xx);
704: PetscLogFlops(2.0*n);
705: }
706: return(0);
707: }
709: #include "../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h"
710: /*
711: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
712: to be slower than a regular C loop. Hence,we do not include it.
713: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
714: */
718: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
719: {
720: Vec_Seq *w = (Vec_Seq *)win->data;
721: PetscErrorCode ierr;
722: PetscInt i,n = win->map->n;
723: PetscScalar *ww = w->array;
724: const PetscScalar *yy,*xx;
727: VecGetArray(yin,(PetscScalar**)&yy);
728: VecGetArray(xin,(PetscScalar**)&xx);
729: if (alpha == 1.0) {
730: PetscLogFlops(n);
731: /* could call BLAS axpy after call to memcopy, but may be slower */
732: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
733: } else if (alpha == -1.0) {
734: PetscLogFlops(n);
735: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
736: } else if (alpha == 0.0) {
737: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
738: } else {
739: PetscScalar oalpha = alpha;
740: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
741: fortranwaxpy_(&n,&oalpha,xx,yy,ww);
742: #else
743: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
744: #endif
745: PetscLogFlops(2.0*n);
746: }
747: VecRestoreArray(yin,(PetscScalar**)&yy);
748: VecRestoreArray(xin,(PetscScalar**)&xx);
749: return(0);
750: }
754: PetscErrorCode VecPointwiseMax_Seq(Vec win,Vec xin,Vec yin)
755: {
756: Vec_Seq *w = (Vec_Seq *)win->data;
758: PetscInt n = win->map->n,i;
759: PetscScalar *ww = w->array,*xx,*yy;
762: VecGetArray(xin,&xx);
763: if (xin != yin) {
764: VecGetArray(yin,&yy);
765: } else {
766: yy = xx;
767: }
768: for (i=0; i<n; i++) {
769: ww[i] = PetscMax(PetscRealPart(xx[i]),PetscRealPart(yy[i]));
770: }
771: VecRestoreArray(xin,&xx);
772: if (xin != yin) {
773: VecRestoreArray(yin,&yy);
774: }
775: PetscLogFlops(n);
776: return(0);
777: }
781: PetscErrorCode VecPointwiseMin_Seq(Vec win,Vec xin,Vec yin)
782: {
783: Vec_Seq *w = (Vec_Seq *)win->data;
785: PetscInt n = win->map->n,i;
786: PetscScalar *ww = w->array,*xx,*yy;
789: VecGetArray(xin,&xx);
790: if (xin != yin) {
791: VecGetArray(yin,&yy);
792: } else {
793: yy = xx;
794: }
795: for (i=0; i<n; i++) {
796: ww[i] = PetscMin(PetscRealPart(xx[i]),PetscRealPart(yy[i]));
797: }
798: VecRestoreArray(xin,&xx);
799: if (xin != yin) {
800: VecRestoreArray(yin,&yy);
801: }
802: PetscLogFlops(n);
803: return(0);
804: }
808: PetscErrorCode VecPointwiseMaxAbs_Seq(Vec win,Vec xin,Vec yin)
809: {
810: Vec_Seq *w = (Vec_Seq *)win->data;
812: PetscInt n = win->map->n,i;
813: PetscScalar *ww = w->array,*xx,*yy;
816: VecGetArray(xin,&xx);
817: if (xin != yin) {
818: VecGetArray(yin,&yy);
819: } else {
820: yy = xx;
821: }
822: for (i=0; i<n; i++) {
823: ww[i] = PetscMax(PetscAbsScalar(xx[i]),PetscAbsScalar(yy[i]));
824: }
825: VecRestoreArray(xin,&xx);
826: if (xin != yin) {
827: VecRestoreArray(yin,&yy);
828: }
829: PetscLogFlops(n);
830: return(0);
831: }
833: #include "../src/vec/vec/impls/seq/ftn-kernels/fxtimesy.h"
836: PetscErrorCode VecPointwiseMult_Seq(Vec win,Vec xin,Vec yin)
837: {
838: Vec_Seq *w = (Vec_Seq *)win->data;
840: PetscInt n = win->map->n,i;
841: PetscScalar *ww = w->array,*xx,*yy;
844: VecGetArray(xin,&xx);
845: if (xin != yin) {
846: VecGetArray(yin,&yy);
847: } else {
848: yy = xx;
849: }
851: if (ww == xx) {
852: for (i=0; i<n; i++) ww[i] *= yy[i];
853: } else if (ww == yy) {
854: for (i=0; i<n; i++) ww[i] *= xx[i];
855: } else {
856: /* This was suppose to help on SGI but didn't really seem to
857: PetscReal * PETSC_RESTRICT www = ww;
858: PetscReal * PETSC_RESTRICT yyy = yy;
859: PetscReal * PETSC_RESTRICT xxx = xx;
860: for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
861: */
862: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
863: fortranxtimesy_(xx,yy,ww,&n);
864: #else
865: for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
866: #endif
867: }
868: VecRestoreArray(xin,&xx);
869: if (xin != yin) {
870: VecRestoreArray(yin,&yy);
871: }
872: PetscLogFlops(n);
873: return(0);
874: }
878: PetscErrorCode VecPointwiseDivide_Seq(Vec win,Vec xin,Vec yin)
879: {
880: Vec_Seq *w = (Vec_Seq *)win->data;
882: PetscInt n = win->map->n,i;
883: PetscScalar *ww = w->array,*xx,*yy;
886: VecGetArray(xin,&xx);
887: if (xin != yin) {
888: VecGetArray(yin,&yy);
889: } else {
890: yy = xx;
891: }
892: for (i=0; i<n; i++) {
893: ww[i] = xx[i] / yy[i];
894: }
895: VecRestoreArray(xin,&xx);
896: if (xin != yin) {
897: VecRestoreArray(yin,&yy);
898: }
899: PetscLogFlops(n);
900: return(0);
901: }
905: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
906: {
908: PetscInt n = xin->map->n,i;
909: PetscScalar *xx,*yy;
910: PetscReal m = 0.0;
913: VecGetArray(yin,&yy);
914: VecGetArray(xin,&xx);
915: for(i = 0; i < n; i++) {
916: if (yy[i] != 0.0) {
917: m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
918: } else {
919: m = PetscMax(PetscAbsScalar(xx[i]), m);
920: }
921: }
922: VecRestoreArray(yin,&yy);
923: VecRestoreArray(xin,&xx);
924: MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);
925: PetscLogFlops(n);
926: return(0);
927: }
931: PetscErrorCode VecGetArray_Seq(Vec vin,PetscScalar *a[])
932: {
933: Vec_Seq *v = (Vec_Seq *)vin->data;
937: if (vin->array_gotten) {
938: SETERRQ(PETSC_ERR_ORDER,"Array has already been gotten for this vector,you may\n\
939: have forgotten a call to VecRestoreArray()");
940: }
941: vin->array_gotten = PETSC_TRUE;
943: *a = v->array;
944: PetscObjectTakeAccess(vin);
945: return(0);
946: }
950: PetscErrorCode VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
951: {
955: if (!vin->array_gotten) {
956: SETERRQ(PETSC_ERR_ORDER,"Array has not been gotten for this vector, you may\n\
957: have forgotten a call to VecGetArray()");
958: }
959: vin->array_gotten = PETSC_FALSE;
960: if (a) *a = 0; /* now user cannot accidently use it again */
962: PetscObjectGrantAccess(vin);
963: return(0);
964: }
968: PetscErrorCode VecResetArray_Seq(Vec vin)
969: {
970: Vec_Seq *v = (Vec_Seq *)vin->data;
973: v->array = v->unplacedarray;
974: v->unplacedarray = 0;
975: return(0);
976: }
980: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
981: {
982: Vec_Seq *v = (Vec_Seq *)vin->data;
985: if (v->unplacedarray) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
986: v->unplacedarray = v->array; /* save previous array so reset can bring it back */
987: v->array = (PetscScalar *)a;
988: return(0);
989: }
993: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
994: {
995: Vec_Seq *v = (Vec_Seq *)vin->data;
999: PetscFree(v->array_allocated);
1000: v->array_allocated = v->array = (PetscScalar *)a;
1001: return(0);
1002: }
1006: PetscErrorCode VecGetSize_Seq(Vec vin,PetscInt *size)
1007: {
1009: *size = vin->map->n;
1010: return(0);
1011: }
1015: PetscErrorCode VecConjugate_Seq(Vec xin)
1016: {
1017: PetscScalar *x = ((Vec_Seq *)xin->data)->array;
1018: PetscInt n = xin->map->n;
1021: while (n-->0) {
1022: *x = PetscConj(*x);
1023: x++;
1024: }
1025: return(0);
1026: }
1027: