Actual source code: dvec2.c

  1: #define PETSCVEC_DLL

  3: /* 
  4:    Defines some vector operation functions that are shared by 
  5:   sequential and parallel vectors.
  6: */
 7:  #include ../src/vec/vec/impls/dvecimpl.h
 8:  #include private/petscaxpy.h

 10: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 11: #include "../src/vec/vec/impls/seq/ftn-kernels/fmdot.h"
 14: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 15: {
 16:   Vec_Seq           *xv = (Vec_Seq *)xin->data;
 17:   PetscErrorCode    ierr;
 18:   PetscInt          i,nv_rem,n = xin->map->n;
 19:   PetscScalar       sum0,sum1,sum2,sum3;
 20:   const PetscScalar *yy0,*yy1,*yy2,*yy3,*x;
 21:   Vec               *yy;

 24:   sum0 = 0.0;
 25:   sum1 = 0.0;
 26:   sum2 = 0.0;

 28:   i      = nv;
 29:   nv_rem = nv&0x3;
 30:   yy     = (Vec*)yin;
 31:   x      = xv->array;

 33:   switch (nv_rem) {
 34:   case 3:
 35:     VecGetArray(yy[0],(PetscScalar**)&yy0);
 36:     VecGetArray(yy[1],(PetscScalar**)&yy1);
 37:     VecGetArray(yy[2],(PetscScalar**)&yy2);
 38:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 39:     VecRestoreArray(yy[0],(PetscScalar**)&yy0);
 40:     VecRestoreArray(yy[1],(PetscScalar**)&yy1);
 41:     VecRestoreArray(yy[2],(PetscScalar**)&yy2);
 42:     z[0] = sum0;
 43:     z[1] = sum1;
 44:     z[2] = sum2;
 45:     break;
 46:   case 2:
 47:     VecGetArray(yy[0],(PetscScalar**)&yy0);
 48:     VecGetArray(yy[1],(PetscScalar**)&yy1);
 49:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 50:     VecRestoreArray(yy[0],(PetscScalar**)&yy0);
 51:     VecRestoreArray(yy[1],(PetscScalar**)&yy1);
 52:     z[0] = sum0;
 53:     z[1] = sum1;
 54:     break;
 55:   case 1:
 56:     VecGetArray(yy[0],(PetscScalar**)&yy0);
 57:     fortranmdot1_(x,yy0,&n,&sum0);
 58:     VecRestoreArray(yy[0],(PetscScalar**)&yy0);
 59:     z[0] = sum0;
 60:     break;
 61:   case 0:
 62:     break;
 63:   }
 64:   z  += nv_rem;
 65:   i  -= nv_rem;
 66:   yy += nv_rem;

 68:   while (i >0) {
 69:     sum0 = 0.;
 70:     sum1 = 0.;
 71:     sum2 = 0.;
 72:     sum3 = 0.;
 73:     VecGetArray(yy[0],(PetscScalar**)&yy0);
 74:     VecGetArray(yy[1],(PetscScalar**)&yy1);
 75:     VecGetArray(yy[2],(PetscScalar**)&yy2);
 76:     VecGetArray(yy[3],(PetscScalar**)&yy3);
 77:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 78:     VecRestoreArray(yy[0],(PetscScalar**)&yy0);
 79:     VecRestoreArray(yy[1],(PetscScalar**)&yy1);
 80:     VecRestoreArray(yy[2],(PetscScalar**)&yy2);
 81:     VecRestoreArray(yy[3],(PetscScalar**)&yy3);
 82:     yy  += 4;
 83:     z[0] = sum0;
 84:     z[1] = sum1;
 85:     z[2] = sum2;
 86:     z[3] = sum3;
 87:     z   += 4;
 88:     i   -= 4;
 89:   }
 90:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
 91:   return(0);
 92: }

 94: #else
 97: PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
 98: {
 99:   Vec_Seq           *xv = (Vec_Seq *)xin->data;
100:   PetscErrorCode    ierr;
101:   PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
102:   PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
103:   const PetscScalar * PETSC_RESTRICT yy0,* PETSC_RESTRICT yy1,* PETSC_RESTRICT yy2,*PETSC_RESTRICT yy3,* PETSC_RESTRICT x;
104:   Vec               *yy;

107:   sum0 = 0.;
108:   sum1 = 0.;
109:   sum2 = 0.;

111:   i      = nv;
112:   nv_rem = nv&0x3;
113:   yy     = (Vec *)yin;
114:   j      = n;
115:   x      = xv->array;

117:   switch (nv_rem) {
118:   case 3:
119:     VecGetArray(yy[0],(PetscScalar **)&yy0);
120:     VecGetArray(yy[1],(PetscScalar **)&yy1);
121:     VecGetArray(yy[2],(PetscScalar **)&yy2);
122:     switch (j_rem=j&0x3) {
123:     case 3:
124:       x2 = x[2];
125:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
126:       sum2 += x2*PetscConj(yy2[2]);
127:     case 2:
128:       x1 = x[1];
129:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
130:       sum2 += x1*PetscConj(yy2[1]);
131:     case 1:
132:       x0 = x[0];
133:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
134:       sum2 += x0*PetscConj(yy2[0]);
135:     case 0:
136:       x   += j_rem;
137:       yy0 += j_rem;
138:       yy1 += j_rem;
139:       yy2 += j_rem;
140:       j   -= j_rem;
141:       break;
142:     }
143:     while (j>0) {
144:       x0 = x[0];
145:       x1 = x[1];
146:       x2 = x[2];
147:       x3 = x[3];
148:       x += 4;
149: 
150:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
151:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
152:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
153:       j -= 4;
154:     }
155:     z[0] = sum0;
156:     z[1] = sum1;
157:     z[2] = sum2;
158:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
159:     VecRestoreArray(yy[1],(PetscScalar **)&yy1);
160:     VecRestoreArray(yy[2],(PetscScalar **)&yy2);
161:     break;
162:   case 2:
163:     VecGetArray(yy[0],(PetscScalar **)&yy0);
164:     VecGetArray(yy[1],(PetscScalar **)&yy1);
165:     switch (j_rem=j&0x3) {
166:     case 3:
167:       x2 = x[2];
168:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
169:     case 2:
170:       x1 = x[1];
171:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
172:     case 1:
173:       x0 = x[0];
174:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
175:     case 0:
176:       x   += j_rem;
177:       yy0 += j_rem;
178:       yy1 += j_rem;
179:       j   -= j_rem;
180:       break;
181:     }
182:     while (j>0) {
183:       x0 = x[0];
184:       x1 = x[1];
185:       x2 = x[2];
186:       x3 = x[3];
187:       x += 4;
188: 
189:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
190:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
191:       j -= 4;
192:     }
193:     z[0] = sum0;
194:     z[1] = sum1;
195: 
196:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
197:     VecRestoreArray(yy[1],(PetscScalar **)&yy1);
198:     break;
199:   case 1:
200:     VecGetArray(yy[0],(PetscScalar **)&yy0);
201:     switch (j_rem=j&0x3) {
202:     case 3:
203:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
204:     case 2:
205:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
206:     case 1:
207:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
208:     case 0:
209:       x   += j_rem;
210:       yy0 += j_rem;
211:       j   -= j_rem;
212:       break;
213:     }
214:     while (j>0) {
215:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
216:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
217:       yy0+=4;
218:       j -= 4; x+=4;
219:     }
220:     z[0] = sum0;

222:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
223:     break;
224:   case 0:
225:     break;
226:   }
227:   z  += nv_rem;
228:   i  -= nv_rem;
229:   yy += nv_rem;

231:   while (i >0) {
232:     sum0 = 0.;
233:     sum1 = 0.;
234:     sum2 = 0.;
235:     sum3 = 0.;
236:     VecGetArray(yy[0],(PetscScalar **)&yy0);
237:     VecGetArray(yy[1],(PetscScalar **)&yy1);
238:     VecGetArray(yy[2],(PetscScalar **)&yy2);
239:     VecGetArray(yy[3],(PetscScalar **)&yy3);

241:     j = n;
242:     x = xv->array;
243:     switch (j_rem=j&0x3) {
244:     case 3:
245:       x2 = x[2];
246:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
247:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
248:     case 2:
249:       x1 = x[1];
250:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
251:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
252:     case 1:
253:       x0 = x[0];
254:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
255:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
256:     case 0:
257:       x   += j_rem;
258:       yy0 += j_rem;
259:       yy1 += j_rem;
260:       yy2 += j_rem;
261:       yy3 += j_rem;
262:       j   -= j_rem;
263:       break;
264:     }
265:     while (j>0) {
266:       x0 = x[0];
267:       x1 = x[1];
268:       x2 = x[2];
269:       x3 = x[3];
270:       x += 4;
271: 
272:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
273:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
274:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
275:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
276:       j -= 4;
277:     }
278:     z[0] = sum0;
279:     z[1] = sum1;
280:     z[2] = sum2;
281:     z[3] = sum3;
282:     z   += 4;
283:     i   -= 4;
284:     VecRestoreArray(yy[0],(PetscScalar **)&yy0);
285:     VecRestoreArray(yy[1],(PetscScalar **)&yy1);
286:     VecRestoreArray(yy[2],(PetscScalar **)&yy2);
287:     VecRestoreArray(yy[3],(PetscScalar **)&yy3);
288:     yy  += 4;
289:   }
290:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
291:   return(0);
292: }
293: #endif

295: /* ----------------------------------------------------------------------------*/
298: PetscErrorCode VecMTDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
299: {
300:   Vec_Seq        *xv = (Vec_Seq *)xin->data;
302:   PetscInt       n = xin->map->n,i,j,nv_rem,j_rem;
303:   PetscScalar    sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
304:   Vec            *yy;
305: 

308:   sum0 = 0.;
309:   sum1 = 0.;
310:   sum2 = 0.;

312:   i      = nv;
313:   nv_rem = nv&0x3;
314:   yy     = (Vec*)yin;
315:   j    = n;
316:   x    = xv->array;

318:   switch (nv_rem) {
319:   case 3:
320:     VecGetArray(yy[0],&yy0);
321:     VecGetArray(yy[1],&yy1);
322:     VecGetArray(yy[2],&yy2);
323:     switch (j_rem=j&0x3) {
324:     case 3:
325:       x2 = x[2];
326:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
327:       sum2 += x2*yy2[2];
328:     case 2:
329:       x1 = x[1];
330:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
331:       sum2 += x1*yy2[1];
332:     case 1:
333:       x0 = x[0];
334:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
335:       sum2 += x0*yy2[0];
336:     case 0:
337:       x  += j_rem;
338:       yy0 += j_rem;
339:       yy1 += j_rem;
340:       yy2 += j_rem;
341:       j  -= j_rem;
342:       break;
343:     }
344:     while (j>0) {
345:       x0 = x[0];
346:       x1 = x[1];
347:       x2 = x[2];
348:       x3 = x[3];
349:       x += 4;
350: 
351:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
352:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
353:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
354:       j -= 4;
355:     }
356:     z[0] = sum0;
357:     z[1] = sum1;
358:     z[2] = sum2;
359:     VecRestoreArray(yy[0],&yy0);
360:     VecRestoreArray(yy[1],&yy1);
361:     VecRestoreArray(yy[2],&yy2);
362:     break;
363:   case 2:
364:     VecGetArray(yy[0],&yy0);
365:     VecGetArray(yy[1],&yy1);
366:     switch (j_rem=j&0x3) {
367:     case 3:
368:       x2 = x[2];
369:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
370:     case 2:
371:       x1 = x[1];
372:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
373:     case 1:
374:       x0 = x[0];
375:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
376:     case 0:
377:       x  += j_rem;
378:       yy0 += j_rem;
379:       yy1 += j_rem;
380:       j  -= j_rem;
381:       break;
382:     }
383:     while (j>0) {
384:       x0 = x[0];
385:       x1 = x[1];
386:       x2 = x[2];
387:       x3 = x[3];
388:       x += 4;
389: 
390:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
391:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
392:       j -= 4;
393:     }
394:     z[0] = sum0;
395:     z[1] = sum1;
396: 
397:     VecRestoreArray(yy[0],&yy0);
398:     VecRestoreArray(yy[1],&yy1);
399:     break;
400:   case 1:
401:     VecGetArray(yy[0],&yy0);
402:     switch (j_rem=j&0x3) {
403:     case 3:
404:       x2 = x[2]; sum0 += x2*yy0[2];
405:     case 2:
406:       x1 = x[1]; sum0 += x1*yy0[1];
407:     case 1:
408:       x0 = x[0]; sum0 += x0*yy0[0];
409:     case 0:
410:       x  += j_rem;
411:       yy0 += j_rem;
412:       j  -= j_rem;
413:       break;
414:     }
415:     while (j>0) {
416:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
417:       j -= 4; x+=4;
418:     }
419:     z[0] = sum0;

421:     VecRestoreArray(yy[0],&yy0);
422:     break;
423:   case 0:
424:     break;
425:   }
426:   z  += nv_rem;
427:   i  -= nv_rem;
428:   yy += nv_rem;

430:   while (i >0) {
431:     sum0 = 0.;
432:     sum1 = 0.;
433:     sum2 = 0.;
434:     sum3 = 0.;
435:     VecGetArray(yy[0],&yy0);
436:     VecGetArray(yy[1],&yy1);
437:     VecGetArray(yy[2],&yy2);
438:     VecGetArray(yy[3],&yy3);

440:     j = n;
441:     x = xv->array;
442:     switch (j_rem=j&0x3) {
443:     case 3:
444:       x2 = x[2];
445:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
446:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
447:     case 2:
448:       x1 = x[1];
449:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
450:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
451:     case 1:
452:       x0 = x[0];
453:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
454:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
455:     case 0:
456:       x  += j_rem;
457:       yy0 += j_rem;
458:       yy1 += j_rem;
459:       yy2 += j_rem;
460:       yy3 += j_rem;
461:       j  -= j_rem;
462:       break;
463:     }
464:     while (j>0) {
465:       x0 = x[0];
466:       x1 = x[1];
467:       x2 = x[2];
468:       x3 = x[3];
469:       x += 4;
470: 
471:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
472:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
473:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
474:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
475:       j -= 4;
476:     }
477:     z[0] = sum0;
478:     z[1] = sum1;
479:     z[2] = sum2;
480:     z[3] = sum3;
481:     z   += 4;
482:     i   -= 4;
483:     VecRestoreArray(yy[0],&yy0);
484:     VecRestoreArray(yy[1],&yy1);
485:     VecRestoreArray(yy[2],&yy2);
486:     VecRestoreArray(yy[3],&yy3);
487:     yy  += 4;
488:   }
489:   PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));
490:   return(0);
491: }
492: 

496: PetscErrorCode VecMax_Seq(Vec xin,PetscInt* idx,PetscReal * z)
497: {
498:   Vec_Seq      *x = (Vec_Seq*)xin->data;
499:   PetscInt     i,j=0,n = xin->map->n;
500:   PetscReal    max,tmp;
501:   PetscScalar  *xx = x->array;

504:   if (!n) {
505:     max = PETSC_MIN;
506:     j   = -1;
507:   } else {
508: #if defined(PETSC_USE_COMPLEX)
509:       max = PetscRealPart(*xx++); j = 0;
510: #else
511:       max = *xx++; j = 0;
512: #endif
513:     for (i=1; i<n; i++) {
514: #if defined(PETSC_USE_COMPLEX)
515:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
516: #else
517:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
518: #endif
519:     }
520:   }
521:   *z   = max;
522:   if (idx) *idx = j;
523:   return(0);
524: }

528: PetscErrorCode VecMin_Seq(Vec xin,PetscInt* idx,PetscReal * z)
529: {
530:   Vec_Seq      *x = (Vec_Seq*)xin->data;
531:   PetscInt     i,j=0,n = xin->map->n;
532:   PetscReal    min,tmp;
533:   PetscScalar  *xx = x->array;

536:   if (!n) {
537:     min = PETSC_MAX;
538:     j   = -1;
539:   } else {
540: #if defined(PETSC_USE_COMPLEX)
541:     min = PetscRealPart(*xx++); j = 0;
542: #else
543:     min = *xx++; j = 0;
544: #endif
545:     for (i=1; i<n; i++) {
546: #if defined(PETSC_USE_COMPLEX)
547:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
548: #else
549:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
550: #endif
551:     }
552:   }
553:   *z   = min;
554:   if (idx) *idx = j;
555:   return(0);
556: }

560: PetscErrorCode VecSet_Seq(Vec xin,PetscScalar alpha)
561: {
562:   Vec_Seq        *x = (Vec_Seq *)xin->data;
564:   PetscInt       i,n = xin->map->n;
565:   PetscScalar    *xx = x->array;

568:   if (alpha == 0.0) {
569:     PetscMemzero(xx,n*sizeof(PetscScalar));
570:   } else {
571:     for (i=0; i<n; i++) xx[i] = alpha;
572:   }
573:   return(0);
574: }

578: PetscErrorCode VecSetRandom_Seq(Vec xin,PetscRandom r)
579: {
581:   PetscInt       n = xin->map->n,i;
582:   PetscScalar    *xx;

585:   VecGetArray(xin,&xx);
586:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
587:   VecRestoreArray(xin,&xx);
588:   return(0);
589: }

593: PetscErrorCode VecMAXPY_Seq(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
594: {
595:   Vec_Seq           *xdata = (Vec_Seq*)xin->data;
596:   PetscErrorCode    ierr;
597:   PetscInt          n = xin->map->n,j,j_rem;
598:   const PetscScalar *yy0,*yy1,*yy2,*yy3;
599:   PetscScalar       *xx,alpha0,alpha1,alpha2,alpha3;

601: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
602: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
603: #endif

606:   PetscLogFlops(nv*2.0*n);

608:   xx = xdata->array;
609:   switch (j_rem=nv&0x3) {
610:   case 3:
611:     VecGetArray(y[0],(PetscScalar**)&yy0);
612:     VecGetArray(y[1],(PetscScalar**)&yy1);
613:     VecGetArray(y[2],(PetscScalar**)&yy2);
614:     alpha0 = alpha[0];
615:     alpha1 = alpha[1];
616:     alpha2 = alpha[2];
617:     alpha += 3;
618:     PetscAXPY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
619:     VecRestoreArray(y[0],(PetscScalar**)&yy0);
620:     VecRestoreArray(y[1],(PetscScalar**)&yy1);
621:     VecRestoreArray(y[2],(PetscScalar**)&yy2);
622:     y     += 3;
623:     break;
624:   case 2:
625:     VecGetArray(y[0],(PetscScalar**)&yy0);
626:     VecGetArray(y[1],(PetscScalar**)&yy1);
627:     alpha0 = alpha[0];
628:     alpha1 = alpha[1];
629:     alpha +=2;
630:     PetscAXPY2(xx,alpha0,alpha1,yy0,yy1,n);
631:     VecRestoreArray(y[0],(PetscScalar**)&yy0);
632:     VecRestoreArray(y[1],(PetscScalar**)&yy1);
633:     y     +=2;
634:     break;
635:   case 1:
636:     VecGetArray(y[0],(PetscScalar**)&yy0);
637:     alpha0 = *alpha++;
638:     PetscAXPY(xx,alpha0,yy0,n);
639:     VecRestoreArray(y[0],(PetscScalar**)&yy0);
640:     y     +=1;
641:     break;
642:   }
643:   for (j=j_rem; j<nv; j+=4) {
644:     VecGetArray(y[0],(PetscScalar**)&yy0);
645:     VecGetArray(y[1],(PetscScalar**)&yy1);
646:     VecGetArray(y[2],(PetscScalar**)&yy2);
647:     VecGetArray(y[3],(PetscScalar**)&yy3);
648:     alpha0 = alpha[0];
649:     alpha1 = alpha[1];
650:     alpha2 = alpha[2];
651:     alpha3 = alpha[3];
652:     alpha  += 4;

654:     PetscAXPY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
655:     VecRestoreArray(y[0],(PetscScalar**)&yy0);
656:     VecRestoreArray(y[1],(PetscScalar**)&yy1);
657:     VecRestoreArray(y[2],(PetscScalar**)&yy2);
658:     VecRestoreArray(y[3],(PetscScalar**)&yy3);
659:     y      += 4;
660:   }
661:   return(0);
662: }

664: #include "../src/vec/vec/impls/seq/ftn-kernels/faypx.h"
667: PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
668: {
669:   Vec_Seq           *y = (Vec_Seq *)yin->data;
670:   PetscErrorCode    ierr;
671:   PetscInt          n = yin->map->n;
672:   PetscScalar       *yy = y->array;
673:   const PetscScalar *xx;

676:   if (alpha == 0.0) {
677:     VecCopy_Seq(xin,yin);
678:   } else if (alpha == 1.0) {
679:     VecAXPY_Seq(yin,alpha,xin);
680:   } else if (alpha == -1.0) {
681:     PetscInt i;
682:     VecGetArray(xin,(PetscScalar**)&xx);
683:     for (i=0; i<n; i++) {
684:       yy[i] = xx[i] - yy[i];
685:     }
686:     VecRestoreArray(xin,(PetscScalar**)&xx);
687:     PetscLogFlops(1.0*n);
688:   } else {
689:     VecGetArray(xin,(PetscScalar**)&xx);
690: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
691:     {
692:       PetscScalar oalpha = alpha;
693:       fortranaypx_(&n,&oalpha,xx,yy);
694:     }
695: #else
696:     {
697:       PetscInt i;
698:       for (i=0; i<n; i++) {
699:         yy[i] = xx[i] + alpha*yy[i];
700:       }
701:     }
702: #endif
703:     VecRestoreArray(xin,(PetscScalar**)&xx);
704:     PetscLogFlops(2.0*n);
705:   }
706:   return(0);
707: }

709: #include "../src/vec/vec/impls/seq/ftn-kernels/fwaxpy.h"
710: /*
711:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
712:   to be slower than a regular C loop.  Hence,we do not include it.
713:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
714: */

718: PetscErrorCode VecWAXPY_Seq(Vec win, PetscScalar alpha,Vec xin,Vec yin)
719: {
720:   Vec_Seq            *w = (Vec_Seq *)win->data;
721:   PetscErrorCode     ierr;
722:   PetscInt           i,n = win->map->n;
723:   PetscScalar        *ww = w->array;
724:   const PetscScalar  *yy,*xx;

727:   VecGetArray(yin,(PetscScalar**)&yy);
728:   VecGetArray(xin,(PetscScalar**)&xx);
729:   if (alpha == 1.0) {
730:     PetscLogFlops(n);
731:     /* could call BLAS axpy after call to memcopy, but may be slower */
732:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
733:   } else if (alpha == -1.0) {
734:     PetscLogFlops(n);
735:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
736:   } else if (alpha == 0.0) {
737:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
738:   } else {
739:     PetscScalar oalpha = alpha;
740: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
741:     fortranwaxpy_(&n,&oalpha,xx,yy,ww);
742: #else
743:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
744: #endif
745:     PetscLogFlops(2.0*n);
746:   }
747:   VecRestoreArray(yin,(PetscScalar**)&yy);
748:   VecRestoreArray(xin,(PetscScalar**)&xx);
749:   return(0);
750: }

754: PetscErrorCode VecPointwiseMax_Seq(Vec win,Vec xin,Vec yin)
755: {
756:   Vec_Seq        *w = (Vec_Seq *)win->data;
758:   PetscInt       n = win->map->n,i;
759:   PetscScalar    *ww = w->array,*xx,*yy;

762:   VecGetArray(xin,&xx);
763:   if (xin != yin) {
764:     VecGetArray(yin,&yy);
765:   } else {
766:     yy = xx;
767:   }
768:   for (i=0; i<n; i++) {
769:     ww[i] = PetscMax(PetscRealPart(xx[i]),PetscRealPart(yy[i]));
770:   }
771:   VecRestoreArray(xin,&xx);
772:   if (xin != yin) {
773:     VecRestoreArray(yin,&yy);
774:   }
775:   PetscLogFlops(n);
776:   return(0);
777: }

781: PetscErrorCode VecPointwiseMin_Seq(Vec win,Vec xin,Vec yin)
782: {
783:   Vec_Seq        *w = (Vec_Seq *)win->data;
785:   PetscInt       n = win->map->n,i;
786:   PetscScalar    *ww = w->array,*xx,*yy;

789:   VecGetArray(xin,&xx);
790:   if (xin != yin) {
791:     VecGetArray(yin,&yy);
792:   } else {
793:     yy = xx;
794:   }
795:   for (i=0; i<n; i++) {
796:     ww[i] = PetscMin(PetscRealPart(xx[i]),PetscRealPart(yy[i]));
797:   }
798:   VecRestoreArray(xin,&xx);
799:   if (xin != yin) {
800:     VecRestoreArray(yin,&yy);
801:   }
802:   PetscLogFlops(n);
803:   return(0);
804: }

808: PetscErrorCode VecPointwiseMaxAbs_Seq(Vec win,Vec xin,Vec yin)
809: {
810:   Vec_Seq        *w = (Vec_Seq *)win->data;
812:   PetscInt       n = win->map->n,i;
813:   PetscScalar    *ww = w->array,*xx,*yy;

816:   VecGetArray(xin,&xx);
817:   if (xin != yin) {
818:     VecGetArray(yin,&yy);
819:   } else {
820:     yy = xx;
821:   }
822:   for (i=0; i<n; i++) {
823:     ww[i] = PetscMax(PetscAbsScalar(xx[i]),PetscAbsScalar(yy[i]));
824:   }
825:   VecRestoreArray(xin,&xx);
826:   if (xin != yin) {
827:     VecRestoreArray(yin,&yy);
828:   }
829:   PetscLogFlops(n);
830:   return(0);
831: }

833: #include "../src/vec/vec/impls/seq/ftn-kernels/fxtimesy.h"
836: PetscErrorCode VecPointwiseMult_Seq(Vec win,Vec xin,Vec yin)
837: {
838:   Vec_Seq        *w = (Vec_Seq *)win->data;
840:   PetscInt       n = win->map->n,i;
841:   PetscScalar    *ww = w->array,*xx,*yy;

844:   VecGetArray(xin,&xx);
845:   if (xin != yin) {
846:     VecGetArray(yin,&yy);
847:   } else {
848:     yy = xx;
849:   }

851:   if (ww == xx) {
852:     for (i=0; i<n; i++) ww[i] *= yy[i];
853:   } else if (ww == yy) {
854:     for (i=0; i<n; i++) ww[i] *= xx[i];
855:   } else {
856:     /*  This was suppose to help on SGI but didn't really seem to
857:           PetscReal * PETSC_RESTRICT www = ww;
858:           PetscReal * PETSC_RESTRICT yyy = yy;
859:           PetscReal * PETSC_RESTRICT xxx = xx;
860:           for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
861:     */
862: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
863:     fortranxtimesy_(xx,yy,ww,&n);
864: #else
865:     for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
866: #endif
867:   }
868:   VecRestoreArray(xin,&xx);
869:   if (xin != yin) {
870:     VecRestoreArray(yin,&yy);
871:   }
872:   PetscLogFlops(n);
873:   return(0);
874: }

878: PetscErrorCode VecPointwiseDivide_Seq(Vec win,Vec xin,Vec yin)
879: {
880:   Vec_Seq        *w = (Vec_Seq *)win->data;
882:   PetscInt       n = win->map->n,i;
883:   PetscScalar    *ww = w->array,*xx,*yy;

886:   VecGetArray(xin,&xx);
887:   if (xin != yin) {
888:     VecGetArray(yin,&yy);
889:   } else {
890:     yy = xx;
891:   }
892:   for (i=0; i<n; i++) {
893:     ww[i] = xx[i] / yy[i];
894:   }
895:   VecRestoreArray(xin,&xx);
896:   if (xin != yin) {
897:     VecRestoreArray(yin,&yy);
898:   }
899:   PetscLogFlops(n);
900:   return(0);
901: }

905: PetscErrorCode VecMaxPointwiseDivide_Seq(Vec xin,Vec yin,PetscReal *max)
906: {
908:   PetscInt       n = xin->map->n,i;
909:   PetscScalar    *xx,*yy;
910:   PetscReal      m = 0.0;

913:   VecGetArray(yin,&yy);
914:   VecGetArray(xin,&xx);
915:   for(i = 0; i < n; i++) {
916:     if (yy[i] != 0.0) {
917:       m = PetscMax(PetscAbsScalar(xx[i]/yy[i]), m);
918:     } else {
919:       m = PetscMax(PetscAbsScalar(xx[i]), m);
920:     }
921:   }
922:   VecRestoreArray(yin,&yy);
923:   VecRestoreArray(xin,&xx);
924:   MPI_Allreduce(&m,max,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);
925:   PetscLogFlops(n);
926:   return(0);
927: }

931: PetscErrorCode VecGetArray_Seq(Vec vin,PetscScalar *a[])
932: {
933:   Vec_Seq        *v = (Vec_Seq *)vin->data;

937:   if (vin->array_gotten) {
938:     SETERRQ(PETSC_ERR_ORDER,"Array has already been gotten for this vector,you may\n\
939:     have forgotten a call to VecRestoreArray()");
940:   }
941:   vin->array_gotten = PETSC_TRUE;

943:   *a =  v->array;
944:   PetscObjectTakeAccess(vin);
945:   return(0);
946: }

950: PetscErrorCode VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
951: {

955:   if (!vin->array_gotten) {
956:     SETERRQ(PETSC_ERR_ORDER,"Array has not been gotten for this vector, you may\n\
957:     have forgotten a call to VecGetArray()");
958:   }
959:   vin->array_gotten = PETSC_FALSE;
960:   if (a) *a         = 0; /* now user cannot accidently use it again */

962:   PetscObjectGrantAccess(vin);
963:   return(0);
964: }

968: PetscErrorCode VecResetArray_Seq(Vec vin)
969: {
970:   Vec_Seq *v = (Vec_Seq *)vin->data;

973:   v->array         = v->unplacedarray;
974:   v->unplacedarray = 0;
975:   return(0);
976: }

980: PetscErrorCode VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
981: {
982:   Vec_Seq *v = (Vec_Seq *)vin->data;

985:   if (v->unplacedarray) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"VecPlaceArray() was already called on this vector, without a call to VecResetArray()");
986:   v->unplacedarray = v->array;  /* save previous array so reset can bring it back */
987:   v->array = (PetscScalar *)a;
988:   return(0);
989: }

993: PetscErrorCode VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
994: {
995:   Vec_Seq        *v = (Vec_Seq *)vin->data;

999:   PetscFree(v->array_allocated);
1000:   v->array_allocated = v->array = (PetscScalar *)a;
1001:   return(0);
1002: }

1006: PetscErrorCode VecGetSize_Seq(Vec vin,PetscInt *size)
1007: {
1009:   *size = vin->map->n;
1010:   return(0);
1011: }

1015: PetscErrorCode VecConjugate_Seq(Vec xin)
1016: {
1017:   PetscScalar *x = ((Vec_Seq *)xin->data)->array;
1018:   PetscInt    n = xin->map->n;

1021:   while (n-->0) {
1022:     *x = PetscConj(*x);
1023:     x++;
1024:   }
1025:   return(0);
1026: }
1027: