Actual source code: xxt.c

  1: #define PETSCKSP_DLL

  3: /*************************************xxt.c************************************
  4: Module Name: xxt
  5: Module Info:

  7: author:  Henry M. Tufo III
  8: e-mail:  hmt@asci.uchicago.edu
  9: contact:
 10: +--------------------------------+--------------------------------+
 11: |MCS Division - Building 221     |Department of Computer Science  |
 12: |Argonne National Laboratory     |Ryerson 152                     |
 13: |9700 S. Cass Avenue             |The University of Chicago       |
 14: |Argonne, IL  60439              |Chicago, IL  60637              |
 15: |(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
 16: +--------------------------------+--------------------------------+

 18: Last Modification: 3.20.01
 19: **************************************xxt.c***********************************/
 20: #include "../src/ksp/pc/impls/tfs/tfs.h"

 22: #define LEFT  -1
 23: #define RIGHT  1
 24: #define BOTH   0

 26: typedef struct xxt_solver_info {
 27:   PetscInt n, m, n_global, m_global;
 28:   PetscInt nnz, max_nnz, msg_buf_sz;
 29:   PetscInt *nsep, *lnsep, *fo, nfo, *stages;
 30:   PetscInt *col_sz, *col_indices;
 31:   PetscScalar **col_vals, *x, *solve_uu, *solve_w;
 32:   PetscInt nsolves;
 33:   PetscScalar tot_solve_time;
 34: } xxt_info;

 36: typedef struct matvec_info {
 37:   PetscInt n, m, n_global, m_global;
 38:   PetscInt *local2global;
 39:   gs_ADT gs_handle;
 40:   PetscErrorCode (*matvec)(struct matvec_info*,PetscScalar*,PetscScalar*);
 41:   void *grid_data;
 42: } mv_info;

 44: struct xxt_CDT{
 45:   PetscInt id;
 46:   PetscInt ns;
 47:   PetscInt level;
 48:   xxt_info *info;
 49:   mv_info  *mvi;
 50: };

 52: static PetscInt n_xxt=0;
 53: static PetscInt n_xxt_handles=0;

 55: /* prototypes */
 56: static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *rhs);
 57: static PetscErrorCode check_handle(xxt_ADT xxt_handle);
 58: static PetscErrorCode det_separators(xxt_ADT xxt_handle);
 59: static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u);
 60: static PetscInt xxt_generate(xxt_ADT xxt_handle);
 61: static PetscInt do_xxt_factor(xxt_ADT xxt_handle);
 62: static mv_info *set_mvi(PetscInt *local2global, PetscInt n, PetscInt m, void *matvec, void *grid_data);

 64: /**************************************xxt.c***********************************/
 65: xxt_ADT XXT_new(void)
 66: {
 67:   xxt_ADT xxt_handle;

 69:   /* rolling count on n_xxt ... pot. problem here */
 70:   n_xxt_handles++;
 71:   xxt_handle       = (xxt_ADT)malloc(sizeof(struct xxt_CDT));
 72:   xxt_handle->id   = ++n_xxt;
 73:   xxt_handle->info = NULL; xxt_handle->mvi  = NULL;

 75:   return(xxt_handle);
 76: }

 78: /**************************************xxt.c***********************************/
 79: PetscInt XXT_factor(xxt_ADT xxt_handle, /* prev. allocated xxt  handle */
 80:            PetscInt *local2global,  /* global column mapping       */
 81:            PetscInt n,              /* local num rows              */
 82:            PetscInt m,              /* local num cols              */
 83:            void *matvec,       /* b_loc=A_local.x_loc         */
 84:            void *grid_data     /* grid data for matvec        */
 85:            )
 86: {
 87:   comm_init();
 88:   check_handle(xxt_handle);

 90:   /* only 2^k for now and all nodes participating */
 91:   if ((1<<(xxt_handle->level=i_log2_num_nodes))!=num_nodes)
 92:     {SETERRQ2(PETSC_ERR_PLIB,"only 2^k for now and MPI_COMM_WORLD!!! %D != %D\n",1<<i_log2_num_nodes,num_nodes);}

 94:   /* space for X info */
 95:   xxt_handle->info = (xxt_info*)malloc(sizeof(xxt_info));

 97:   /* set up matvec handles */
 98:   xxt_handle->mvi  = set_mvi(local2global, n, m, matvec, grid_data);

100:   /* matrix is assumed to be of full rank */
101:   /* LATER we can reset to indicate rank def. */
102:   xxt_handle->ns=0;

104:   /* determine separators and generate firing order - NB xxt info set here */
105:   det_separators(xxt_handle);

107:   return(do_xxt_factor(xxt_handle));
108: }

110: /**************************************xxt.c***********************************/
111: PetscInt XXT_solve(xxt_ADT xxt_handle, PetscScalar *x, PetscScalar *b)
112: {

114:   comm_init();
115:   check_handle(xxt_handle);

117:   /* need to copy b into x? */
118:   if (b)
119:     {rvec_copy(x,b,xxt_handle->mvi->n);}
120:   do_xxt_solve(xxt_handle,x);

122:   return(0);
123: }

125: /**************************************xxt.c***********************************/
126: PetscInt XXT_free(xxt_ADT xxt_handle)
127: {

129:   comm_init();
130:   check_handle(xxt_handle);
131:   n_xxt_handles--;

133:   free(xxt_handle->info->nsep);
134:   free(xxt_handle->info->lnsep);
135:   free(xxt_handle->info->fo);
136:   free(xxt_handle->info->stages);
137:   free(xxt_handle->info->solve_uu);
138:   free(xxt_handle->info->solve_w);
139:   free(xxt_handle->info->x);
140:   free(xxt_handle->info->col_vals);
141:   free(xxt_handle->info->col_sz);
142:   free(xxt_handle->info->col_indices);
143:   free(xxt_handle->info);
144:   free(xxt_handle->mvi->local2global);
 145:   gs_free(xxt_handle->mvi->gs_handle);
146:   free(xxt_handle->mvi);
147:   free(xxt_handle);

149:   /* if the check fails we nuke */
150:   /* if NULL pointer passed to free we nuke */
151:   /* if the calls to free fail that's not my problem */
152:   return(0);
153: }
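
The four routines above, together with XXT_stats() below, form the public lifecycle of the solver: create a handle, factor, solve as often as needed, then free. The following caller-side sketch shows the typical sequence; my_matvec, my_grid, map, n, m, x, and b are hypothetical placeholders supplied by the caller, not names defined in this file.

  xxt_ADT handle = XXT_new();                         /* fresh, empty handle               */
  XXT_factor(handle, map, n, m,                       /* build the X factor                */
             (void*)my_matvec, (void*)my_grid);       /* b_loc = A_local . x_loc callback  */
  XXT_solve(handle, x, b);                            /* x = A^{-1}.b (b is copied into x) */
  XXT_stats(handle);                                  /* optional: print factor statistics */
  XXT_free(handle);                                   /* release all factor storage        */
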

155: /**************************************xxt.c***********************************/
156: PetscInt XXT_stats(xxt_ADT xxt_handle)
157: {
158:   PetscInt  op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD};
159:   PetscInt fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
160:   PetscInt   vals[9],  work[9];
161:   PetscScalar fvals[3], fwork[3];

164:   comm_init();
165:   check_handle(xxt_handle);

167:   /* if factorization not done there are no stats */
168:   if (!xxt_handle->info||!xxt_handle->mvi)
169:     {
170:       if (!my_id)
171:         {PetscPrintf(PETSC_COMM_WORLD,"XXT_stats() :: no stats available!\n");}
172:       return 1;
173:     }

175:   vals[0]=vals[1]=vals[2]=xxt_handle->info->nnz;
176:   vals[3]=vals[4]=vals[5]=xxt_handle->mvi->n;
177:   vals[6]=vals[7]=vals[8]=xxt_handle->info->msg_buf_sz;
178:   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);

180:   fvals[0]=fvals[1]=fvals[2]
181:     =xxt_handle->info->tot_solve_time/xxt_handle->info->nsolves++;
182:   grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop);

184:   if (!my_id)
185:     {
186:       PetscPrintf(PETSC_COMM_WORLD,"%D :: min   xxt_nnz=%D\n",my_id,vals[0]);
187:       PetscPrintf(PETSC_COMM_WORLD,"%D :: max   xxt_nnz=%D\n",my_id,vals[1]);
188:       PetscPrintf(PETSC_COMM_WORLD,"%D :: avg   xxt_nnz=%g\n",my_id,1.0*vals[2]/num_nodes);
189:       PetscPrintf(PETSC_COMM_WORLD,"%D :: tot   xxt_nnz=%D\n",my_id,vals[2]);
190:       PetscPrintf(PETSC_COMM_WORLD,"%D :: xxt   C(2d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.5)));
191:       PetscPrintf(PETSC_COMM_WORLD,"%D :: xxt   C(3d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.6667)));
192:       PetscPrintf(PETSC_COMM_WORLD,"%D :: min   xxt_n  =%D\n",my_id,vals[3]);
193:       PetscPrintf(PETSC_COMM_WORLD,"%D :: max   xxt_n  =%D\n",my_id,vals[4]);
194:       PetscPrintf(PETSC_COMM_WORLD,"%D :: avg   xxt_n  =%g\n",my_id,1.0*vals[5]/num_nodes);
195:       PetscPrintf(PETSC_COMM_WORLD,"%D :: tot   xxt_n  =%D\n",my_id,vals[5]);
196:       PetscPrintf(PETSC_COMM_WORLD,"%D :: min   xxt_buf=%D\n",my_id,vals[6]);
197:       PetscPrintf(PETSC_COMM_WORLD,"%D :: max   xxt_buf=%D\n",my_id,vals[7]);
198:       PetscPrintf(PETSC_COMM_WORLD,"%D :: avg   xxt_buf=%g\n",my_id,1.0*vals[8]/num_nodes);
199:       PetscPrintf(PETSC_COMM_WORLD,"%D :: min   xxt_slv=%g\n",my_id,fvals[0]);
200:       PetscPrintf(PETSC_COMM_WORLD,"%D :: max   xxt_slv=%g\n",my_id,fvals[1]);
201:       PetscPrintf(PETSC_COMM_WORLD,"%D :: avg   xxt_slv=%g\n",my_id,fvals[2]/num_nodes);
202:     }

204:   return(0);
205: }

207: /*************************************xxt.c************************************

209: Description: get A_local, local portion of global coarse matrix which 
210: is a row dist. nxm matrix w/ n<m.
211:    o my_ml holds address of ML struct associated w/A_local and coarse grid
212:    o local2global holds global number of column i (i=0,...,m-1)
213:    o local2global holds global number of row    i (i=0,...,n-1)
214:    o mylocmatvec performs A_local . vec_local (note that gs is performed using 
215:    gs_init/gop).

217: mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
218: mylocmatvec (void :: void *data, double *in, double *out)
219: **************************************xxt.c***********************************/
220: static PetscInt do_xxt_factor(xxt_ADT xxt_handle)
221: {
222:   return xxt_generate(xxt_handle);
223: }

225: /**************************************xxt.c***********************************/
226: static PetscInt xxt_generate(xxt_ADT xxt_handle)
227: {
228:   PetscInt i,j,k,idex;
229:   PetscInt dim, col;
230:   PetscScalar *u, *uu, *v, *z, *w, alpha, alpha_w;
231:   PetscInt *segs;
232:   PetscInt op[] = {GL_ADD,0};
233:   PetscInt off, len;
234:   PetscScalar *x_ptr;
235:   PetscInt *iptr, flag;
236:   PetscInt start=0, end, work;
237:   PetscInt op2[] = {GL_MIN,0};
238:   gs_ADT gs_handle;
239:   PetscInt *nsep, *lnsep, *fo;
240:   PetscInt a_n=xxt_handle->mvi->n;
241:   PetscInt a_m=xxt_handle->mvi->m;
242:   PetscInt *a_local2global=xxt_handle->mvi->local2global;
243:   PetscInt level;
244:   PetscInt xxt_nnz=0, xxt_max_nnz=0;
245:   PetscInt n, m;
246:   PetscInt *col_sz, *col_indices, *stages;
247:   PetscScalar **col_vals, *x;
248:   PetscInt n_global;
249:   PetscInt xxt_zero_nnz=0;
250:   PetscInt xxt_zero_nnz_0=0;
251:   PetscBLASInt i1 = 1,dlen;
252:   PetscScalar dm1 = -1.0;

255:   n=xxt_handle->mvi->n;
256:   nsep=xxt_handle->info->nsep;
257:   lnsep=xxt_handle->info->lnsep;
258:   fo=xxt_handle->info->fo;
259:   end=lnsep[0];
260:   level=xxt_handle->level;
261:   gs_handle=xxt_handle->mvi->gs_handle;

263:   /* is there a null space? */
264:   /* LATER add in ability to detect null space by checking alpha */
265:   for (i=0, j=0; i<=level; i++)
266:     {j+=nsep[i];}

268:   m = j-xxt_handle->ns;
269:   if (m!=j)
270:     {PetscPrintf(PETSC_COMM_WORLD,"xxt_generate() :: null space exists %D %D %D\n",m,j,xxt_handle->ns);}

272:   /* get and initialize storage for x local         */
273:   /* note that x local is nxm and stored by columns */
274:   col_sz = (PetscInt*) malloc(m*sizeof(PetscInt));
275:   col_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
276:   col_vals = (PetscScalar **) malloc(m*sizeof(PetscScalar *));
277:   for (i=j=0; i<m; i++, j+=2)
278:     {
279:       col_indices[j]=col_indices[j+1]=col_sz[i]=-1;
280:       col_vals[i] = NULL;
281:     }
282:   col_indices[j]=-1;

284:   /* size of separators for each sub-hc working from bottom of tree to top */
285:   /* this looks like nsep[]=segments */
286:   stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
287:   segs   = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
288:   ivec_zero(stages,level+1);
289:   ivec_copy(segs,nsep,level+1);
290:   for (i=0; i<level; i++)
291:     {segs[i+1] += segs[i];}
292:   stages[0] = segs[0];

294:   /* temporary vectors  */
295:   u  = (PetscScalar *) malloc(n*sizeof(PetscScalar));
296:   z  = (PetscScalar *) malloc(n*sizeof(PetscScalar));
297:   v  = (PetscScalar *) malloc(a_m*sizeof(PetscScalar));
298:   uu = (PetscScalar *) malloc(m*sizeof(PetscScalar));
299:   w  = (PetscScalar *) malloc(m*sizeof(PetscScalar));

301:   /* extra nnz due to replication of vertices across separators */
302:   for (i=1, j=0; i<=level; i++)
303:     {j+=nsep[i];}

305:   /* storage for sparse x values */
306:   n_global = xxt_handle->info->n_global;
307:   xxt_max_nnz = (PetscInt)(2.5*pow(1.0*n_global,1.6667) + j*n/2)/num_nodes;
308:   x = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar));
309:   xxt_nnz = 0;

311:   /* LATER - can embed next sep to fire in gs */
312:   /* time to make the donuts - generate X factor */
313:   for (dim=i=j=0;i<m;i++)
314:     {
315:       /* time to move to the next level? */
316:       while (i==segs[dim]) {
317:         if (dim==level) SETERRQ(PETSC_ERR_PLIB,"dim about to exceed level\n");
318:         stages[dim++]=i;
319:         end+=lnsep[dim];
320:       }
321:       stages[dim]=i;

323:       /* which column are we firing? */
324:       /* i.e. set v_l */
325:       /* use new seps and do global min across hc to determine which one to fire */
326:       (start<end) ? (col=fo[start]) : (col=INT_MAX);
327:       giop_hc(&col,&work,1,op2,dim);

329:       /* shouldn't need this */
330:       if (col==INT_MAX)
331:         {
332:           PetscInfo(0,"hey ... col==INT_MAX??\n");
333:           continue;
334:         }

336:       /* do I own it? I should */
337:       rvec_zero(v ,a_m);
338:       if (col==fo[start])
339:         {
340:           start++;
341:           idex=ivec_linear_search(col, a_local2global, a_n);
342:           if (idex!=-1)
343:             {v[idex] = 1.0; j++;}
344:           else
345:             {SETERRQ(PETSC_ERR_PLIB,"NOT FOUND!\n");}
346:         }
347:       else
348:         {
349:           idex=ivec_linear_search(col, a_local2global, a_m);
350:           if (idex!=-1)
351:             {v[idex] = 1.0;}
352:         }

354:       /* perform u = A.v_l */
355:       rvec_zero(u,n);
356:       do_matvec(xxt_handle->mvi,v,u);

358:       /* uu =  X^T.u_l (local portion) */
359:       /* technically only need to zero out first i entries */
360:       /* later turn this into an XXT_solve call ? */
361:       rvec_zero(uu,m);
362:       x_ptr=x;
363:       iptr = col_indices;
364:       for (k=0; k<i; k++)
365:         {
366:           off = *iptr++;
367:           len = *iptr++;
368:           dlen = PetscBLASIntCast(len);
369:           uu[k] = BLASdot_(&dlen,u+off,&i1,x_ptr,&i1);
370:           x_ptr+=len;
371:         }


374:       /* uu = X^T.u_l (comm portion) */
375:       ssgl_radd  (uu, w, dim, stages);

377:       /* z = X.uu */
378:       rvec_zero(z,n);
379:       x_ptr=x;
380:       iptr = col_indices;
381:       for (k=0; k<i; k++)
382:         {
383:           off = *iptr++;
384:           len = *iptr++;
385:           dlen = PetscBLASIntCast(len);
386:           BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1);
387:           x_ptr+=len;
388:         }

390:       /* compute v_l = v_l - z */
391:       rvec_zero(v+a_n,a_m-a_n);
392:       dlen = PetscBLASIntCast(n);
393:       BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1);

395:       /* compute u_l = A.v_l */
396:       if (a_n!=a_m)
397:         {gs_gop_hc(gs_handle,v,"+\0",dim);}
398:       rvec_zero(u,n);
399:       do_matvec(xxt_handle->mvi,v,u);

401:       /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
402:       dlen = PetscBLASIntCast(n);
403:       alpha = BLASdot_(&dlen,u,&i1,v,&i1);
404:       /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
405:       grop_hc(&alpha, &alpha_w, 1, op, dim);

407:       alpha = (PetscScalar) sqrt((double)alpha);

409:       /* check for small alpha                             */
410:       /* LATER use this to detect and determine null space */
411:       if (fabs(alpha)<1.0e-14)
412:         {SETERRQ1(PETSC_ERR_PLIB,"bad alpha! %g\n",alpha);}

414:       /* compute v_l = v_l/sqrt(alpha) */
415:       rvec_scale(v,1.0/alpha,n);

417:       /* add newly generated column, v_l, to X */
418:       flag = 1;
419:       off=len=0;
420:       for (k=0; k<n; k++)
421:         {
422:           if (v[k]!=0.0)
423:             {
424:               len=k;
425:               if (flag)
426:                 {off=k; flag=0;}
427:             }
428:         }

430:       len -= (off-1);

432:       if (len>0)
433:         {
434:           if ((xxt_nnz+len)>xxt_max_nnz)
435:             {
436:               PetscInfo(0,"increasing space for X by 2x!\n");
437:               xxt_max_nnz *= 2;
438:               x_ptr = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar));
439:               rvec_copy(x_ptr,x,xxt_nnz);
440:               free(x);
441:               x = x_ptr;
442:               x_ptr+=xxt_nnz;
443:             }
444:           xxt_nnz += len;
445:           rvec_copy(x_ptr,v+off,len);

447:           /* keep track of number of zeros */
448:           if (dim)
449:             {
450:               for (k=0; k<len; k++)
451:                 {
452:                   if (x_ptr[k]==0.0)
453:                     {xxt_zero_nnz++;}
454:                 }
455:             }
456:           else
457:             {
458:               for (k=0; k<len; k++)
459:                 {
460:                   if (x_ptr[k]==0.0)
461:                     {xxt_zero_nnz_0++;}
462:                 }
463:             }
464:           col_indices[2*i] = off;
465:           col_sz[i] = col_indices[2*i+1] = len;
466:           col_vals[i] = x_ptr;
467:         }
468:       else
469:         {
470:           col_indices[2*i] = 0;
471:           col_sz[i] = col_indices[2*i+1] = 0;
472:           col_vals[i] = x_ptr;
473:         }
474:     }

476:   /* close off stages for execution phase */
477:   while (dim!=level)
478:     {
479:       stages[dim++]=i;
480:       PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);
481:     }
482:   stages[dim]=i;

484:   xxt_handle->info->n=xxt_handle->mvi->n;
485:   xxt_handle->info->m=m;
486:   xxt_handle->info->nnz=xxt_nnz;
487:   xxt_handle->info->max_nnz=xxt_max_nnz;
488:   xxt_handle->info->msg_buf_sz=stages[level]-stages[0];
489:   xxt_handle->info->solve_uu = (PetscScalar *) malloc(m*sizeof(PetscScalar));
490:   xxt_handle->info->solve_w  = (PetscScalar *) malloc(m*sizeof(PetscScalar));
491:   xxt_handle->info->x=x;
492:   xxt_handle->info->col_vals=col_vals;
493:   xxt_handle->info->col_sz=col_sz;
494:   xxt_handle->info->col_indices=col_indices;
495:   xxt_handle->info->stages=stages;
496:   xxt_handle->info->nsolves=0;
497:   xxt_handle->info->tot_solve_time=0.0;

499:   free(segs);
500:   free(u);
501:   free(v);
502:   free(uu);
503:   free(z);
504:   free(w);

506:   return(0);
507: }
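
For orientation (this summary is mine, not part of the original comments): under the full-rank assumption recorded in XXT_factor(), the loop above performs a Gram-Schmidt-style orthogonalization of the fired unit vectors in the A inner product. For each fired column v_l it forms

   z     = X.X^T.A.v_l        and sets  v_l = v_l - z,
   alpha = v_l^T.A.v_l        and sets  v_l = v_l/sqrt(alpha),

so the stored columns satisfy X^T.A.X = I and hence A^{-1} = X.X^T, which is exactly the product that do_xxt_solve() below applies as uc = X.(X^T.b).
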

509: /**************************************xxt.c***********************************/
510: static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle,  PetscScalar *uc)
511: {
 512:   PetscInt off, len, *iptr;
513:   PetscInt level       =xxt_handle->level;
514:   PetscInt n           =xxt_handle->info->n;
515:   PetscInt m           =xxt_handle->info->m;
516:   PetscInt *stages     =xxt_handle->info->stages;
517:   PetscInt *col_indices=xxt_handle->info->col_indices;
518:   PetscScalar *x_ptr, *uu_ptr;
519:   PetscScalar *solve_uu=xxt_handle->info->solve_uu;
520:   PetscScalar *solve_w =xxt_handle->info->solve_w;
521:   PetscScalar *x       =xxt_handle->info->x;
522:   PetscBLASInt i1 = 1,dlen;

525:   uu_ptr=solve_uu;
526:   rvec_zero(uu_ptr,m);

528:   /* x  = X.Y^T.b */
529:   /* uu = Y^T.b */
530:   for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len)
531:     {
532:       off=*iptr++; len=*iptr++;
533:       dlen = PetscBLASIntCast(len);
534:       *uu_ptr++ = BLASdot_(&dlen,uc+off,&i1,x_ptr,&i1);
535:     }

 537:   /* communication of beta */
538:   uu_ptr=solve_uu;
539:   if (level) {ssgl_radd(uu_ptr, solve_w, level, stages);}

541:   rvec_zero(uc,n);

543:   /* x = X.uu */
544:   for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len)
545:     {
546:       off=*iptr++; len=*iptr++;
547:       dlen = PetscBLASIntCast(len);
548:       BLASaxpy_(&dlen,uu_ptr++,x_ptr,&i1,uc+off,&i1);
549:     }
550:   return(0);
551: }

553: /**************************************xxt.c***********************************/
554: static PetscErrorCode check_handle(xxt_ADT xxt_handle)
555: {
556:   PetscInt vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX};

559:   if (xxt_handle==NULL)
560:     {SETERRQ1(PETSC_ERR_PLIB,"check_handle() :: bad handle :: NULL %D\n",xxt_handle);}

562:   vals[0]=vals[1]=xxt_handle->id;
563:   giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
564:   if ((vals[0]!=vals[1])||(xxt_handle->id<=0))
565:     {SETERRQ3(PETSC_ERR_PLIB,"check_handle() :: bad handle :: id mismatch min/max %D/%D %D\n",vals[0],vals[1], xxt_handle->id);}
566:   return(0);
567: }

569: /**************************************xxt.c***********************************/
570: static  PetscErrorCode det_separators(xxt_ADT xxt_handle)
571: {
572:   PetscInt i, ct, id;
573:   PetscInt mask, edge, *iptr;
574:   PetscInt *dir, *used;
575:   PetscInt sum[4], w[4];
576:   PetscScalar rsum[4], rw[4];
577:   PetscInt op[] = {GL_ADD,0};
578:   PetscScalar *lhs, *rhs;
579:   PetscInt *nsep, *lnsep, *fo, nfo=0;
580:   gs_ADT gs_handle=xxt_handle->mvi->gs_handle;
581:   PetscInt *local2global=xxt_handle->mvi->local2global;
582:   PetscInt  n=xxt_handle->mvi->n;
583:   PetscInt  m=xxt_handle->mvi->m;
584:   PetscInt level=xxt_handle->level;
585:   PetscInt shared=FALSE;

588:   dir  = (PetscInt*)malloc(sizeof(PetscInt)*(level+1));
589:   nsep = (PetscInt*)malloc(sizeof(PetscInt)*(level+1));
590:   lnsep= (PetscInt*)malloc(sizeof(PetscInt)*(level+1));
591:   fo   = (PetscInt*)malloc(sizeof(PetscInt)*(n+1));
592:   used = (PetscInt*)malloc(sizeof(PetscInt)*n);

594:   ivec_zero(dir  ,level+1);
595:   ivec_zero(nsep ,level+1);
596:   ivec_zero(lnsep,level+1);
597:   ivec_set (fo   ,-1,n+1);
598:   ivec_zero(used,n);

600:   lhs  = (PetscScalar*)malloc(sizeof(PetscScalar)*m);
601:   rhs  = (PetscScalar*)malloc(sizeof(PetscScalar)*m);

603:   /* determine the # of unique dof */
604:   rvec_zero(lhs,m);
605:   rvec_set(lhs,1.0,n);
606:   gs_gop_hc(gs_handle,lhs,"+\0",level);
607:   rvec_zero(rsum,2);
608:   for (ct=i=0;i<n;i++)
609:     {
610:       if (lhs[i]!=0.0)
611:         {rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];}
612:     }
613:   grop_hc(rsum,rw,2,op,level);
614:   rsum[0]+=0.1;
615:   rsum[1]+=0.1;

617:   if (fabs(rsum[0]-rsum[1])>EPS)
618:     {shared=TRUE;}

620:   xxt_handle->info->n_global=xxt_handle->info->m_global=(PetscInt) rsum[0];
621:   xxt_handle->mvi->n_global =xxt_handle->mvi->m_global =(PetscInt) rsum[0];

623:   /* determine separator sets top down */
624:   if (shared)
625:     {
626:       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
627:         {
628:           /* set rsh of hc, fire, and collect lhs responses */
629:           (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
630:           gs_gop_hc(gs_handle,lhs,"+\0",edge);
631: 
632:           /* set lsh of hc, fire, and collect rhs responses */
633:           (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
634:           gs_gop_hc(gs_handle,rhs,"+\0",edge);
635: 
636:           for (i=0;i<n;i++)
637:             {
638:               if (id< mask)
639:                 {
640:                   if (lhs[i]!=0.0)
641:                     {lhs[i]=1.0;}
642:                 }
643:               if (id>=mask)
644:                 {
645:                   if (rhs[i]!=0.0)
646:                     {rhs[i]=1.0;}
647:                 }
648:             }

650:           if (id< mask)
651:             {gs_gop_hc(gs_handle,lhs,"+\0",edge-1);}
652:           else
653:             {gs_gop_hc(gs_handle,rhs,"+\0",edge-1);}

655:           /* count number of dofs I own that have signal and not in sep set */
656:           rvec_zero(rsum,4);
657:           for (ivec_zero(sum,4),ct=i=0;i<n;i++)
658:             {
659:               if (!used[i])
660:                 {
661:                   /* number of unmarked dofs on node */
662:                   ct++;
663:                   /* number of dofs to be marked on lhs hc */
664:                   if (id< mask)
665:                     {
666:                       if (lhs[i]!=0.0)
667:                         {sum[0]++; rsum[0]+=1.0/lhs[i];}
668:                     }
669:                   /* number of dofs to be marked on rhs hc */
670:                   if (id>=mask)
671:                     {
672:                       if (rhs[i]!=0.0)
673:                         {sum[1]++; rsum[1]+=1.0/rhs[i];}
674:                     }
675:                 }
676:             }

678:           /* go for load balance - choose half with most unmarked dofs, bias LHS */
679:           (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
680:           (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct);
681:           giop_hc(sum,w,4,op,edge);
682:           grop_hc(rsum,rw,4,op,edge);
683:           rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1;

685:           if (id<mask)
686:             {
687:               /* mark dofs I own that have signal and not in sep set */
688:               for (ct=i=0;i<n;i++)
689:                 {
690:                   if ((!used[i])&&(lhs[i]!=0.0))
691:                     {
692:                       ct++; nfo++;

694:                       if (nfo>n)
695:                         {SETERRQ(PETSC_ERR_PLIB,"nfo about to exceed n\n");}

697:                       *--iptr = local2global[i];
698:                       used[i]=edge;
699:                     }
700:                 }
701:               if (ct>1) {ivec_sort(iptr,ct);}

703:               lnsep[edge]=ct;
704:               nsep[edge]=(PetscInt) rsum[0];
705:               dir [edge]=LEFT;
706:             }

708:           if (id>=mask)
709:             {
710:               /* mark dofs I own that have signal and not in sep set */
711:               for (ct=i=0;i<n;i++)
712:                 {
713:                   if ((!used[i])&&(rhs[i]!=0.0))
714:                     {
715:                       ct++; nfo++;

717:                       if (nfo>n)
718:                         {SETERRQ(PETSC_ERR_PLIB,"nfo about to exceed n\n");}

720:                       *--iptr = local2global[i];
721:                       used[i]=edge;
722:                     }
723:                 }
724:               if (ct>1) {ivec_sort(iptr,ct);}

726:               lnsep[edge]=ct;
727:               nsep[edge]= (PetscInt) rsum[1];
728:               dir [edge]=RIGHT;
729:             }

731:           /* LATER or we can recur on these to order seps at this level */
732:           /* do we need full set of separators for this?                */

734:           /* fold rhs hc into lower */
735:           if (id>=mask)
736:             {id-=mask;}
737:         }
738:     }
739:   else
740:     {
741:       for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
742:         {
743:           /* set rsh of hc, fire, and collect lhs responses */
744:           (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
745:           gs_gop_hc(gs_handle,lhs,"+\0",edge);

747:           /* set lsh of hc, fire, and collect rhs responses */
748:           (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
749:           gs_gop_hc(gs_handle,rhs,"+\0",edge);

751:           /* count number of dofs I own that have signal and not in sep set */
752:           for (ivec_zero(sum,4),ct=i=0;i<n;i++)
753:             {
754:               if (!used[i])
755:                 {
756:                   /* number of unmarked dofs on node */
757:                   ct++;
758:                   /* number of dofs to be marked on lhs hc */
759:                   if ((id< mask)&&(lhs[i]!=0.0)) {sum[0]++;}
760:                   /* number of dofs to be marked on rhs hc */
761:                   if ((id>=mask)&&(rhs[i]!=0.0)) {sum[1]++;}
762:                 }
763:             }

765:           /* go for load balance - choose half with most unmarked dofs, bias LHS */
766:           (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
767:           giop_hc(sum,w,4,op,edge);

769:           /* lhs hc wins */
770:           if (sum[2]>=sum[3])
771:             {
772:               if (id<mask)
773:                 {
774:                   /* mark dofs I own that have signal and not in sep set */
775:                   for (ct=i=0;i<n;i++)
776:                     {
777:                       if ((!used[i])&&(lhs[i]!=0.0))
778:                         {
779:                           ct++; nfo++;
780:                           *--iptr = local2global[i];
781:                           used[i]=edge;
782:                         }
783:                     }
784:                   if (ct>1) {ivec_sort(iptr,ct);}
785:                   lnsep[edge]=ct;
786:                 }
787:               nsep[edge]=sum[0];
788:               dir [edge]=LEFT;
789:             }
790:           /* rhs hc wins */
791:           else
792:             {
793:               if (id>=mask)
794:                 {
795:                   /* mark dofs I own that have signal and not in sep set */
796:                   for (ct=i=0;i<n;i++)
797:                     {
798:                       if ((!used[i])&&(rhs[i]!=0.0))
799:                         {
800:                           ct++; nfo++;
801:                           *--iptr = local2global[i];
802:                           used[i]=edge;
803:                         }
804:                     }
805:                   if (ct>1) {ivec_sort(iptr,ct);}
806:                   lnsep[edge]=ct;
807:                 }
808:               nsep[edge]=sum[1];
809:               dir [edge]=RIGHT;
810:             }
811:           /* LATER or we can recur on these to order seps at this level */
812:           /* do we need full set of separators for this?                */

814:           /* fold rhs hc into lower */
815:           if (id>=mask)
816:             {id-=mask;}
817:         }
818:     }


821:   /* level 0 is on processor case - so mark the remainder */
822:   for (ct=i=0;i<n;i++)
823:     {
824:       if (!used[i])
825:         {
826:           ct++; nfo++;
827:           *--iptr = local2global[i];
828:           used[i]=edge;
829:         }
830:     }
831:   if (ct>1) {ivec_sort(iptr,ct);}
832:   lnsep[edge]=ct;
833:   nsep [edge]=ct;
834:   dir  [edge]=LEFT;

836:   xxt_handle->info->nsep=nsep;
837:   xxt_handle->info->lnsep=lnsep;
838:   xxt_handle->info->fo=fo;
839:   xxt_handle->info->nfo=nfo;

841:   free(dir);
842:   free(lhs);
843:   free(rhs);
844:   free(used);
845:   return(0);
846: }

848: /**************************************xxt.c***********************************/
849: static mv_info *set_mvi(PetscInt *local2global, PetscInt n, PetscInt m, void *matvec, void *grid_data)
850: {
851:   mv_info *mvi;


854:   mvi = (mv_info*)malloc(sizeof(mv_info));
855:   mvi->n=n;
856:   mvi->m=m;
857:   mvi->n_global=-1;
858:   mvi->m_global=-1;
859:   mvi->local2global=(PetscInt*)malloc((m+1)*sizeof(PetscInt));
860:   ivec_copy(mvi->local2global,local2global,m);
861:   mvi->local2global[m] = INT_MAX;
862:   mvi->matvec=(PetscErrorCode (*)(mv_info*,PetscScalar*,PetscScalar*))matvec;
863:   mvi->grid_data=grid_data;

865:   /* set xxt communication handle to perform restricted matvec */
866:   mvi->gs_handle = gs_init(local2global, m, num_nodes);

868:   return(mvi);
869: }

871: /**************************************xxt.c***********************************/
872: static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u)
873: {
875:   A->matvec((mv_info*)A->grid_data,v,u);
876:   return(0);
877: }
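
For completeness, a minimal sketch of what a caller-supplied matvec could look like; the struct, the function name, and the diagonal operator are illustrative assumptions only, not part of this file. Note that do_matvec() above forwards grid_data as the first argument, so the callback receives the caller's own data pointer:

  /* hypothetical local matvec: u = D.v for a diagonal operator kept in grid_data */
  typedef struct { PetscInt n; PetscScalar *diag; } my_grid_t;

  static PetscErrorCode my_matvec(void *data, PetscScalar *v, PetscScalar *u)
  {
    my_grid_t *g = (my_grid_t*)data;
    PetscInt   i;
    for (i=0; i<g->n; i++) u[i] = g->diag[i]*v[i];
    return 0;
  }

Passing (void*)my_matvec and a pointer to the caller's grid data as the last two arguments of XXT_factor() wires such a callback into the factorization and solve paths above.
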