/* * @HEADER * * *********************************************************************** * * Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring * Copyright 2012 Sandia Corporation * * Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, * the U.S. Government retains certain rights in this software. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Corporation nor the names of the * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Questions? Contact Karen Devine kddevin@sandia.gov * Erik Boman egboman@sandia.gov * * *********************************************************************** * * @HEADER */ #ifdef __cplusplus /* if C++, define the rest of this header file as extern C */ extern "C" { #endif #include "zz_const.h" #include "phg_hypergraph.h" #include /* * Values indicating how part remapping should be done. */ #define ZOLTAN_LB_REMAP_NONE 0 #define ZOLTAN_LB_REMAP_PROCESSORS 1 #define ZOLTAN_LB_REMAP_PARTS 2 #define HEINFO_ENTRIES 3 static int gather_and_build_remap(ZZ *, int *, int, int *); static int set_remap_type(ZZ *, int *); static int malloc_HEinfo(ZZ *, int, int **); static int do_match(ZZ*, HGraph *, int *, int); static int matching_pgm(ZZ *, HGraph *, int *, int *); static int local_HEs_from_import_lists(ZZ *, int, int, int *, int *, int *, int *, int **); static int local_HEs_from_export_lists(ZZ *, int, int, int *, int *, int *, int *, int **); static float measure_stays(ZZ *, HGraph *, int, int *, char *); /******************************************************************************/ int Zoltan_LB_Remap( ZZ *zz, int *new_map, /* Upon return, flag indicating whether part or proc assignments actually changed due to remapping. */ int nobj, /* # objs the processor knows about after partitioning */ int *proc, /* processors for the objs; if export_list_flag == 1, proc contains new proc assignment else proc contains old proc assignment Upon return, proc contains remapped new proc assignment regardless of export_list_flag's value. */ int *old_part, /* old part assignments for the objs */ int *new_part, /* new part assignments for the objs. Upon return, new_part contains remapped new part assignments */ int export_list_flag /* Flag indicating whether the algorithm computes export lists or import lists. The HG for matching is built differently depending on whether the algorithm knows export or import info. */ ) { char *yo = "Zoltan_LB_Remap"; int ierr = ZOLTAN_OK; int i; int remap_type; /* Type of remapping to be done: Procs, Parts, or None */ int HEcnt = 0; /* Number of local hyperedges */ int *HEinfo = NULL; /* Array of HE info; for each HE, two pins and one edge weight. Stored as a single vector to minimize communication calls. */ *new_map = 0; /* Determine type of remapping that is appropriate */ ierr = set_remap_type(zz, &remap_type); if (remap_type != ZOLTAN_LB_REMAP_NONE) { /* Build local hyperedges */ if (export_list_flag) ierr = local_HEs_from_export_lists(zz, remap_type, nobj, proc, old_part, new_part, &HEcnt, &HEinfo); else ierr = local_HEs_from_import_lists(zz, remap_type, nobj, proc, old_part, new_part, &HEcnt, &HEinfo); if (ierr < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error building local HEs"); goto End; } /* Gather local hyperedges to each processor; build remap vector */ ierr = gather_and_build_remap(zz, new_map, HEcnt, HEinfo); if (ierr < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather_and_build_remap."); goto End; } if (*new_map) { /* Update part and processor information for algorithms */ for (i = 0; i < nobj; i++) { new_part[i] = zz->LB.Remap[new_part[i]]; proc[i] = Zoltan_LB_Part_To_Proc(zz, new_part[i], NULL); } } } End: ZOLTAN_FREE(&HEinfo); return(ierr); } /******************************************************************************/ static int local_HEs_from_import_lists( ZZ *zz, int remap_type, /* type of remapping to do: parts, procs, or none. */ int nobj, /* # objs the processor knows about (keep + imports) */ int *proc, /* On input, old processor assignment for each obj; Upon return, remapped new proc assignment for each obj. */ int *old_part, /* old part assignments for each objs */ int *new_part, /* On input, new part assignments for each objs. Upon return, remapped new part assignments */ int *HEcnt, /* # of HEs allocated. */ int **HEinfo /* Array of HE info; for each HE, two pins and one edge weight. Stored as a single vector to minimize communication calls. */ ) { /* Routine to remap parts (to new processors or new part numbers) * to reduce data movement. * This routine assumes the load-balancing algorithm built import lists. * Objects described are those that ENDED UP on my_proc due to load balancing. * For all these objects, new_proc == my_proc. */ char *yo = "local_HEs_from_import_lists"; int ierr = ZOLTAN_OK; int i, cnt, tmp; int *tmp_HEinfo; int old_size; /* # of old entries to remap to. If remapping parts to processors, old_size = Num_Procs; if renumbering parts, old_size = old num parts. */ int fp; /* First part on this processor in new decomposition. */ int np; /* # of parts on this processor in new decomposition. */ int my_proc = zz->Proc; /* This processor's rank. */ int minp, maxp; /* Lowest and highest part numbers on this processor in old decomposition; part numbers are assumed to be dense, but no particular distribution is assumed. */ int HEwgt_size; /* # of HE weights allocated. */ int *HEwgt = NULL; /* Array of HE weights. Initially includes zero weights; later zero-weights are removed.*/ if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) { /* Renumber new processors to minimize changes in proc assignment. */ HEwgt_size = zz->Num_Proc; HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int)); if (!HEwgt) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } for (i = 0; i < nobj; i++) HEwgt[proc[i]]++; /* At this point, proc has old proc assignments */ *HEcnt = 0; for (i = 0; i < HEwgt_size; i++) if (HEwgt[i] != 0) (*HEcnt)++; ierr = malloc_HEinfo(zz, *HEcnt, HEinfo); if (ierr < 0) goto End; tmp_HEinfo = *HEinfo; cnt = 0; for (i = 0; i < HEwgt_size; i++) { if (HEwgt[i] != 0) { tmp = cnt * HEINFO_ENTRIES; tmp_HEinfo[tmp] = i; /* Old processor number */ tmp_HEinfo[tmp+1] = my_proc; /* New processor number */ tmp_HEinfo[tmp+2] = HEwgt[i]; /* shift non-zero weights down. */ cnt++; } } } else { /* ZOLTAN_LB_REMAP_PARTS */ /* Renumber new parts to minimize changes in part assignment */ for (minp = INT_MAX, maxp = 0, i = 0; i < nobj; i++) { if (old_part[i] < minp) minp = old_part[i]; if (old_part[i] > maxp) maxp = old_part[i]; } /* Don't include old part numbers that are greater than * zz->LB.Num_Global_Parts - 1; they are not valid values for * remapping of new part numbers. */ if (minp >= zz->LB.Num_Global_Parts) minp = zz->LB.Num_Global_Parts-1; if (maxp >= zz->LB.Num_Global_Parts) maxp = zz->LB.Num_Global_Parts-1; old_size = maxp - minp + 1; Zoltan_LB_Proc_To_Part(zz, my_proc, &np, &fp); HEwgt_size = np * old_size; if (HEwgt_size > 0) { HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int)); if (!HEwgt) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } } for (i = 0; i < nobj; i++) { if (old_part[i] < zz->LB.Num_Global_Parts) { /* Include only HEs to old parts numbered * 0 to zz->LB.Num_Global_Parts-1; these are the only valid * remapping values for the new part numbers. */ tmp = (new_part[i]-fp) * old_size; HEwgt[tmp + (old_part[i]-minp)]++; } } *HEcnt = 0; for (i = 0; i < HEwgt_size; i++) if (HEwgt[i] != 0) (*HEcnt)++; ierr = malloc_HEinfo(zz, *HEcnt, HEinfo); if (ierr < 0) goto End; tmp_HEinfo = *HEinfo; cnt = 0; for (i = 0; i < HEwgt_size; i++) { if (HEwgt[i] != 0) { tmp = cnt * HEINFO_ENTRIES; tmp_HEinfo[tmp] = i%old_size + minp; /* Old part number */ tmp_HEinfo[tmp+1] = i/old_size + fp; /* New part number */ tmp_HEinfo[tmp+2] = HEwgt[i]; /* shift non-zero weights down. */ cnt++; } } } End: if (HEwgt) ZOLTAN_FREE(&HEwgt); return ierr; } /******************************************************************************/ static int local_HEs_from_export_lists( ZZ *zz, int remap_type, /* type of remapping to do: parts, procs, or none. */ int nobj, /* # objs the processor knows about (keep + exports) */ int *new_proc, /* On input, new processor assignment for each obj; Upon return, remapped new proc assignment for each obj. */ int *old_part, /* old part assignments for each objs */ int *new_part, /* On input, new part assignments for each objs. Upon return, remapped new part assignments */ int *HEcnt, /* # of HEs allocated. */ int **HEinfo /* Array of HE info; for each HE, two pins and one edge weight. Stored as a single vector to minimize communication calls. */ ) { /* Routine to remap parts (to new processors or new part numbers) * to reduce data movement. * This routine assumes the load-balancing algorithm built export lists. * Objects described are those that STARTED on zz->Proc due to load balancing. * For all these objects, old_proc == zz->Proc. */ char *yo = "local_HEs_from_export_lists"; int ierr = ZOLTAN_OK; int i, cnt, tmp; int *tmp_HEinfo; int my_proc = zz->Proc; /* This processor's rank. */ int nimp = 0; int *imp_proc = NULL, /* Temporary arrays if inversion of export to */ *imp_old_part = NULL, /* import lists is needed. */ *imp_new_part = NULL; int HEwgt_size; /* # of HE weights allocated. */ int *HEwgt = NULL; /* Array of HE weights. Initially includes zero weights; later zero-weights are removed.*/ if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) { /* Build HEs based on processor assignment. * We know the old processor for all objects we are keeping and all * export objects -- it is my_proc! * We also know the new processor number for all objects initially on * my_proc (since we built export lists.) * This case is a special case of part remapping; it is easy to * build the hyperedges in this special case. */ HEwgt_size = zz->Num_Proc; HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int)); if (!HEwgt) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } for (i = 0; i < nobj; i++) HEwgt[new_proc[i]]++; *HEcnt = 0; for (i = 0; i < HEwgt_size; i++) if (HEwgt[i] != 0) (*HEcnt)++; ierr = malloc_HEinfo(zz, *HEcnt, HEinfo); if (ierr < 0) goto End; tmp_HEinfo = *HEinfo; cnt = 0; for (i = 0; i < HEwgt_size; i++) { if (HEwgt[i] != 0) { tmp = cnt * HEINFO_ENTRIES; tmp_HEinfo[tmp] = my_proc; /* Old processor number */ tmp_HEinfo[tmp+1] = i; /* New processor number */ tmp_HEinfo[tmp+2] = HEwgt[i]; /* shift non-zero weights down. */ cnt++; } } } else { /* ZOLTAN_LB_REMAP_PARTS */ /* Cannot renumber parts given export lists without summing HE weights * across processors. This summation is not straightforward. Also, a * potentially large number of HEs may exist * (max_old_part_number * zz->Num_Global_Parts). Rather than build * this large matrix, just compute import lists from the export lists * and run the import-list algorithm. */ ZOLTAN_COMM_OBJ *plan; int msg_tag = 22345; ierr = Zoltan_Comm_Create(&plan, nobj, new_proc, zz->Communicator, msg_tag, &nimp); if (nimp > 0) { imp_proc = (int *) ZOLTAN_MALLOC(3 * nimp * sizeof(int)); imp_old_part = imp_proc + nimp; imp_new_part = imp_old_part + nimp; if (!imp_proc) { ierr = ZOLTAN_MEMERR; ZOLTAN_PRINT_ERROR(my_proc, yo, "Memory error."); goto End; } } ierr = Zoltan_Comm_Info(plan, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, imp_proc, NULL); msg_tag++; ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) old_part, sizeof(int), (char *) imp_old_part); msg_tag++; ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) new_part, sizeof(int), (char *) imp_new_part); Zoltan_Comm_Destroy(&plan); ierr = local_HEs_from_import_lists(zz, remap_type, nimp, imp_proc, imp_old_part, imp_new_part, HEcnt, HEinfo); } End: if (HEwgt) ZOLTAN_FREE(&HEwgt); if (imp_proc) ZOLTAN_FREE(&imp_proc); return ierr; } /******************************************************************************/ static int set_remap_type( ZZ *zz, int *remap_type ) { int ierr = ZOLTAN_OK; /* Set remap type based on distribution of parts to processors. */ if (zz->LB.Remap_Flag == 0) { /* No remapping requested */ *remap_type = ZOLTAN_LB_REMAP_NONE; } else if (!(zz->LB.Uniform_Parts)) { /* Remapping does not respect requested non-uniform part sizes; no remapping done. */ *remap_type = ZOLTAN_LB_REMAP_NONE; ierr = ZOLTAN_WARN; } else if (!(zz->LB.Single_Proc_Per_Part)) { /* Some parts spread across >1 processor; remapping not supported. */ *remap_type = ZOLTAN_LB_REMAP_NONE; ierr = ZOLTAN_WARN; } else if (zz->LB.PartDist == NULL) { /* # Parts == # Processors, uniformly distributed; remap processors */ *remap_type = ZOLTAN_LB_REMAP_PROCESSORS; } else { /* # Parts != # processors, or parts not uniformly distributed */ *remap_type = ZOLTAN_LB_REMAP_PARTS; } return ierr; } /******************************************************************************/ static int do_match( ZZ *zz, HGraph *hg, /* Hypergraph data structure on which to do the matching. */ int *match, /* Matching array -- output */ int limit /* max number of matches that are allowed */ ) { /* Temporary function; will be replace by a real matching function later. */ int ierr = ZOLTAN_OK; int i; /* Default initialization -- no change in mapping */ for (i = 0; i < hg->nVtx; i++) match[i] = i; ierr = matching_pgm(zz, hg, match, &limit); return ierr; } /******************************************************************************/ static int malloc_HEinfo( ZZ *zz, int HEcnt, /* Number of HEs to allocate */ int **HEinfo /* Array of HE info; for each HE, two pins and one edge weight. Stored as a single vector to minimize communication calls. */ ) { /* Routine for allocating HEs to use in remap's matching routine. */ char *yo = "malloc_HEinfo"; int ierr = ZOLTAN_OK; if (HEcnt) { *HEinfo = (int *) ZOLTAN_MALLOC(HEINFO_ENTRIES * HEcnt * sizeof(int)); if (*HEinfo == NULL) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; } } else *HEinfo = NULL; return ierr; } /******************************************************************************/ static int gather_and_build_remap( ZZ *zz, int *new_map, /* Upon return, flag indicating whether parts assignments were changed due to remap. */ int HEcnt, /* # of HEs allocated. */ int *HEinfo /* Array of HE info; for each HE, two pins and one edge weight. Stored as a single vector to minimize communication calls. */ ) { char *yo = "gather_and_remap"; int ierr = ZOLTAN_OK; int i, uidx, tmp; int *each_size = NULL; /* sizes (# HEs * HEINFO_ENTRIES) for each proc */ int *recvbuf = NULL; /* Receive buffer for gatherv */ int *displs = NULL; /* Displacement buffer for gatherv */ int send_size; /* Local # HEs * HEINFO_ENTRIES */ int total_size; /* Total # ints in gatherv */ int total_HEcnt; /* Total (across all procs) number of HEs. */ int max0, max1; /* Max values of pin 0 and pin 1 for each HE. */ int *match = NULL; /* Vector describing the matching. match[i] = j ==> match[j] = i ==> vertices i and j are matched. */ int *used = NULL; /* Vector indicating which parts are used in the matching. */ int limit; /* Maximum number of matches that are allowed */ HGraph hg; /* Hypergraph for matching */ float before_remap = 0, /* Amount of data that overlaps between old and */ after_remap = 0; /* new decomposition before and after remapping, respectively. */ float with_oldremap = 0; /* Amount of data that overlaps between old and new decomposition using the OldRemap vector (remapping from the previous decomposition). */ /* Gather HEs from each processor into a local complete HG. */ each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int)); if (!each_size) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } send_size = HEcnt * HEINFO_ENTRIES; MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT, zz->Communicator); for (total_size = 0, i = 0; i < zz->Num_Proc; i++) { total_size += each_size[i]; } recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int)); displs = recvbuf + total_size; if (!recvbuf) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } displs[0] = 0; for (i = 1; i < zz->Num_Proc; i++) displs[i] = displs[i-1] + each_size[i-1]; MPI_Allgatherv(HEinfo, send_size, MPI_INT, recvbuf, each_size, displs, MPI_INT, zz->Communicator); total_HEcnt = total_size / HEINFO_ENTRIES; for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) { tmp = i * HEINFO_ENTRIES; if (recvbuf[tmp] > max0) max0 = recvbuf[tmp]; if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1]; } /* Increment max0 and max1 so that they are the maximum number of unique pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */ max0++; max1++; /* Sanity check */ /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes * does not return the correct number of non-empty parts, allowing * max1 to be less than LB.Num_Global_Parts. * (e.g., ewgt.adaptive-partlocal1-v3.4.?). */ if (max1 > zz->LB.Num_Global_Parts) ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1."); /* Set up global HG */ Zoltan_HG_HGraph_Init(&hg); if (total_HEcnt) { hg.nVtx = max0 + zz->LB.Num_Global_Parts; hg.nEdge = total_HEcnt; hg.nPins = total_HEcnt * 2; /* two pins per HE */ hg.EdgeWeightDim = 1; hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float)); hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int)); hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int)); if (!hg.ewgt || !hg.hindex || !hg.hvertex) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } for (i = 0; i < total_HEcnt; i++) { tmp = i * HEINFO_ENTRIES; hg.hindex[i] = i+i; hg.hvertex[i+i] = recvbuf[tmp]; hg.hvertex[i+i+1] = (int)recvbuf[tmp+1]+max0; hg.ewgt[i] = recvbuf[tmp+2]; } hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt; ierr = Zoltan_HG_Create_Mirror(zz, &hg); if (ierr < 0) goto End; } before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE"); /* Compute the amount of overlap when using the old remap vector. */ with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD"); /* Do matching */ match = (int *) ZOLTAN_CALLOC(hg.nVtx + zz->LB.Num_Global_Parts, sizeof(int)); used = match + hg.nVtx; if (hg.nVtx && !match) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } /* Max # matches allowed */ limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts); do_match(zz, &hg, match, limit); /* Build remapping vector, if non-trivial matching was returned. */ *new_map = 0; for (i = 0; i < zz->LB.Num_Global_Parts; i++) if (match[i+max0] != i+max0) { *new_map = 1; break; } if (*new_map) { zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int)); if (!(zz->LB.Remap)) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } /* First, process all parts that were matched. Mark matched parts as used.*/ for (i = 0; i < zz->LB.Num_Global_Parts; i++) { zz->LB.Remap[i] = -1; tmp = match[i+max0]; if (tmp != i+max0) { zz->LB.Remap[i] = tmp; used[tmp] = 1; } } /* Second, process unmatched parts; if possible, keep same part number. */ for (i = 0; i < zz->LB.Num_Global_Parts; i++) { if (zz->LB.Remap[i] > -1) continue; /* Already processed part i */ /* match[i+max0] == i+max0 */ if (!used[i]) { /* Keep the same part number if it is not used */ zz->LB.Remap[i] = i; used[i] = 1; } } /* Third, process remaining unmatched parts; assign them to unused parts.*/ for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) { if (zz->LB.Remap[i] > -1) continue; /* Already processed part i */ /* match[i+max0] == i+max0 */ while (used[uidx]) uidx++; /* Find next unused part */ zz->LB.Remap[i] = uidx; used[uidx] = 1; } } if (*new_map) after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER "); if ((before_remap >= after_remap) && (before_remap >= with_oldremap)) { /* No benefit from remapping; don't keep it! */ ZOLTAN_FREE(&zz->LB.Remap); ZOLTAN_FREE(&zz->LB.OldRemap); *new_map = 0; } else if (with_oldremap >= after_remap) { /* The old remap vector is better than the new one; keep the old one. */ ZOLTAN_FREE(&zz->LB.Remap); zz->LB.Remap = zz->LB.OldRemap; zz->LB.OldRemap = NULL; *new_map = 1; } else { /* Going to use the new remap vector; free the old one. */ ZOLTAN_FREE(&zz->LB.OldRemap); } if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc && zz->LB.Remap) for (i = 0; i < zz->LB.Num_Global_Parts; i++) printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]); End: ZOLTAN_FREE(&match); ZOLTAN_FREE(&each_size); ZOLTAN_FREE(&recvbuf); Zoltan_HG_HGraph_Free(&hg); return ierr; } /******************************************************************************/ static float measure_stays( ZZ *zz, HGraph *hg, int max0, int *remapvec, char *when ) { /* Routine that measures and prints the amount of data that doesn't move * as described by the hypergraph. */ float stay = 0.; int tmp, i; for (i = 0; i < hg->nEdge; i++) { tmp = i + i; if (remapvec) { if (hg->hvertex[tmp] == (int)remapvec[hg->hvertex[tmp+1]-max0]) stay += hg->ewgt[i]; } else { if (hg->hvertex[tmp] == (hg->hvertex[tmp+1]-max0)) stay += hg->ewgt[i]; } } if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc) printf("%d REMAP--%s: TOTAL AMT STAY = %g\n\n", zz->Proc, when, stay); return(stay); } /******************************************************************************/ /* path growing matching, hypergraph version */ static int matching_pgm (ZZ *zz, HGraph *hg, int *match, int *limit) { int i, j, k, side = 0, edge, vertex, *Match[2] = {NULL, NULL}; int limits[2], neighbor, next_vertex, pins; double w[2]={0.0,0.0}, weight, max_weight, *sims = NULL; char *yo = "matching_pgm"; limits[0] = limits[1] = *limit; Match[0] = match; if (hg->nVtx) { if (!(Match[1] = (int*) ZOLTAN_MALLOC (hg->nVtx * sizeof(int))) || !(sims = (double*) ZOLTAN_CALLOC (hg->nVtx, sizeof(double))) ) { Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims); ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); return ZOLTAN_MEMERR; } } for (i = 0; i < hg->nVtx; i++) Match[1][i] = i; for (i = 0; i < hg->nVtx && limits[side] > 0; i++) { if (Match[0][i] == i && Match[1][i] == i) { vertex = i; while (vertex > 0 && limits[side] > 0) { max_weight = 0.0; next_vertex = -1; for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) { edge = hg->vedge[j]; pins = hg->hindex[edge+1] - hg->hindex[edge]; weight = 2.0 / ((pins-1)*pins); if (hg->ewgt) weight *= hg->ewgt[edge]; for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) { neighbor = hg->hvertex[k]; if (neighbor != vertex && Match[0][neighbor] == neighbor && Match[1][neighbor]==neighbor) sims[neighbor] += weight; } } for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) { edge = hg->vedge[j]; for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) { neighbor = hg->hvertex[k]; if (sims[neighbor] > 0.0) { if (sims[neighbor] > max_weight) { max_weight = sims[neighbor]; next_vertex = neighbor; } sims[neighbor] = 0.0; } } } if (next_vertex >= 0) { Match[side][vertex] = next_vertex; Match[side][next_vertex] = vertex; limits[side]--; w[side] += max_weight; side = 1-side; } vertex = next_vertex; } } } if (w[0] < w[1]) { for (i = 0; i < hg->nVtx; i++) match[i] = Match[1][i]; *limit = limits[1]; } else *limit = limits[0]; Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims); return ZOLTAN_OK; } /******************************************************************************/ #ifdef __cplusplus } /* closing bracket for extern "C" */ #endif