Files
phasicFlow/thirdParty/Zoltan/src/lb/lb_remap.c
2025-05-15 21:58:43 +03:30

870 lines
29 KiB
C

/*
* @HEADER
*
* ***********************************************************************
*
* Zoltan Toolkit for Load-balancing, Partitioning, Ordering and Coloring
* Copyright 2012 Sandia Corporation
*
* Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
* the U.S. Government retains certain rights in this software.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the Corporation nor the names of the
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Questions? Contact Karen Devine kddevin@sandia.gov
* Erik Boman egboman@sandia.gov
*
* ***********************************************************************
*
* @HEADER
*/
#ifdef __cplusplus
/* if C++, define the rest of this header file as extern C */
extern "C" {
#endif
#include "zz_const.h"
#include "phg_hypergraph.h"
#include <limits.h>
/*
* Values indicating how part remapping should be done.
* set_remap_type() selects one of these; Zoltan_LB_Remap() does nothing
* further when the selected value is ZOLTAN_LB_REMAP_NONE.
*/
/* No remapping is performed. */
#define ZOLTAN_LB_REMAP_NONE 0
/* Remap processor numbers (chosen when zz->LB.PartDist is NULL). */
#define ZOLTAN_LB_REMAP_PROCESSORS 1
/* Remap part numbers (chosen when zz->LB.PartDist is non-NULL). */
#define ZOLTAN_LB_REMAP_PARTS 2
/* Number of ints stored per hyperedge in an HEinfo vector:
 * pin 0 (old proc/part), pin 1 (new proc/part), and the edge weight. */
#define HEINFO_ENTRIES 3
/* Prototypes for the file-local helpers defined below. */
/* Gathers every processor's hyperedges and builds zz->LB.Remap. */
static int gather_and_build_remap(ZZ *, int *, int, int *);
/* Chooses one of the ZOLTAN_LB_REMAP_* values from the zz->LB settings. */
static int set_remap_type(ZZ *, int *);
/* Allocates an HEinfo vector holding the given number of hyperedges. */
static int malloc_HEinfo(ZZ *, int, int **);
/* Initializes the match vector to identity and runs matching_pgm(). */
static int do_match(ZZ*, HGraph *, int *, int);
/* Path-growing matching on the gathered hypergraph. */
static int matching_pgm(ZZ *, HGraph *, int *, int *);
/* Builds this processor's hyperedges when import lists are available. */
static int local_HEs_from_import_lists(ZZ *, int, int, int *, int *, int *,
int *, int **);
/* Builds this processor's hyperedges when export lists are available. */
static int local_HEs_from_export_lists(ZZ *, int, int, int *, int *, int *,
int *, int **);
/* Sums the weights of hyperedges whose two pins map to the same value. */
static float measure_stays(ZZ *, HGraph *, int, int *, char *);
/******************************************************************************/
int Zoltan_LB_Remap(
  ZZ *zz,
  int *new_map,         /* Upon return, nonzero iff part or proc assignments
                           were actually changed by the remapping. */
  int nobj,             /* # objs the processor knows about after
                           partitioning. */
  int *proc,            /* Processors for the objs.  On input: new proc
                           assignment if export_list_flag == 1, otherwise
                           old proc assignment.  On return: remapped new
                           proc assignment in either case. */
  int *old_part,        /* Old part assignments for the objs. */
  int *new_part,        /* New part assignments for the objs; on return,
                           the remapped new part assignments. */
  int export_list_flag  /* Nonzero if the algorithm computed export lists,
                           zero if it computed import lists.  Selects which
                           routine builds the local hyperedges. */
)
{
/* Entry point for remapping.  Determines the remap type, builds this
 * processor's hyperedges (old/new pin pairs with weights), lets
 * gather_and_build_remap() build zz->LB.Remap, and, when a remap was
 * produced, rewrites new_part[] and proc[] through it.
 *
 * Returns the status of the last step executed; a negative status from
 * either building step stops processing.
 */
  char *yo = "Zoltan_LB_Remap";
  int status;
  int mode;              /* One of the ZOLTAN_LB_REMAP_* values. */
  int n_edges = 0;       /* Number of local hyperedges. */
  int *edges = NULL;     /* HEINFO_ENTRIES ints per local hyperedge. */
  int k;

  *new_map = 0;

  /* Decide which kind of remapping (if any) applies. */
  status = set_remap_type(zz, &mode);
  if (mode == ZOLTAN_LB_REMAP_NONE)
    goto Done;

  /* Build the local hyperedges with the routine matching the list type. */
  status = export_list_flag
         ? local_HEs_from_export_lists(zz, mode, nobj, proc, old_part,
                                       new_part, &n_edges, &edges)
         : local_HEs_from_import_lists(zz, mode, nobj, proc, old_part,
                                       new_part, &n_edges, &edges);
  if (status < 0) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error building local HEs");
    goto Done;
  }

  /* Gather hyperedges from all processors and build the remap vector. */
  status = gather_and_build_remap(zz, new_map, n_edges, edges);
  if (status < 0) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                       "Error returned from gather_and_build_remap.");
    goto Done;
  }

  if (*new_map) {
    /* Push every object's new part through the remap vector and
     * recompute the owning processor from the remapped part. */
    for (k = 0; k < nobj; k++) {
      int remapped = zz->LB.Remap[new_part[k]];
      new_part[k] = remapped;
      proc[k] = Zoltan_LB_Part_To_Proc(zz, remapped, NULL);
    }
  }

Done:
  ZOLTAN_FREE(&edges);
  return(status);
}
/******************************************************************************/
static int local_HEs_from_import_lists(
  ZZ *zz,
  int remap_type,   /* Type of remapping to do: parts or procs. */
  int nobj,         /* # objs the processor knows about (keep + imports). */
  int *proc,        /* Old processor assignment for each obj. */
  int *old_part,    /* Old part assignment for each obj. */
  int *new_part,    /* New part assignment for each obj. */
  int *HEcnt,       /* Output: # of hyperedges stored in *HEinfo. */
  int **HEinfo      /* Output: HEINFO_ENTRIES ints per hyperedge (old pin,
                       new pin, weight), or NULL when *HEcnt is zero. */
)
{
/* Builds this processor's hyperedges for the remap matching, assuming the
 * load-balancing algorithm built import lists; i.e., the objects described
 * are those that ENDED UP on this processor (new_proc == zz->Proc).
 *
 * Object counts are first accumulated in a dense table of bins; every
 * non-empty bin then becomes one hyperedge whose weight is the count.
 *   - Processor remapping: one bin per old processor.
 *   - Part remapping: one bin per (local new part, old part) pair, laid out
 *     row-major with one row per local new part.
 *
 * Returns ZOLTAN_OK, or a negative status on allocation failure.
 */
  char *yo = "local_HEs_from_import_lists";
  int status = ZOLTAN_OK;
  int self = zz->Proc;     /* This processor's rank. */
  int n_bins;              /* Number of entries in bins[]. */
  int *bins = NULL;        /* Object counts, including empty bins. */
  int *rec;                /* Cursor over the output hyperedge records. */
  int k, nonzero;

  if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) {
    /* One bin per processor; proc[] still holds old assignments here. */
    n_bins = zz->Num_Proc;
    bins = (int *) ZOLTAN_CALLOC(n_bins, sizeof(int));
    if (bins == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      status = ZOLTAN_MEMERR;
      goto Cleanup;
    }
    for (k = 0; k < nobj; k++)
      bins[proc[k]] += 1;

    /* Count the non-empty bins; each becomes one hyperedge. */
    for (nonzero = 0, k = 0; k < n_bins; k++)
      if (bins[k])
        nonzero++;
    *HEcnt = nonzero;

    status = malloc_HEinfo(zz, nonzero, HEinfo);
    if (status < 0)
      goto Cleanup;

    /* Emit (old proc, new proc = self, weight) for each non-empty bin. */
    rec = *HEinfo;
    for (k = 0; k < n_bins; k++) {
      if (bins[k] == 0)
        continue;
      rec[0] = k;
      rec[1] = self;
      rec[2] = bins[k];
      rec += HEINFO_ENTRIES;
    }
  }
  else {  /* ZOLTAN_LB_REMAP_PARTS */
    int lo = INT_MAX;   /* Smallest old part number seen locally. */
    int hi = 0;         /* Largest old part number seen locally. */
    int span;           /* Number of old-part columns per row. */
    int first_part;     /* First part on this processor (new decomp). */
    int n_parts;        /* # of parts on this processor (new decomp). */

    for (k = 0; k < nobj; k++) {
      int op = old_part[k];
      lo = (op < lo) ? op : lo;
      hi = (op > hi) ? op : hi;
    }

    /* Old part numbers beyond zz->LB.Num_Global_Parts - 1 are not valid
     * remap targets, so clamp the range to exclude them. */
    if (lo >= zz->LB.Num_Global_Parts)
      lo = zz->LB.Num_Global_Parts - 1;
    if (hi >= zz->LB.Num_Global_Parts)
      hi = zz->LB.Num_Global_Parts - 1;
    span = hi - lo + 1;

    Zoltan_LB_Proc_To_Part(zz, self, &n_parts, &first_part);

    n_bins = n_parts * span;
    if (n_bins > 0) {
      bins = (int *) ZOLTAN_CALLOC(n_bins, sizeof(int));
      if (bins == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
        status = ZOLTAN_MEMERR;
        goto Cleanup;
      }
    }

    /* Tally objects per (new part, old part), skipping objects whose old
     * part number is not a valid remap target. */
    for (k = 0; k < nobj; k++) {
      if (old_part[k] >= zz->LB.Num_Global_Parts)
        continue;
      bins[(new_part[k] - first_part) * span + (old_part[k] - lo)]++;
    }

    for (nonzero = 0, k = 0; k < n_bins; k++)
      if (bins[k])
        nonzero++;
    *HEcnt = nonzero;

    status = malloc_HEinfo(zz, nonzero, HEinfo);
    if (status < 0)
      goto Cleanup;

    /* Recover (old part, new part) from the bin index; emit the weight. */
    rec = *HEinfo;
    for (k = 0; k < n_bins; k++) {
      if (bins[k] == 0)
        continue;
      rec[0] = k % span + lo;
      rec[1] = k / span + first_part;
      rec[2] = bins[k];
      rec += HEINFO_ENTRIES;
    }
  }

Cleanup:
  if (bins != NULL) ZOLTAN_FREE(&bins);
  return status;
}
/******************************************************************************/
static int local_HEs_from_export_lists(
  ZZ *zz,
  int remap_type,   /* Type of remapping to do: parts or procs. */
  int nobj,         /* # objs the processor knows about (keep + exports). */
  int *new_proc,    /* New processor assignment for each obj. */
  int *old_part,    /* Old part assignment for each obj. */
  int *new_part,    /* New part assignment for each obj. */
  int *HEcnt,       /* Output: # of hyperedges stored in *HEinfo. */
  int **HEinfo      /* Output: HEINFO_ENTRIES ints per hyperedge (old pin,
                       new pin, weight), or NULL when *HEcnt is zero. */
)
{
/* Builds this processor's hyperedges for the remap matching, assuming the
 * load-balancing algorithm built export lists; i.e., the objects described
 * are those that STARTED on zz->Proc (old_proc == zz->Proc).
 *
 * Processor remapping counts objects per new processor directly.
 * Part remapping inverts the export lists into import lists with a
 * communication plan and delegates to local_HEs_from_import_lists().
 *
 * Returns ZOLTAN_OK (or the status of the import-list routine) on success
 * and a negative status on allocation or communication failure.  The
 * communication plan and all temporaries are released on every path.
 */
  char *yo = "local_HEs_from_export_lists";
  int ierr = ZOLTAN_OK;
  int i, cnt, tmp;
  int *tmp_HEinfo;
  int my_proc = zz->Proc;        /* This processor's rank. */
  int nimp = 0;                  /* # of objects imported to my_proc. */
  int *imp_proc = NULL,          /* Temporary arrays if inversion of export */
      *imp_old_part = NULL,      /* to import lists is needed; all three    */
      *imp_new_part = NULL;      /* live in one allocation (imp_proc).      */
  int HEwgt_size;                /* # of HE weights allocated. */
  int *HEwgt = NULL;             /* Array of HE weights. Initially includes
                                    zero weights; later they are skipped. */
  ZOLTAN_COMM_OBJ *plan = NULL;  /* Plan used to invert the export lists;
                                    kept at function scope so that End can
                                    release it on error paths. */

  if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) {
    /* Build HEs based on processor assignment.
     * The old processor of every kept or exported object is my_proc, and
     * the new processor is known because export lists were built, so the
     * hyperedges can be built locally.
     */
    HEwgt_size = zz->Num_Proc;
    HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int));
    if (!HEwgt) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < nobj; i++)
      HEwgt[new_proc[i]]++;

    *HEcnt = 0;
    for (i = 0; i < HEwgt_size; i++)
      if (HEwgt[i] != 0) (*HEcnt)++;

    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0)
      goto End;
    tmp_HEinfo = *HEinfo;

    cnt = 0;
    for (i = 0; i < HEwgt_size; i++) {
      if (HEwgt[i] != 0) {
        tmp = cnt * HEINFO_ENTRIES;
        tmp_HEinfo[tmp] = my_proc;    /* Old processor number */
        tmp_HEinfo[tmp+1] = i;        /* New processor number */
        tmp_HEinfo[tmp+2] = HEwgt[i]; /* shift non-zero weights down. */
        cnt++;
      }
    }
  }
  else {  /* ZOLTAN_LB_REMAP_PARTS */
    /* Cannot renumber parts given export lists without summing HE weights
     * across processors. This summation is not straightforward. Also, a
     * potentially large number of HEs may exist
     * (max_old_part_number * zz->Num_Global_Parts). Rather than build
     * this large matrix, just compute import lists from the export lists
     * and run the import-list algorithm.
     */
    int msg_tag = 22345;

    ierr = Zoltan_Comm_Create(&plan, nobj, new_proc, zz->Communicator,
                              msg_tag, &nimp);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(my_proc, yo, "Error returned from Zoltan_Comm_Create.");
      goto End;
    }

    if (nimp > 0) {
      /* sizeof first so the multiplication is done in size_t, not int. */
      imp_proc = (int *) ZOLTAN_MALLOC(sizeof(int) * 3 * nimp);
      if (!imp_proc) {
        ierr = ZOLTAN_MEMERR;
        ZOLTAN_PRINT_ERROR(my_proc, yo, "Memory error.");
        goto End;
      }
      /* Carve the sub-arrays only after the allocation is known good. */
      imp_old_part = imp_proc + nimp;
      imp_new_part = imp_old_part + nimp;
    }

    /* Old processor of each imported object. */
    ierr = Zoltan_Comm_Info(plan, NULL, NULL, NULL, NULL, NULL, NULL,
                            NULL, NULL, NULL, NULL, NULL, imp_proc, NULL);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(my_proc, yo, "Error returned from Zoltan_Comm_Info.");
      goto End;
    }

    /* Old part of each imported object. */
    msg_tag++;
    ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) old_part, sizeof(int),
                          (char *) imp_old_part);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(my_proc, yo, "Error returned from Zoltan_Comm_Do.");
      goto End;
    }

    /* New part of each imported object. */
    msg_tag++;
    ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) new_part, sizeof(int),
                          (char *) imp_new_part);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(my_proc, yo, "Error returned from Zoltan_Comm_Do.");
      goto End;
    }

    /* The plan is no longer needed; release it before building the HEs. */
    Zoltan_Comm_Destroy(&plan);
    plan = NULL;

    ierr = local_HEs_from_import_lists(zz, remap_type, nimp, imp_proc,
                                       imp_old_part, imp_new_part,
                                       HEcnt, HEinfo);
  }

End:
  /* Non-NULL here only when an error path skipped the normal destroy. */
  if (plan != NULL) Zoltan_Comm_Destroy(&plan);
  if (HEwgt) ZOLTAN_FREE(&HEwgt);
  if (imp_proc) ZOLTAN_FREE(&imp_proc);
  return ierr;
}
/******************************************************************************/
static int set_remap_type(
  ZZ *zz,
  int *remap_type   /* Output: one of the ZOLTAN_LB_REMAP_* values. */
)
{
/* Chooses the remap type from the distribution of parts to processors.
 *
 * Returns ZOLTAN_WARN when remapping was requested but cannot be honored
 * (non-uniform part sizes, or some part spread over more than one
 * processor); returns ZOLTAN_OK otherwise.
 */
  /* Every rejection below yields "no remapping". */
  *remap_type = ZOLTAN_LB_REMAP_NONE;

  /* No remapping requested. */
  if (zz->LB.Remap_Flag == 0)
    return ZOLTAN_OK;

  /* Remapping does not respect requested non-uniform part sizes, and is
   * not supported when some part is spread across >1 processor. */
  if (!(zz->LB.Uniform_Parts) || !(zz->LB.Single_Proc_Per_Part))
    return ZOLTAN_WARN;

  /* PartDist == NULL: # parts == # processors, uniformly distributed, so
   * remap processors.  Otherwise remap part numbers. */
  *remap_type = (zz->LB.PartDist == NULL) ? ZOLTAN_LB_REMAP_PROCESSORS
                                          : ZOLTAN_LB_REMAP_PARTS;
  return ZOLTAN_OK;
}
/******************************************************************************/
static int do_match(
  ZZ *zz,
  HGraph *hg,   /* Hypergraph data structure on which to do the matching. */
  int *match,   /* Matching array -- output; at least hg->nVtx entries. */
  int limit     /* Max number of matches that are allowed. */
)
{
/* Thin wrapper around matching_pgm().  Resets match[] to the identity
 * (every vertex matched to itself, i.e. no change in mapping) and then
 * runs the matching, returning its status.
 */
  int v = 0;

  while (v < hg->nVtx) {
    match[v] = v;
    v++;
  }

  return matching_pgm(zz, hg, match, &limit);
}
/******************************************************************************/
static int malloc_HEinfo(
  ZZ *zz,
  int HEcnt,     /* Number of HEs to allocate. */
  int **HEinfo   /* Output: vector of HEINFO_ENTRIES ints per HE (two pins
                    and one edge weight), or NULL when HEcnt is zero or the
                    allocation fails. */
)
{
/* Allocates the hyperedge vector used by remap's matching routine.
 * Returns ZOLTAN_OK on success (including HEcnt == 0) and ZOLTAN_MEMERR
 * when the allocation fails.
 */
  char *yo = "malloc_HEinfo";

  *HEinfo = NULL;
  if (!HEcnt)
    return ZOLTAN_OK;   /* Nothing to allocate. */

  *HEinfo = (int *) ZOLTAN_MALLOC(HEINFO_ENTRIES * HEcnt * sizeof(int));
  if (*HEinfo != NULL)
    return ZOLTAN_OK;

  ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
  return ZOLTAN_MEMERR;
}
/******************************************************************************/
static int gather_and_build_remap(
  ZZ *zz,
  int *new_map,   /* Upon return, flag indicating whether part
                     assignments were changed due to remap. */
  int HEcnt,      /* # of local HEs in HEinfo. */
  int *HEinfo     /* Local HE info; for each HE, two pins and one edge
                     weight (HEINFO_ENTRIES ints), stored as one vector
                     to minimize communication calls. */
)
{
/* Gathers every processor's hyperedges onto every processor, builds a
 * hypergraph whose vertices are the old values (ids 0..max0-1) followed by
 * the new parts (ids max0..), runs a matching on it, and converts the
 * matching into the remap vector zz->LB.Remap.
 *
 * The amount of data that stays in place is measured without remapping,
 * with zz->LB.OldRemap and with the new vector; the best of the three is
 * kept in zz->LB.Remap (NULL when not remapping is best) and
 * zz->LB.OldRemap is released or moved accordingly.
 *
 * Returns ZOLTAN_OK or a non-negative status on success; a negative status
 * on allocation or matching failure, in which case *new_map is 0.
 */
  char *yo = "gather_and_build_remap";
  int ierr = ZOLTAN_OK;
  int match_ierr;              /* Status of do_match(); kept separate so a
                                  successful match does not hide an earlier
                                  warning stored in ierr. */
  int i, uidx, tmp;
  int *each_size = NULL;       /* sizes (# HEs * HEINFO_ENTRIES) per proc */
  int *recvbuf = NULL;         /* Receive buffer for gatherv */
  int *displs = NULL;          /* Displacement buffer for gatherv; lives at
                                  the tail of recvbuf's allocation. */
  int send_size;               /* Local # HEs * HEINFO_ENTRIES */
  int total_size;              /* Total # ints in gatherv */
  int total_HEcnt;             /* Total (across all procs) number of HEs. */
  int max0, max1;              /* Max values of pin 0 and pin 1 (plus one
                                  after the increment below). */
  int *match = NULL;           /* Vector describing the matching.
                                  match[i] = j ==> match[j] = i ==>
                                  vertices i and j are matched. */
  int *used = NULL;            /* Flags marking which part numbers are
                                  already taken; tail of match's buffer. */
  int match_len;               /* # ints allocated for match + used. */
  int limit;                   /* Maximum number of matches allowed */
  HGraph hg;                   /* Hypergraph for matching */
  float before_remap = 0,      /* Overlap between old and new decomposition */
        after_remap = 0;       /* before and after remapping, respectively. */
  float with_oldremap = 0;     /* Overlap when using the OldRemap vector
                                  (remapping from the previous
                                  decomposition). */

  *new_map = 0;

  /* Initialize the hypergraph before anything can jump to End, so that
   * Zoltan_HG_HGraph_Free() there never sees an uninitialized struct. */
  Zoltan_HG_HGraph_Init(&hg);

  /* Gather HEs from each processor into a local complete HG. */

  each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int));
  if (!each_size) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  send_size = HEcnt * HEINFO_ENTRIES;
  MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT,
                zz->Communicator);

  for (total_size = 0, i = 0; i < zz->Num_Proc; i++) {
    total_size += each_size[i];
  }

  recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int));
  if (!recvbuf) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  /* Only derive displs once recvbuf is known to be valid. */
  displs = recvbuf + total_size;

  displs[0] = 0;
  for (i = 1; i < zz->Num_Proc; i++)
    displs[i] = displs[i-1] + each_size[i-1];

  MPI_Allgatherv(HEinfo, send_size, MPI_INT,
                 recvbuf, each_size, displs, MPI_INT, zz->Communicator);

  total_HEcnt = total_size / HEINFO_ENTRIES;
  for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) {
    tmp = i * HEINFO_ENTRIES;
    if (recvbuf[tmp] > max0) max0 = recvbuf[tmp];
    if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1];
  }
  /* Increment max0 and max1 so that they are the maximum number of unique
     pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */
  max0++;
  max1++;

  /* Sanity check */
  /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes
   * does not return the correct number of non-empty parts, allowing
   * max1 to be less than LB.Num_Global_Parts.
   * (e.g., ewgt.adaptive-partlocal1-v3.4.?).
   */
  /* NOTE(review): when this fires, pin-1 vertex ids computed below can be
   * >= hg.nVtx; processing still continues as before -- confirm whether
   * this should be treated as a fatal error. */
  if (max1 > zz->LB.Num_Global_Parts)
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1.");

  /* Set up global HG */

  if (total_HEcnt) {
    hg.nVtx = max0 + zz->LB.Num_Global_Parts;
    hg.nEdge = total_HEcnt;
    hg.nPins = total_HEcnt * 2;   /* two pins per HE */
    hg.EdgeWeightDim = 1;
    hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float));
    hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int));
    hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int));
    if (!hg.ewgt || !hg.hindex || !hg.hvertex) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < total_HEcnt; i++) {
      tmp = i * HEINFO_ENTRIES;
      hg.hindex[i] = i+i;
      hg.hvertex[i+i] = recvbuf[tmp];
      /* New-side pins are offset by max0 so they follow the old-side ids. */
      hg.hvertex[i+i+1] = (int)recvbuf[tmp+1]+max0;
      hg.ewgt[i] = recvbuf[tmp+2];
    }
    hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt;

    ierr = Zoltan_HG_Create_Mirror(zz, &hg);
    if (ierr < 0) goto End;
  }

  before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE");

  /* Compute the amount of overlap when using the old remap vector. */

  with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD");

  /* Do matching */

  match_len = hg.nVtx + zz->LB.Num_Global_Parts;
  match = (int *) ZOLTAN_CALLOC(match_len, sizeof(int));
  /* The buffer is indexed below even when hg.nVtx == 0, so the check must
   * depend on the full requested length. */
  if (match_len > 0 && !match) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  used = match ? match + hg.nVtx : NULL;

  /* Max # matches allowed */
  limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts);
  match_ierr = do_match(zz, &hg, match, limit);
  if (match_ierr < 0) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from do_match.");
    ierr = match_ierr;
    goto End;
  }

  /* Build remapping vector, if non-trivial matching was returned.
   * With no hyperedges, do_match() initializes nothing (hg.nVtx == 0) and
   * the zero-filled buffer must not be read as a matching. */

  *new_map = 0;
  if (total_HEcnt) {
    for (i = 0; i < zz->LB.Num_Global_Parts; i++)
      if (match[i+max0] != i+max0) {
        *new_map = 1;
        break;
      }
  }

  if (*new_map) {

    zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int));
    if (!(zz->LB.Remap)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      *new_map = 0;   /* No remap vector exists; do not report one. */
      goto End;
    }

    /* First, process all parts that were matched. Mark matched parts as used.*/

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      zz->LB.Remap[i] = -1;
      tmp = match[i+max0];
      if (tmp != i+max0) {
        zz->LB.Remap[i] = tmp;
        used[tmp] = 1;
      }
    }

    /* Second, process unmatched parts; if possible, keep same part number. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      if (!used[i]) {  /* Keep the same part number if it is not used */
        zz->LB.Remap[i] = i;
        used[i] = 1;
      }
    }

    /* Third, process remaining unmatched parts; assign them to
       unused parts.*/

    for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      while (used[uidx]) uidx++;   /* Find next unused part */
      zz->LB.Remap[i] = uidx;
      used[uidx] = 1;
    }
  }

  if (*new_map)
    after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER ");

  if ((before_remap >= after_remap) && (before_remap >= with_oldremap)) {
    /* No benefit from remapping; don't keep it! */
    ZOLTAN_FREE(&zz->LB.Remap);
    ZOLTAN_FREE(&zz->LB.OldRemap);
    *new_map = 0;
  }
  else if (with_oldremap >= after_remap) {
    /* The old remap vector is better than the new one; keep the old one. */
    ZOLTAN_FREE(&zz->LB.Remap);
    zz->LB.Remap = zz->LB.OldRemap;
    zz->LB.OldRemap = NULL;
    *new_map = 1;
  }
  else {
    /* Going to use the new remap vector; free the old one. */
    ZOLTAN_FREE(&zz->LB.OldRemap);
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc &&
      zz->LB.Remap)
    for (i = 0; i < zz->LB.Num_Global_Parts; i++)
      printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]);

End:
  /* used and displs point into match and recvbuf; they are not freed. */
  ZOLTAN_FREE(&match);
  ZOLTAN_FREE(&each_size);
  ZOLTAN_FREE(&recvbuf);
  Zoltan_HG_HGraph_Free(&hg);
  return ierr;
}
/******************************************************************************/
static float measure_stays(
  ZZ *zz,
  HGraph *hg,      /* Hypergraph with two pins per edge. */
  int max0,        /* Offset that was added to each edge's second pin. */
  int *remapvec,   /* Remap vector to evaluate, or NULL for no remap. */
  char *when       /* Label included in the debug output. */
)
{
/* Measures (and, at full debug level on the debug processor, prints) the
 * amount of data that doesn't move as described by the hypergraph: the sum
 * of the weights of the edges whose first pin equals the (optionally
 * remapped) second pin.
 */
  float kept = 0.;
  int e;

  for (e = 0; e < hg->nEdge; e++) {
    int first = hg->hvertex[e + e];
    int second = hg->hvertex[e + e + 1] - max0;   /* Undo the pin offset. */
    int target = remapvec ? (int)remapvec[second] : second;

    if (first == target)
      kept += hg->ewgt[e];
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc)
    printf("%d REMAP--%s: TOTAL AMT STAY = %g\n\n",
           zz->Proc, when, kept);

  return(kept);
}
/******************************************************************************/
/* path growing matching, hypergraph version */
static int matching_pgm (ZZ *zz, HGraph *hg, int *match, int *limit)
{
/* Path growing matching, hypergraph version.
 *
 * Grows paths through the hypergraph: from the current vertex, the
 * still-unmatched neighbor with the largest accumulated similarity is
 * chosen, the pair is recorded, and the path continues from that neighbor.
 * Consecutive pairs along a path are recorded alternately in two candidate
 * matchings (Match[0], which is the caller's match[], and Match[1]); the
 * candidate with the larger total weight is returned in match[].
 *
 * match  -- in: identity (match[i] == i) for unmatched vertices;
 *           out: the selected matching.
 * limit  -- in: max # of pairs allowed per candidate matching;
 *           out: the unused remainder for the selected candidate.
 *
 * Returns ZOLTAN_OK, or ZOLTAN_MEMERR if the work arrays cannot be
 * allocated (match[] is left untouched in that case).
 */
  int i, j, k, side = 0, edge, vertex, *Match[2] = {NULL, NULL};
  int limits[2], neighbor, next_vertex, pins;
  double w[2]={0.0,0.0}, weight, max_weight, *sims = NULL;
  char *yo = "matching_pgm";

  limits[0] = limits[1] = *limit;
  Match[0] = match;
  if (hg->nVtx) {
    if (!(Match[1] = (int*) ZOLTAN_MALLOC (hg->nVtx * sizeof(int)))
     || !(sims = (double*) ZOLTAN_CALLOC (hg->nVtx, sizeof(double))) ) {
      Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      return ZOLTAN_MEMERR;
    }
  }

  for (i = 0; i < hg->nVtx; i++)
    Match[1][i] = i;

  for (i = 0; i < hg->nVtx && limits[side] > 0; i++) {
    /* Start a new path only from a vertex unmatched in both candidates. */
    if (Match[0][i] == i && Match[1][i] == i) {
      vertex = i;
      /* next_vertex == -1 is the "path ended" sentinel, so every
       * non-negative id, including vertex 0, is a valid path vertex.
       * The loop test is therefore ">= 0"; a "> 0" test would prevent
       * vertex 0 from ever starting or extending a path. */
      while (vertex >= 0 && limits[side] > 0) {
        max_weight = 0.0;
        next_vertex = -1;

        /* Accumulate similarity to every unmatched neighbor sharing an
         * edge with vertex. */
        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          pins = hg->hindex[edge+1] - hg->hindex[edge];
          weight = 2.0 / ((pins-1)*pins);
          if (hg->ewgt)
            weight *= hg->ewgt[edge];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (neighbor != vertex && Match[0][neighbor] == neighbor &&
                Match[1][neighbor]==neighbor)
              sims[neighbor] += weight;
          }
        }

        /* Pick the most similar neighbor; zero sims[] as it is visited so
         * the array is clean for the next vertex. */
        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (sims[neighbor] > 0.0) {
              if (sims[neighbor] > max_weight) {
                max_weight = sims[neighbor];
                next_vertex = neighbor;
              }
              sims[neighbor] = 0.0;
            }
          }
        }

        if (next_vertex >= 0) {
          /* Record the pair in the current candidate, then switch
           * candidates for the next pair on the path. */
          Match[side][vertex] = next_vertex;
          Match[side][next_vertex] = vertex;
          limits[side]--;
          w[side] += max_weight;
          side = 1-side;
        }
        vertex = next_vertex;
      }
    }
  }

  /* Keep the heavier candidate; Match[0] already is match[]. */
  if (w[0] < w[1]) {
    for (i = 0; i < hg->nVtx; i++)
      match[i] = Match[1][i];
    *limit = limits[1];
  }
  else
    *limit = limits[0];

  Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
  return ZOLTAN_OK;
}
/******************************************************************************/
#ifdef __cplusplus
} /* closing bracket for extern "C" */
#endif