commit 30d5349fcf

README.md
@@ -5,8 +5,16 @@
 **PhasicFlow** is a parallel C++ code for performing DEM simulations. It can run on shared-memory multi-core computational units such as multi-core CPUs or GPUs (for now it works on CUDA-enabled GPUs). The parallelization method mainly relies on loop-level parallelization on a shared-memory computational unit. You can build and run PhasicFlow in serial mode on regular PCs, in parallel mode for multi-core CPUs, or build it for a GPU device to off-load computations to a GPU. In its current status you can simulate millions of particles (up to 80M particles tested) on a single desktop computer. You can see the [performance tests of PhasicFlow](https://github.com/PhasicFlow/phasicFlow/wiki/Performance-of-phasicFlow) in the wiki page.
+
+**MPI** parallelization with dynamic load balancing is under development. With this level of parallelization, PhasicFlow can leverage the computational power of **multi-GPU** workstations or clusters with distributed-memory CPUs.
+
+In summary, PhasicFlow can have six execution modes:
+
+1. Serial on a single CPU core,
+2. Parallel on a multi-core computer/node (using OpenMP),
+3. Parallel on an NVIDIA GPU (using CUDA),
+4. Parallel on a distributed-memory workstation (using MPI),
+5. Parallel on distributed-memory workstations with multi-core nodes (using MPI + OpenMP),
+6. Parallel on workstations with multiple GPUs (using MPI + CUDA).
 
 ## How to build?
-You can build PhasicFlow for CPU and GPU executions. [Here is a complete step-by-step procedure](https://github.com/PhasicFlow/phasicFlow/wiki/How-to-Build-PhasicFlow).
+You can build PhasicFlow for CPU and GPU executions. The latest release of PhasicFlow is v-0.1. [Here is a complete step-by-step procedure for building phasicFlow-v-0.1](https://github.com/PhasicFlow/phasicFlow/wiki/How-to-Build-PhasicFlow).
 
 ## Online code documentation
 You can find a full documentation of the code, its features, and other related materials on the [online documentation of the code](https://phasicflow.github.io/phasicFlow/).
@@ -130,9 +130,9 @@ public:
         csPairContainerType& pwPairs,
         bool force = false) override
     {
-        ppTimer().start();
-
-        Particles().boundingSphere().updateBoundaries(DataDirection::SlaveToMaster);
+
+        ppTimer().start();
 
         const auto& position = Particles().pointPosition().deviceViewAll();
         const auto& flags = Particles().dynPointStruct().activePointsMaskDevice();
@@ -167,6 +167,7 @@ public:
         csPairContainerType& pwPairs,
         bool force = false)override
     {
+        Particles().boundingSphere().updateBoundaries(DataDirection::SlaveToMaster);
         return csBoundaries_[i].broadSearch(
             iter,
             t,
@@ -176,7 +177,6 @@ public:
             force);
     }
-
 
     bool enterBroadSearch(uint32 iter, real t, real dt)const override
     {
         if(ppwContactSearch_)
@@ -85,7 +85,7 @@ bool pFlow::processorBoundaryContactSearch::broadSearch
 {
     if(masterSearch_)
     {
-        /*const auto thisPoints = boundary().thisPoints();
+        const auto thisPoints = boundary().thisPoints();
         const auto& neighborProcPoints = boundary().neighborProcPoints();
         const auto& bDiams = diameter_.BoundaryField(thisBoundaryIndex());
         const auto thisDiams = bDiams.thisField();
@@ -96,9 +96,9 @@ bool pFlow::processorBoundaryContactSearch::broadSearch
             thisPoints,
             thisDiams,
             neighborProcPoints,
-            neighborProcDiams);
-        pOutput<<"ppPairs size in boundary"<< ppPairs.size()<<endl; */
+            neighborProcDiams
+        );
+        //pOutput<<"ppSize "<< ppPairs.size()<<endl;
         return true;
 
     }else
@@ -99,7 +99,7 @@ bool pFlow::twoPartContactSearch::broadSearchPP
         ppPairs.increaseCapacityBy(len);
 
         INFORMATION<< "Particle-particle contact pair container capacity increased from "<<
-        oldCap << " to "<<ppPairs.capacity()<<" in peiodicBoundaryContactSearch."<<END_INFO;
+        oldCap << " to "<<ppPairs.capacity()<<" in contact search in boundary region."<<END_INFO;
 
     }
 
@@ -32,9 +32,7 @@ pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::processorBoundarySphereInteraction
         geomMotion
     ),
     masterInteraction_(boundary.isBoundaryMaster())
-{
-    pOutput<<"Processor boundayrCondition for "<< boundary.name()<<endl;
-}
+{}
 
 template <typename cFM, typename gMM>
 bool pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::sphereSphereInteraction
@@ -43,13 +41,13 @@ bool pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::sphereSphereInteraction
     const ContactForceModel &cfModel
 )
 {
+    return true;
     if(!masterInteraction_) return true;
 
     const auto & sphPar = this->sphParticles();
     uint32 thisIndex = this->boundary().thisBoundaryIndex();
-    const auto& a = sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll();
-
-    /*pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction(
+    pOutput<<"beofre sphereSphereInteraction"<<endl;
+
+    pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction(
         dt,
         this->ppPairs(),
         cfModel,
@@ -67,7 +65,9 @@ bool pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::sphereSphereInteraction
         sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(),
         sphPar.contactForce().BoundaryField(thisIndex).neighborProcField().deviceViewAll(),
         sphPar.contactTorque().BoundaryField(thisIndex).neighborProcField().deviceViewAll()
-    );*/
+    );
+
+    pOutput<<"after sphereSphereInteraction"<<endl;
 
     return true;
 }
@@ -166,12 +166,12 @@ bool pFlow::sphereInteraction<cFM,gMM, cLT>::iterate()
 
     bool broadSearch = contactSearch_().enterBroadSearch(iter, t, dt);
 
-    /*sphParticles_.diameter().updateBoundaries(DataDirection::SlaveToMaster);
+    sphParticles_.diameter().updateBoundaries(DataDirection::SlaveToMaster);
     sphParticles_.velocity().updateBoundaries(DataDirection::SlaveToMaster);
     sphParticles_.rVelocity().updateBoundaries(DataDirection::SlaveToMaster);
     sphParticles_.mass().updateBoundaries(DataDirection::SlaveToMaster);
     sphParticles_.I().updateBoundaries(DataDirection::SlaveToMaster);
-    sphParticles_.propertyId().updateBoundaries(DataDirection::SlaveToMaster);*/
+    sphParticles_.propertyId().updateBoundaries(DataDirection::SlaveToMaster);
 
     if(broadSearch)
@@ -238,6 +238,18 @@ inline auto send(span<T> data, int dest, int tag, Comm comm)
         comm);
 }
 
+template<typename T>
+inline auto send(const T& data, int dest, int tag, Comm comm)
+{
+    return MPI_Send(
+        &data,
+        sFactor<T>(),
+        Type<T>(),
+        dest,
+        tag,
+        comm);
+}
+
 template<typename T>
 inline auto Isend(span<T> data, int dest, int tag, Comm comm, Request* req)
 {
@@ -277,6 +289,19 @@ inline auto recv(span<T> data, int source, int tag, Comm comm, Status *status)
         status);
 }
 
+template<typename T>
+inline auto recv(T& data, int source, int tag, Comm comm, Status *status)
+{
+    return MPI_Recv(
+        &data,
+        sFactor<T>(),
+        Type<T>(),
+        source,
+        tag,
+        comm,
+        status);
+}
+
 template<typename T>
 inline auto Irecv(T& data, int source, int tag, Comm comm, Request* req)
 {
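The two scalar overloads added above wrap MPI_Send and MPI_Recv for a single value; later in this commit, boundaryProcessor::transferData() uses exactly this kind of call to exchange point counts between neighbouring processors. Below is a minimal, self-contained sketch of that count-exchange pattern written against plain MPI; the ring topology, the tag value, and the closing MPI_Wait are illustrative choices, not taken from the commit.

// Sketch (not from the commit): each rank tells its right-hand neighbour how
// many points it will transfer and learns the same from its left-hand one.
#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    int rank = 0, size = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int right = (rank + 1) % size;
    const int left  = (rank - 1 + size) % size;

    unsigned int numToTransfer = 10u * (rank + 1); // stand-in for the real count
    unsigned int numToReceive  = 0u;

    // non-blocking send of one scalar, blocking receive of one scalar
    MPI_Request req = MPI_REQUEST_NULL;
    MPI_Isend(&numToTransfer, 1, MPI_UNSIGNED, right, 0, MPI_COMM_WORLD, &req);
    MPI_Recv(&numToReceive, 1, MPI_UNSIGNED, left, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    std::printf("rank %d will receive %u points\n", rank, numToReceive);

    MPI_Finalize();
    return 0;
}

The sketch completes the send with MPI_Wait, whereas transferData() below frees the request with MPI_Request_free and lets MPI finish the send in the background.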
@@ -24,13 +24,13 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::checkDataRecieved() const
 {
     if (!dataRecieved_)
     {
-        //uint32 nRecv = reciever_.waitComplete();
+        uint32 nRecv = reciever_.waitBufferForUse();
         dataRecieved_ = true;
-        /*if (nRecv != this->neighborProcSize())
+        if (nRecv != this->neighborProcSize())
         {
             fatalErrorInFunction;
             fatalExit;
-        }*/
+        }
     }
 }
@@ -41,7 +41,7 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::updateBoundary(
     DataDirection direction
 )
 {
-    /*if (step == 1)
+    if (step == 1)
     {
         // Isend
         if (direction == DataDirection::TwoWay ||
@@ -67,7 +67,7 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::updateBoundary(
     {
         fatalErrorInFunction << "Invalid step number " << step << endl;
         return false;
-    }*/
+    }
 
     return true;
 }
@@ -90,6 +90,8 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::processorBoundaryField(
         boundary.mirrorBoundaryIndex()
     )
 {
+    this->addEvent(message::BNDR_PROCTRANS1).
+        addEvent(message::BNDR_PROCTRANS2);
 }
 
 template<class T, class MemorySpace>
@@ -52,7 +52,7 @@ private:
 
     dataSender<T, MemorySpace> sender_;
 
-    mutable dataReciever<T, MemorySpace> reciever_;
+    dataReciever<T, MemorySpace> reciever_;
 
     mutable bool dataRecieved_ = true;
 
@@ -82,7 +82,6 @@ public:
 
     ProcVectorType& neighborProcField() override;
 
-
     const ProcVectorType& neighborProcField()const override;
 
     bool hearChanges
@@ -21,15 +21,13 @@ Licence:
 #include "boundaryProcessor.hpp"
 #include "dictionary.hpp"
 #include "mpiCommunication.hpp"
+#include "boundaryBaseKernels.hpp"
+#include "internalPoints.hpp"
 
 void
 pFlow::MPI::boundaryProcessor::checkSize() const
 {
-    if (!sizeObtained_)
-    {
-        //MPI_Wait(&sizeRequest_, StatusIgnore);
-        sizeObtained_ = true;
-    }
 }
 
 void
@@ -37,13 +35,13 @@ pFlow::MPI::boundaryProcessor::checkDataRecieved() const
 {
     if (!dataRecieved_)
     {
-        //uint32 nRecv = reciever_.waitComplete();
+        uint32 nRecv = reciever_.waitBufferForUse();
         dataRecieved_ = true;
-        /*if (nRecv != neighborProcSize())
+        if (nRecv != neighborProcSize())
         {
             fatalErrorInFunction;
             fatalExit;
-        }*/
+        }
     }
 }
@@ -92,8 +90,7 @@ pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt)
             pFlowProcessors().localCommunicator(),
             MPI_STATUS_IGNORE
         );
-    sizeObtained_ = false;
+    MPI_Request_free(&req);
 
     return true;
 }
@@ -135,6 +132,105 @@ pFlow::MPI::boundaryProcessor::updataBoundary(int step)
     return true;
 }
 
+bool pFlow::MPI::boundaryProcessor::transferData(int step)
+{
+    if(step==1)
+    {
+        uint32 s = size();
+        uint32Vector_D transferFlags("transferFlags",s+1, s+1, RESERVE());
+        transferFlags.fill(0u);
+
+        const auto& transferD = transferFlags.deviceViewAll();
+        auto points = thisPoints();
+        auto p = boundaryPlane().infPlane();
+
+        numToTransfer_ = 0;
+
+        Kokkos::parallel_reduce
+        (
+            "boundaryProcessor::afterIteration",
+            deviceRPolicyStatic(0,s),
+            LAMBDA_HD(uint32 i, uint32& transferToUpdate)
+            {
+                if(p.pointInNegativeSide(points(i)))
+                {
+                    transferD(i)=1;
+                    transferToUpdate++;
+                }
+            },
+            numToTransfer_
+        );
+
+        uint32Vector_D keepIndices("keepIndices");
+        if(numToTransfer_ != 0u)
+        {
+            pFlow::boundaryBaseKernels::createRemoveKeepIndices
+            (
+                indexList(),
+                numToTransfer_,
+                transferFlags,
+                transferIndices_,
+                keepIndices,
+                false
+            );
+            // delete transfer point from this processor
+            if( !setRemoveKeepIndices(transferIndices_, keepIndices))
+            {
+                fatalErrorInFunction<<
+                "error in setting transfer and keep points in boundary "<< name()<<endl;
+                return false;
+            }
+        }
+        else
+        {
+            transferIndices_.clear();
+        }
+
+        auto req = RequestNull;
+        CheckMPI( Isend(
+            numToTransfer_,
+            neighborProcessorNo(),
+            thisBoundaryIndex(),
+            pFlowProcessors().localCommunicator(),
+            &req), true );
+
+        CheckMPI(recv(
+            numToRecieve_,
+            neighborProcessorNo(),
+            mirrorBoundaryIndex(),
+            pFlowProcessors().localCommunicator(),
+            StatusesIgnore), true);
+
+        MPI_Request_free(&req);
+        return true;
+    }
+    else if(step ==2 )
+    {
+        pointFieldAccessType transferPoints(
+            transferIndices_.size(),
+            transferIndices_.deviceViewAll(),
+            internal().pointPositionDevice());
+
+        sender_.sendData(pFlowProcessors(), transferPoints);
+        return true;
+    }
+    else if(step == 3)
+    {
+        reciever_.recieveData(pFlowProcessors(), numToRecieve_);
+        return true;
+    }
+    else if(step == 4)
+    {
+        reciever_.waitBufferForUse();
+        //
+        return false;
+    }
+
+    return false;
+}
+
 bool
 pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt)
 {
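The call site that drives transferData() is not part of this diff, so the driver below is hypothetical; the Boundary type is a stand-in whose transferData() mimics the return convention above (steps 1 to 3 return true, step 4 returns false once the receive buffer has been waited on). The point of the staged design is that every boundary finishes step 1 (count exchange) before any boundary starts step 2 (send), and so on, which lets the non-blocking sends and receives of different boundaries overlap.

// Sketch (hypothetical driver, not part of the commit).
#include <vector>
#include <cstdio>

struct Boundary
{
    // mimics boundaryProcessor::transferData: true while more steps are
    // expected, false once the transfer for this boundary is finished
    bool transferData(int step)
    {
        std::printf("boundary %d: step %d\n", id, step);
        return step < 4; // steps 1-3 continue, step 4 finishes
    }
    int id = 0;
};

int main()
{
    std::vector<Boundary> boundaries{{0}, {1}, {2}};

    // run each step on every boundary before moving to the next step, so the
    // sends posted in step 2 can overlap the receives posted in step 3
    for (int step = 1; step <= 4; ++step)
    {
        for (auto& b : boundaries)
        {
            b.transferData(step);
        }
    }
    return 0;
}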
@@ -144,5 +240,54 @@ pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt)
 bool
 pFlow::MPI::boundaryProcessor::afterIteration(uint32 iterNum, real t, real dt)
 {
+    uint32 s = size();
+    pOutput<<"size of boundary is "<< s <<endl;
+    uint32Vector_D transferFlags("transferFlags",s+1, s+1, RESERVE());
+    transferFlags.fill(0u);
+
+    const auto& transferD = transferFlags.deviceViewAll();
+    auto points = thisPoints();
+    auto p = boundaryPlane().infPlane();
+
+    uint32 numTransfer = 0;
+
+    Kokkos::parallel_reduce
+    (
+        "boundaryProcessor::afterIteration",
+        deviceRPolicyStatic(0,s),
+        LAMBDA_HD(uint32 i, uint32& transferToUpdate)
+        {
+            if(p.pointInNegativeSide(points(i)))
+            {
+                transferD(i)=1;
+                transferToUpdate++;
+            }
+        },
+        numTransfer
+    );
+
+    pOutput<<"Numebr to be transfered "<< numTransfer<<endl;
+
+    uint32Vector_D transferIndices("transferIndices");
+    uint32Vector_D keepIndices("keepIndices");
+
+    pFlow::boundaryBaseKernels::createRemoveKeepIndices
+    (
+        indexList(),
+        numTransfer,
+        transferFlags,
+        transferIndices,
+        keepIndices
+    );
+
+    // delete transfer point from this processor
+    if( !setRemoveKeepIndices(transferIndices, keepIndices))
+    {
+        fatalErrorInFunction<<
+        "error in setting transfer and keep points in boundary "<< name()<<endl;
+        return false;
+    }
+
     return true;
 }
@@ -21,7 +21,6 @@ Licence:
 #ifndef __boundaryProcessor_hpp__
 #define __boundaryProcessor_hpp__
 
-
 #include "boundaryBase.hpp"
 #include "mpiTypes.hpp"
 #include "dataSender.hpp"
@@ -30,35 +29,34 @@ Licence:
 namespace pFlow::MPI
 {
 
 class boundaryProcessor
-:
-    public boundaryBase
+    : public boundaryBase
 {
-private:
+public:
+
+    using pointFieldAccessType = typename boundaryBase::pointFieldAccessType;
+
+private:
 
     uint32 neighborProcNumPoints_ = 0;
 
-    uint32 thisNumPoints_;
+    uint32 thisNumPoints_ = 0;
 
     realx3Vector_D neighborProcPoints_;
 
-    mutable Request sizeRequest_;
-
-    mutable Request sSizeRequest_;
-
-    int req_=0;
-
-    mutable bool sizeObtained_ = true;
-
-    mutable dataSender<realx3> sender_;
-
-    mutable dataReciever<realx3> reciever_;
+    dataSender<realx3> sender_;
+
+    dataReciever<realx3> reciever_;
 
     mutable bool dataRecieved_ = true;
 
-    void checkSize()const;
-
-    void checkDataRecieved()const;
+    uint32 numToTransfer_ = 0;
+
+    uint32 numToRecieve_ = 0;
+
+    uint32Vector_D transferIndices_{"transferIndices"};
+
+    void checkSize() const;
+
+    void checkDataRecieved() const;
 
     /// @brief Update processor boundary data for this processor
     /// @param step It is either 1 or 2 in the input to indicate
@@ -68,28 +66,26 @@ private:
     /// allow processor boundaries to exchange data in two steps.
     /// The first step is a buffered non-blocking send and the second
     /// step is non-blocking recieve to get data.
-    bool updataBoundary(int step)override;
+    bool updataBoundary(int step) override;
 
-public:
+    bool transferData(int step) override;
+
+public:
 
     TypeInfo("boundary<processor>");
 
     boundaryProcessor(
-        const dictionary& dict,
-        const plane& bplane,
-        internalPoints& internal,
-        boundaryList& bndrs,
-        uint32 thisIndex
-    );
+        const dictionary &dict,
+        const plane &bplane,
+        internalPoints &internal,
+        boundaryList &bndrs,
+        uint32 thisIndex);
 
     ~boundaryProcessor() override = default;
 
-    add_vCtor
-    (
+    add_vCtor(
         boundaryBase,
         boundaryProcessor,
-        dictionary
-    );
+        dictionary);
 
     bool beforeIteration(uint32 iterNum, real t, real dt) override;
 
@@ -103,13 +99,12 @@ public:
 
     /// @brief Return a reference to point positions in the neighbor
     /// processor boundary.
-    realx3Vector_D& neighborProcPoints() override;
+    realx3Vector_D &neighborProcPoints() override;
 
     /// @brief Return a const reference to point positions in the
    /// neighbor processor boundary.
-    const realx3Vector_D& neighborProcPoints() const override;
-
-};
+    const realx3Vector_D &neighborProcPoints() const override;
+};
 
 } // namespace pFlow::MPI
@@ -27,13 +27,11 @@ private:
 
     BufferVectorType buffer_;
 
-    std::vector<T> buffer0_;
-
     int fromProc_;
 
     int tag_;
 
-    Request recvRequest_;
+    mutable Request recvRequest_ = RequestNull;
 
 public:
@@ -46,34 +44,40 @@ public:
 
     ~dataReciever()=default;
 
+    uint32 waitBufferForUse()const
+    {
+        if(recvRequest_ != RequestNull)
+        {
+            Status status;
+            MPI_Wait(&recvRequest_, &status);
+            int count;
+            CheckMPI(getCount<T>(&status, count), true);
+
+            return static_cast<uint32>(count);
+        }
+        else
+            return buffer_.size();
+    }
+
     void recieveData(
         const localProcessors& processors,
         uint32 numToRecv
     )
     {
-        buffer0_.clear();
-        buffer0_.resize(numToRecv);
-        MPI_Status status;
-
-        /*CheckMPI(recv(
-            buffer_.getSpan(),
-            fromProc_,
-            tag_,
-            processors.localCommunicator(),
-            &status), true);*/
-        MPI_Recv(
-            buffer0_.data(),
-            buffer0_.size(),
-            realx3Type__,
-            fromProc_,
-            tag_,
-            processors.localCommunicator(),
-            &status
-        );
-        int c;
-        getCount<realx3>(&status, c);
-        pOutput<<"Number of data recieved "<<c<<endl;
+        waitBufferForUse();
+        buffer_.clear();
+        buffer_.resize(numToRecv);
+
+        CheckMPI(
+            Irecv(
+                buffer_.getSpan(),
+                fromProc_,
+                tag_,
+                processors.localCommunicator(),
+                &recvRequest_
+            ),
+            true
+        );
     }
 
     auto& buffer()
@@ -86,20 +90,6 @@ public:
         return buffer_;
     }
 
-    uint32 waitComplete()
-    {
-
-        /*Status status;
-
-        CheckMPI(MPI_Wait(&recvRequest_, &status), true);
-
-        int count;
-        CheckMPI(getCount<T>(&status, count), true);
-
-        return static_cast<uint32>(count);*/
-        return buffer_.size();
-    }
-
 };
 
 }
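dataReciever now posts the receive with a non-blocking Irecv inside recieveData() and defers the MPI_Wait, plus the count query, to waitBufferForUse(), i.e. to the first moment the data is actually used. Below is a self-contained sketch of that post-early, wait-late pattern in plain MPI; buffer sizes, the ring topology and the tag are invented for illustration.

// Sketch (not from the commit): post the receive early, complete it lazily.
#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank = 0, size = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int right = (rank + 1) % size;
    const int left  = (rank - 1 + size) % size;

    // post the receive as soon as an upper bound on the size is known
    std::vector<double> recvBuf(100);
    MPI_Request recvReq = MPI_REQUEST_NULL;
    MPI_Irecv(recvBuf.data(), int(recvBuf.size()), MPI_DOUBLE,
              left, 0, MPI_COMM_WORLD, &recvReq);

    // send own data without blocking
    std::vector<double> sendBuf(50, double(rank));
    MPI_Request sendReq = MPI_REQUEST_NULL;
    MPI_Isend(sendBuf.data(), int(sendBuf.size()), MPI_DOUBLE,
              right, 0, MPI_COMM_WORLD, &sendReq);

    // ... unrelated work can run here ...

    // only when the data is first needed: wait and ask how much arrived
    MPI_Status status;
    MPI_Wait(&recvReq, &status);
    int count = 0;
    MPI_Get_count(&status, MPI_DOUBLE, &count);
    std::printf("rank %d received %d values\n", rank, count);

    MPI_Wait(&sendReq, MPI_STATUS_IGNORE);
    MPI_Finalize();
    return 0;
}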
@@ -26,15 +26,13 @@ public:
 
 private:
 
-    //BufferVectorType buffer_;
-
-    std::vector<T> buffer_;
+    BufferVectorType buffer_;
 
     int toProc_;
 
     int tag_;
 
-    Request sendRequest_ = RequestNull;
+    mutable Request sendRequest_ = RequestNull;
 
 public:
@@ -44,7 +42,22 @@ public:
         tag_(tag)
     {}
 
-    ~dataSender()=default;
+    ~dataSender()
+    {
+        if(sendRequest_ != RequestNull)
+        {
+            MPI_Request_free(&sendRequest_);
+        }
+    }
+
+    bool waitBufferForUse()const
+    {
+        if(sendRequest_ != RequestNull)
+        {
+            MPI_Wait(&sendRequest_, StatusesIgnore);
+        }
+        return true;
+    }
 
     void sendData(
         const localProcessors& processors,
@@ -52,17 +65,21 @@ public:
     )
     {
         using RPolicy = Kokkos::RangePolicy<
-            DefaultExecutionSpace,
+            execution_space,
             Kokkos::Schedule<Kokkos::Static>,
             Kokkos::IndexType<pFlow::uint32>>;
 
         uint32 n = scatterField.size();
 
+        // make sure the buffer is ready to be used and free
+        // the previous request (if any).
+        waitBufferForUse();
+
         // clear the buffer to prevent data copy if capacity increases
         buffer_.clear();
         buffer_.resize(n);
 
-        auto* buffView = buffer_.data();
+        const auto& buffView = buffer_.deviceViewAll();
 
         Kokkos::parallel_for(
             "dataSender::sendData",
@@ -73,26 +90,20 @@ public:
             }
         );
         Kokkos::fence();
-        auto req = MPI_REQUEST_NULL;
 
-        MPI_Isend(
-            buffer_.data(),
-            buffer_.size(),
-            realx3Type__,
+        CheckMPI(
+            Isend(buffer_.getSpan(),
             toProc_,
             tag_,
             processors.localCommunicator(),
-            &req);
-
-        /*CheckMPI(send(
-            buffer_.getSpan(),
-            toProc_,
-            tag_,
-            processors.localCommunicator(),
-            MPI_STATUS_IGNORE), true);*/
+            &sendRequest_
+            ),
+            true
+        );
     }
 
-    /*auto& buffer()
+    auto& buffer()
     {
         return buffer_;
     }
@@ -100,17 +111,20 @@ public:
     const auto& buffer()const
     {
         return buffer_;
-    }*/
+    }
 
     bool sendComplete()
     {
-        return true;
-        /*int test;
-        MPI_Test(&sendRequest_, &test, StatusIgnore);
-        if(test)
-            return true;
+        int test;
+        if(sendRequest_ != RequestNull)
+        {
+            MPI_Test(&sendRequest_, &test, StatusIgnore);
+            return test;
+        }
         else
-            return false;*/
+        {
+            return true;
+        }
     }
 
 };
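dataSender applies the mirror-image discipline: before the buffer is repacked for a new message, the previous send request is completed (waitBufferForUse), then the packed buffer is handed to a non-blocking Isend and the request is kept so the next call can wait on it. The helper below is a hypothetical reduction of that idea to plain MPI with a host-side packing loop in place of the Kokkos::parallel_for used in the real class; it sketches the buffer-reuse rule only and is not the library API.

// Sketch (assumptions: plain MPI, double-valued field, caller owns the request).
#include <mpi.h>
#include <vector>
#include <cstdint>

// Gather the selected entries of 'field' into 'buffer' and post a non-blocking
// send; the previous send on the same buffer must be completed first so the
// buffer can be reused safely.
void packAndSend(const std::vector<double>& field,
                 const std::vector<std::uint32_t>& indices,
                 std::vector<double>& buffer,
                 int toProc, int tag, MPI_Comm comm, MPI_Request& sendRequest)
{
    // wait for the previous send (if any) before touching the buffer
    if (sendRequest != MPI_REQUEST_NULL)
    {
        MPI_Wait(&sendRequest, MPI_STATUS_IGNORE);
    }

    // pack: the real class does this on the device with Kokkos::parallel_for
    buffer.clear();
    buffer.reserve(indices.size());
    for (std::uint32_t i : indices)
    {
        buffer.push_back(field[i]);
    }

    MPI_Isend(buffer.data(), int(buffer.size()), MPI_DOUBLE,
              toProc, tag, comm, &sendRequest);
}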
@@ -156,3 +156,9 @@ void pFlow::processorOstream::indent()
     checkForPrefix();
     Ostream::indent();
 }
+
+pFlow::processorOstream &pFlow::processorOstream::setColor(const char *colorCode)
+{
+    Ostream::write(colorCode);
+    return *this;
+}
@@ -139,6 +139,8 @@ public:
     /// Add indentation characters
     void indent() override;
 
+    processorOstream& setColor(const char* colorCode);
+
 }; // processorOstream
@@ -33,7 +33,7 @@ namespace pFlow
 }
 
-#define INFORMATION pFlow::pOutput<<boldChar<<magentaColor<<"> INFO: "<<defaultColor<<magentaColor
+#define INFORMATION pFlow::pOutput.setColor(boldChar).setColor(magentaColor)<<"> INFO: "<<defaultColor<<magentaColor
 #define END_INFO defaultColor<<pFlow::endl
 
 #define REPORT(n) pFlow::mOutput.space(2*n)
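The reworked INFORMATION macro streams the colour codes through the new processorOstream::setColor() member instead of operator<<, presumably so the codes are written verbatim and do not pass through the per-processor prefix handling that the regular streaming path applies (setColor() forwards straight to Ostream::write, while indent() first calls checkForPrefix()). The stand-in class below is not the real pFlow stream hierarchy; it only illustrates why setColor() returns a reference, which is what makes the chained form in the macro possible.

// Sketch (illustrative stand-in, escape codes assumed for demonstration).
#include <iostream>

class MiniProcessorOstream
{
public:
    // write the colour code verbatim, bypassing any prefix handling
    MiniProcessorOstream& setColor(const char* colorCode)
    {
        std::cout << colorCode;
        return *this;
    }

    // normal streaming path (a real implementation would insert a
    // "[procN]" prefix here before the payload)
    template<typename T>
    MiniProcessorOstream& operator<<(const T& value)
    {
        std::cout << value;
        return *this;
    }
};

int main()
{
    MiniProcessorOstream out;
    const char* bold    = "\033[1m";
    const char* magenta = "\033[35m";
    const char* reset   = "\033[0m";

    // mirrors: pOutput.setColor(boldChar).setColor(magentaColor) << "> INFO: " ...
    out.setColor(bold).setColor(magenta) << "> INFO: " << "capacity increased" << reset << "\n";
    return 0;
}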