Merge pull request #1 from hamidrezanorouzi/MPIdev

MPIdev
Hamidreza Norouzi 2024-05-05 23:02:16 +03:30 committed by GitHub
commit 30d5349fcf
16 changed files with 362 additions and 176 deletions

View File

@ -5,8 +5,16 @@
**PhasicFlow** is a parallel C++ code for performing DEM simulations. It can run on shared-memory multi-core computational units such as multi-core CPUs or GPUs (for now it works on CUDA-enabled GPUs). The parallelization method mainly relies on loop-level parallelization on a shared-memory computational unit. You can build and run PhasicFlow in serial mode on regular PCs, in parallel mode for multi-core CPUs, or build it for a GPU device to off-load computations to a GPU. In its current status, you can simulate millions of particles (up to 80M particles tested) on a single desktop computer. You can see the [performance tests of PhasicFlow](https://github.com/PhasicFlow/phasicFlow/wiki/Performance-of-phasicFlow) on the wiki page.
**MPI** parallelization with dynamic load balancing is under development. With this level of parallelization, PhasicFlow can leverage the computational power of **multi-GPU** workstations or clusters with distributed-memory CPUs.
In summary, PhasicFlow has six execution modes:
1. Serial on a single CPU core,
2. Parallel on a multi-core computer/node (using OpenMP),
3. Parallel on an NVIDIA GPU (using CUDA),
4. Parallel on distributed-memory workstations (using MPI),
5. Parallel on distributed-memory workstations with multi-core nodes (using MPI+OpenMP),
6. Parallel on workstations with multiple GPUs (using MPI+CUDA).
## How to build?
You can build PhasicFlow for CPU and GPU executions. [Here is a complete step-by-step procedure](https://github.com/PhasicFlow/phasicFlow/wiki/How-to-Build-PhasicFlow).
You can build PhasicFlow for CPU and GPU executions. The latest release of PhasicFlow is v-0.1. [Here is a complete step-by-step procedure for building phasicFlow-v-0.1](https://github.com/PhasicFlow/phasicFlow/wiki/How-to-Build-PhasicFlow).
## Online code documentation
You can find the full documentation of the code, its features, and other related materials in the [online documentation of the code](https://phasicflow.github.io/phasicFlow/).

View File

@ -130,9 +130,9 @@ public:
csPairContainerType& pwPairs,
bool force = false) override
{
ppTimer().start();
Particles().boundingSphere().updateBoundaries(DataDirection::SlaveToMaster);
ppTimer().start();
const auto& position = Particles().pointPosition().deviceViewAll();
const auto& flags = Particles().dynPointStruct().activePointsMaskDevice();
@ -167,6 +167,7 @@ public:
csPairContainerType& pwPairs,
bool force = false)override
{
Particles().boundingSphere().updateBoundaries(DataDirection::SlaveToMaster);
return csBoundaries_[i].broadSearch(
iter,
t,
@ -176,7 +177,6 @@ public:
force);
}
bool enterBroadSearch(uint32 iter, real t, real dt)const override
{
if(ppwContactSearch_)

View File

@ -85,7 +85,7 @@ bool pFlow::processorBoundaryContactSearch::broadSearch
{
if(masterSearch_)
{
/*const auto thisPoints = boundary().thisPoints();
const auto thisPoints = boundary().thisPoints();
const auto& neighborProcPoints = boundary().neighborProcPoints();
const auto& bDiams = diameter_.BoundaryField(thisBoundaryIndex());
const auto thisDiams = bDiams.thisField();
@ -96,9 +96,9 @@ bool pFlow::processorBoundaryContactSearch::broadSearch
thisPoints,
thisDiams,
neighborProcPoints,
neighborProcDiams);
pOutput<<"ppPairs size in boundary"<< ppPairs.size()<<endl; */
neighborProcDiams
);
//pOutput<<"ppSize "<< ppPairs.size()<<endl;
return true;
}else

View File

@ -99,7 +99,7 @@ bool pFlow::twoPartContactSearch::broadSearchPP
ppPairs.increaseCapacityBy(len);
INFORMATION<< "Particle-particle contact pair container capacity increased from "<<
oldCap << " to "<<ppPairs.capacity()<<" in peiodicBoundaryContactSearch."<<END_INFO;
oldCap << " to "<<ppPairs.capacity()<<" in contact search in boundary region."<<END_INFO;
}

View File

@ -32,9 +32,7 @@ pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::processorBoundarySpher
geomMotion
),
masterInteraction_(boundary.isBoundaryMaster())
{
pOutput<<"Processor boundayrCondition for "<< boundary.name()<<endl;
}
{}
template <typename cFM, typename gMM>
bool pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::sphereSphereInteraction
@ -43,13 +41,13 @@ bool pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::sphereSphereInter
const ContactForceModel &cfModel
)
{
return true;
if(!masterInteraction_) return true;
const auto & sphPar = this->sphParticles();
uint32 thisIndex = this->boundary().thisBoundaryIndex();
const auto& a = sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll();
/*pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction(
pOutput<<"beofre sphereSphereInteraction"<<endl;
pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction(
dt,
this->ppPairs(),
cfModel,
@ -67,7 +65,9 @@ bool pFlow::MPI::processorBoundarySphereInteraction<cFM, gMM>::sphereSphereInter
sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(),
sphPar.contactForce().BoundaryField(thisIndex).neighborProcField().deviceViewAll(),
sphPar.contactTorque().BoundaryField(thisIndex).neighborProcField().deviceViewAll()
);*/
);
pOutput<<"after sphereSphereInteraction"<<endl;
return true;
}

View File

@ -166,12 +166,12 @@ bool pFlow::sphereInteraction<cFM,gMM, cLT>::iterate()
bool broadSearch = contactSearch_().enterBroadSearch(iter, t, dt);
/*sphParticles_.diameter().updateBoundaries(DataDirection::SlaveToMaster);
sphParticles_.diameter().updateBoundaries(DataDirection::SlaveToMaster);
sphParticles_.velocity().updateBoundaries(DataDirection::SlaveToMaster);
sphParticles_.rVelocity().updateBoundaries(DataDirection::SlaveToMaster);
sphParticles_.mass().updateBoundaries(DataDirection::SlaveToMaster);
sphParticles_.I().updateBoundaries(DataDirection::SlaveToMaster);
sphParticles_.propertyId().updateBoundaries(DataDirection::SlaveToMaster);*/
sphParticles_.propertyId().updateBoundaries(DataDirection::SlaveToMaster);
if(broadSearch)

View File

@ -238,6 +238,18 @@ inline auto send(span<T> data, int dest, int tag, Comm comm)
comm);
}
template<typename T>
inline auto send(const T& data, int dest, int tag, Comm comm)
{
return MPI_Send(
&data,
sFactor<T>(),
Type<T>(),
dest,
tag,
comm);
}
template<typename T>
inline auto Isend(span<T> data, int dest, int tag, Comm comm, Request* req)
{
@ -277,6 +289,19 @@ inline auto recv(span<T> data, int source, int tag, Comm comm, Status *status)
status);
}
template<typename T>
inline auto recv(T& data, int source, int tag, Comm comm, Status *status)
{
return MPI_Recv(
&data,
sFactor<T>(),
Type<T>(),
source,
tag,
comm,
status);
}
template<typename T>
inline auto Irecv(T& data, int source, int tag, Comm comm, Request* req)
{
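The two overloads added in this hunk, `send(const T&, ...)` and `recv(T&, ...)`, wrap `MPI_Send`/`MPI_Recv` for a single value instead of a `span<T>`; this is what `boundaryProcessor::transferData` later uses to exchange point counts before the bulk transfer. A minimal sketch of that usage, assuming the `pFlow::MPI` aliases (`Request`, `Status`, `RequestNull`, `CheckMPI`) from `mpiTypes.hpp` and placeholder `neighborRank`, `tag`, and `comm` values; the scalar `Isend` overload is the one used elsewhere in this PR:

```cpp
// Sketch only (not repository code): exchange scalar counts with a neighbor
// rank before sending the actual data. Mirrors the Isend/recv pairing used in
// boundaryProcessor::transferData; neighborRank, tag and comm are placeholders.
uint32 numToSend = 100;   // e.g. number of outgoing points
uint32 numToRecv = 0;

Request req = RequestNull;
Status  st;

CheckMPI(Isend(numToSend, neighborRank, tag, comm, &req), true); // non-blocking
CheckMPI(recv (numToRecv, neighborRank, tag, comm, &st ), true); // blocking
MPI_Request_free(&req);  // the request completes on its own; same pattern as this PR
```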

View File

@ -24,13 +24,13 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::checkDataRecieved() const
{
if (!dataRecieved_)
{
//uint32 nRecv = reciever_.waitComplete();
uint32 nRecv = reciever_.waitBufferForUse();
dataRecieved_ = true;
/*if (nRecv != this->neighborProcSize())
if (nRecv != this->neighborProcSize())
{
fatalErrorInFunction;
fatalExit;
}*/
}
}
}
@ -41,7 +41,7 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::updateBoundary(
DataDirection direction
)
{
/*if (step == 1)
if (step == 1)
{
// Isend
if (direction == DataDirection::TwoWay ||
@ -67,7 +67,7 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::updateBoundary(
{
fatalErrorInFunction << "Invalid step number " << step << endl;
return false;
}*/
}
return true;
}
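With the commented-out body restored, `updateBoundary` performs the exchange in two externally driven passes: step 1 posts the non-blocking sends and step 2 posts the matching receives, so every processor boundary has its send in flight before any receive is waited on. A hypothetical caller sketch, assuming the full signature is `updateBoundary(int step, DataDirection direction)` (the step checks and the `direction` parameter are both visible in this hunk); the real driver lives in `boundaryList`, which is not part of this diff:

```cpp
// Hypothetical driver, not the actual boundaryList code.
for (auto& bf : procBoundaryFields)                      // placeholder container
    bf.updateBoundary(1, DataDirection::SlaveToMaster);  // step 1: post sends
for (auto& bf : procBoundaryFields)
    bf.updateBoundary(2, DataDirection::SlaveToMaster);  // step 2: post receives
```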
@ -90,6 +90,8 @@ pFlow::MPI::processorBoundaryField<T, MemorySpace>::processorBoundaryField(
boundary.mirrorBoundaryIndex()
)
{
this->addEvent(message::BNDR_PROCTRANS1).
addEvent(message::BNDR_PROCTRANS2);
}
template<class T, class MemorySpace>

View File

@ -50,11 +50,11 @@ public:
private:
dataSender<T, MemorySpace> sender_;
dataSender<T, MemorySpace> sender_;
mutable dataReciever<T, MemorySpace> reciever_;
dataReciever<T, MemorySpace> reciever_;
mutable bool dataRecieved_ = true;
mutable bool dataRecieved_ = true;
void checkDataRecieved()const;
@ -82,7 +82,6 @@ public:
ProcVectorType& neighborProcField() override;
const ProcVectorType& neighborProcField()const override;
bool hearChanges

View File

@ -21,15 +21,13 @@ Licence:
#include "boundaryProcessor.hpp"
#include "dictionary.hpp"
#include "mpiCommunication.hpp"
#include "boundaryBaseKernels.hpp"
#include "internalPoints.hpp"
void
pFlow::MPI::boundaryProcessor::checkSize() const
{
if (!sizeObtained_)
{
//MPI_Wait(&sizeRequest_, StatusIgnore);
sizeObtained_ = true;
}
}
void
@ -37,13 +35,13 @@ pFlow::MPI::boundaryProcessor::checkDataRecieved() const
{
if (!dataRecieved_)
{
//uint32 nRecv = reciever_.waitComplete();
uint32 nRecv = reciever_.waitBufferForUse();
dataRecieved_ = true;
/*if (nRecv != neighborProcSize())
if (nRecv != neighborProcSize())
{
fatalErrorInFunction;
fatalExit;
}*/
}
}
}
@ -92,8 +90,7 @@ pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt)
pFlowProcessors().localCommunicator(),
MPI_STATUS_IGNORE
);
sizeObtained_ = false;
MPI_Request_free(&req);
return true;
}
@ -135,6 +132,105 @@ pFlow::MPI::boundaryProcessor::updataBoundary(int step)
return true;
}
bool pFlow::MPI::boundaryProcessor::transferData(int step)
{
if(step==1)
{
uint32 s = size();
uint32Vector_D transferFlags("transferFlags",s+1, s+1, RESERVE());
transferFlags.fill(0u);
const auto& transferD = transferFlags.deviceViewAll();
auto points = thisPoints();
auto p = boundaryPlane().infPlane();
numToTransfer_ = 0;
Kokkos::parallel_reduce
(
"boundaryProcessor::afterIteration",
deviceRPolicyStatic(0,s),
LAMBDA_HD(uint32 i, uint32& transferToUpdate)
{
if(p.pointInNegativeSide(points(i)))
{
transferD(i)=1;
transferToUpdate++;
}
},
numToTransfer_
);
uint32Vector_D keepIndices("keepIndices");
if(numToTransfer_ != 0u)
{
pFlow::boundaryBaseKernels::createRemoveKeepIndices
(
indexList(),
numToTransfer_,
transferFlags,
transferIndices_,
keepIndices,
false
);
// delete transfer point from this processor
if( !setRemoveKeepIndices(transferIndices_, keepIndices))
{
fatalErrorInFunction<<
"error in setting transfer and keep points in boundary "<< name()<<endl;
return false;
}
}
else
{
transferIndices_.clear();
}
auto req = RequestNull;
CheckMPI( Isend(
numToTransfer_,
neighborProcessorNo(),
thisBoundaryIndex(),
pFlowProcessors().localCommunicator(),
&req), true );
CheckMPI(recv(
numToRecieve_,
neighborProcessorNo(),
mirrorBoundaryIndex(),
pFlowProcessors().localCommunicator(),
StatusesIgnore), true);
MPI_Request_free(&req);
return true;
}
else if(step ==2 )
{
pointFieldAccessType transferPoints(
transferIndices_.size(),
transferIndices_.deviceViewAll(),
internal().pointPositionDevice());
sender_.sendData(pFlowProcessors(), transferPoints);
return true;
}
else if(step == 3)
{
reciever_.recieveData(pFlowProcessors(), numToRecieve_);
return true;
}
else if(step == 4)
{
reciever_.waitBufferForUse();
//
return false;
}
return false;
}
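`transferData` spreads particle migration across four externally driven steps: step 1 flags the points on the negative side of the boundary plane, removes them locally, and exchanges the outgoing/incoming counts with the neighbor; step 2 packs the flagged positions and posts a non-blocking send; step 3 posts the receive for `numToRecieve_` points; step 4 waits on the receive buffer. A hypothetical driver sketch (the real caller is not part of this diff); completing each step on every boundary before starting the next keeps counts and buffers consistent across ranks:

```cpp
// Hypothetical driver, not repository code.
for (int step = 1; step <= 4; ++step)
{
    for (auto& bndry : procBoundaries)   // placeholder container of boundaryProcessor
    {
        bndry.transferData(step);
    }
}
```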
bool
pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt)
{
@ -144,5 +240,54 @@ pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt)
bool
pFlow::MPI::boundaryProcessor::afterIteration(uint32 iterNum, real t, real dt)
{
uint32 s = size();
pOutput<<"size of boundary is "<< s <<endl;
uint32Vector_D transferFlags("transferFlags",s+1, s+1, RESERVE());
transferFlags.fill(0u);
const auto& transferD = transferFlags.deviceViewAll();
auto points = thisPoints();
auto p = boundaryPlane().infPlane();
uint32 numTransfer = 0;
Kokkos::parallel_reduce
(
"boundaryProcessor::afterIteration",
deviceRPolicyStatic(0,s),
LAMBDA_HD(uint32 i, uint32& transferToUpdate)
{
if(p.pointInNegativeSide(points(i)))
{
transferD(i)=1;
transferToUpdate++;
}
},
numTransfer
);
pOutput<<"Numebr to be transfered "<< numTransfer<<endl;
uint32Vector_D transferIndices("transferIndices");
uint32Vector_D keepIndices("keepIndices");
pFlow::boundaryBaseKernels::createRemoveKeepIndices
(
indexList(),
numTransfer,
transferFlags,
transferIndices,
keepIndices
);
// delete transfer point from this processor
if( !setRemoveKeepIndices(transferIndices, keepIndices))
{
fatalErrorInFunction<<
"error in setting transfer and keep points in boundary "<< name()<<endl;
return false;
}
return true;
}
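`afterIteration` uses a flag-and-count idiom: a single `Kokkos::parallel_reduce` both marks the points lying on the negative side of the boundary plane and sums how many were marked, and that count then drives `createRemoveKeepIndices`. Below is a self-contained sketch of the same idiom in plain Kokkos, with hypothetical data; phasicFlow's wrappers such as `uint32Vector_D`, `LAMBDA_HD`, and `deviceRPolicyStatic` are intentionally not used here:

```cpp
#include <Kokkos_Core.hpp>

int main(int argc, char* argv[])
{
    Kokkos::initialize(argc, argv);
    {
        const int n = 1000;
        Kokkos::View<double*>   x("x", n);
        Kokkos::View<unsigned*> flags("flags", n);   // zero-initialized by Kokkos

        // fill x with some values; entries below zero play the role of
        // points on the negative side of the plane
        Kokkos::parallel_for("fill", n, KOKKOS_LAMBDA(const int i) {
            x(i) = i - 500.0;
        });

        // flag the "negative side" entries and count them in one pass
        unsigned numFlagged = 0;
        Kokkos::parallel_reduce(
            "flagAndCount", n,
            KOKKOS_LAMBDA(const int i, unsigned& local)
            {
                if (x(i) < 0.0)
                {
                    flags(i) = 1u;
                    ++local;
                }
            },
            numFlagged);

        // numFlagged now holds the number of flagged points (500 here)
    }
    Kokkos::finalize();
    return 0;
}
```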

View File

@ -1,7 +1,7 @@
/*------------------------------- phasicFlow ---------------------------------
O C enter of
O O E ngineering and
O O M ultiscale modeling of
O C enter of
O O E ngineering and
O O M ultiscale modeling of
OOOOOOO F luid flow
------------------------------------------------------------------------------
Copyright (C): www.cemf.ir
@ -21,7 +21,6 @@ Licence:
#ifndef __boundaryProcessor_hpp__
#define __boundaryProcessor_hpp__
#include "boundaryBase.hpp"
#include "mpiTypes.hpp"
#include "dataSender.hpp"
@ -30,86 +29,82 @@ Licence:
namespace pFlow::MPI
{
class boundaryProcessor
:
public boundaryBase
{
private:
class boundaryProcessor
: public boundaryBase
{
public:
using pointFieldAccessType = typename boundaryBase::pointFieldAccessType;
uint32 neighborProcNumPoints_ = 0;
private:
uint32 neighborProcNumPoints_ = 0;
uint32 thisNumPoints_;
uint32 thisNumPoints_ = 0;
realx3Vector_D neighborProcPoints_;
realx3Vector_D neighborProcPoints_;
mutable Request sizeRequest_;
dataSender<realx3> sender_;
mutable Request sSizeRequest_;
dataReciever<realx3> reciever_;
int req_=0;
mutable bool dataRecieved_ = true;
mutable bool sizeObtained_ = true;
uint32 numToTransfer_ = 0;
mutable dataSender<realx3> sender_;
uint32 numToRecieve_ = 0;
mutable dataReciever<realx3> reciever_;
uint32Vector_D transferIndices_{"transferIndices"};
mutable bool dataRecieved_ = true;
void checkSize() const;
void checkSize()const;
void checkDataRecieved() const;
void checkDataRecieved()const;
/// @brief Update processor boundary data for this processor
/// @param step It is either 1 or 2 in the input to indicate
/// the update step
/// @return true if successful
/// @details This method is called by boundaryList two times to
/// allow processor boundaries to exchange data in two steps.
/// The first step is a buffered non-blocking send and the second
/// step is a non-blocking receive to get data.
bool updataBoundary(int step) override;
/// @brief Update processor boundary data for this processor
/// @param step It is either 1 or 2 in the input to indicate
/// the update step
/// @return true if successful
/// @details This method is called by boundaryList two times to
/// allow processor boundaries to exchange data in two steps.
/// The first step is a buffered non-blocking send and the second
/// step is a non-blocking receive to get data.
bool updataBoundary(int step)override;
bool transferData(int step) override;
public:
public:
TypeInfo("boundary<processor>");
TypeInfo("boundary<processor>");
boundaryProcessor(
const dictionary &dict,
const plane &bplane,
internalPoints &internal,
boundaryList &bndrs,
uint32 thisIndex);
boundaryProcessor(
const dictionary& dict,
const plane& bplane,
internalPoints& internal,
boundaryList& bndrs,
uint32 thisIndex
);
~boundaryProcessor() override = default;
~boundaryProcessor() override = default;
add_vCtor(
boundaryBase,
boundaryProcessor,
dictionary);
add_vCtor
(
boundaryBase,
boundaryProcessor,
dictionary
);
bool beforeIteration(uint32 iterNum, real t, real dt) override;
bool beforeIteration(uint32 iterNum, real t, real dt) override;
bool iterate(uint32 iterNum, real t, real dt) override;
bool iterate(uint32 iterNum, real t, real dt) override;
bool afterIteration(uint32 iterNum, real t, real dt) override;
bool afterIteration(uint32 iterNum, real t, real dt) override;
/// @brief Return number of points in the neighbor processor boundary.
/// This is overridden from boundaryBase.
uint32 neighborProcSize() const override;
/// @brief Return number of points in the neighbor processor boundary.
/// This is overridden from boundaryBase.
uint32 neighborProcSize() const override;
/// @brief Return a reference to point positions in the neighbor
/// processor boundary.
realx3Vector_D &neighborProcPoints() override;
/// @brief Return a reference to point positions in the neighbor
/// processor boundary.
realx3Vector_D& neighborProcPoints() override;
/// @brief Return a const reference to point positions in the
/// neighbor processor boundary.
const realx3Vector_D& neighborProcPoints() const override;
};
/// @brief Return a const reference to point positions in the
/// neighbor processor boundary.
const realx3Vector_D &neighborProcPoints() const override;
};
} // namespace pFlow::MPI

View File

@ -27,13 +27,11 @@ private:
BufferVectorType buffer_;
std::vector<T> buffer0_;
int fromProc_;
int tag_;
Request recvRequest_;
mutable Request recvRequest_ = RequestNull;
public:
@ -46,34 +44,40 @@ public:
~dataReciever()=default;
uint32 waitBufferForUse()const
{
if(recvRequest_ != RequestNull)
{
Status status;
MPI_Wait(&recvRequest_, &status);
int count;
CheckMPI(getCount<T>(&status, count), true);
return static_cast<uint32>(count);
}
else
return buffer_.size();
}
void recieveData(
const localProcessors& processors,
uint32 numToRecv
)
{
waitBufferForUse();
buffer_.clear();
buffer_.resize(numToRecv);
buffer0_.clear();
buffer0_.resize(numToRecv);
MPI_Status status;
/*CheckMPI(recv(
buffer_.getSpan(),
fromProc_,
tag_,
processors.localCommunicator(),
&status), true);*/
MPI_Recv(
buffer0_.data(),
buffer0_.size(),
realx3Type__,
fromProc_,
tag_,
processors.localCommunicator(),
&status
CheckMPI(
Irecv(
buffer_.getSpan(),
fromProc_,
tag_,
processors.localCommunicator(),
&recvRequest_
),
true
);
int c;
getCount<realx3>(&status, c);
pOutput<<"Number of data recieved "<<c<<endl;
}
auto& buffer()
@ -86,20 +90,6 @@ public:
return buffer_;
}
uint32 waitComplete()
{
/*Status status;
CheckMPI(MPI_Wait(&recvRequest_, &status), true);
int count;
CheckMPI(getCount<T>(&status, count), true);
return static_cast<uint32>(count);*/
return buffer_.size();
}
};
}
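`dataReciever` now posts a non-blocking `Irecv` into its device buffer and defers the wait: `waitBufferForUse` blocks on the outstanding request (if any) and returns the received element count, which is exactly what `checkDataRecieved` in `processorBoundaryField` and `boundaryProcessor` consume earlier in this diff. A hedged usage sketch, following the member names used in `boundaryProcessor`:

```cpp
// Hypothetical usage; reciever_ is a dataReciever<realx3> member and
// numToRecieve_ was exchanged with the neighbor beforehand (see transferData).
reciever_.recieveData(pFlowProcessors(), numToRecieve_);  // posts the Irecv

// ... computation can overlap with the transfer here ...

uint32 nRecv = reciever_.waitBufferForUse();  // blocks, returns element count
const auto& incoming = reciever_.buffer();    // device buffer with nRecv points
```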

View File

@ -26,15 +26,13 @@ public:
private:
//BufferVectorType buffer_;
std::vector<T> buffer_;
BufferVectorType buffer_;
int toProc_;
int tag_;
Request sendRequest_ = RequestNull;
mutable Request sendRequest_ = RequestNull;
public:
@ -44,7 +42,22 @@ public:
tag_(tag)
{}
~dataSender()=default;
~dataSender()
{
if(sendRequest_ != RequestNull)
{
MPI_Request_free(&sendRequest_);
}
}
bool waitBufferForUse()const
{
if(sendRequest_ != RequestNull)
{
MPI_Wait(&sendRequest_, StatusesIgnore);
}
return true;
}
void sendData(
const localProcessors& processors,
@ -52,17 +65,21 @@ public:
)
{
using RPolicy = Kokkos::RangePolicy<
DefaultExecutionSpace,
execution_space,
Kokkos::Schedule<Kokkos::Static>,
Kokkos::IndexType<pFlow::uint32>>;
uint32 n = scatterField.size();
// make sure the buffer is ready to be used and free
// the previous request (if any).
waitBufferForUse();
// clear the buffer to prevent data copy if capacity increases
buffer_.clear();
buffer_.resize(n);
auto* buffView = buffer_.data();
const auto& buffView = buffer_.deviceViewAll();
Kokkos::parallel_for(
"dataSender::sendData",
@ -73,26 +90,20 @@ public:
}
);
Kokkos::fence();
auto req = MPI_REQUEST_NULL;
MPI_Isend(
buffer_.data(),
buffer_.size(),
realx3Type__,
toProc_,
tag_,
processors.localCommunicator(),
&req);
CheckMPI(
Isend(buffer_.getSpan(),
toProc_,
tag_,
processors.localCommunicator(),
&sendRequest_
),
true
);
/*CheckMPI(send(
buffer_.getSpan(),
toProc_,
tag_,
processors.localCommunicator(),
MPI_STATUS_IGNORE), true);*/
}
/*auto& buffer()
auto& buffer()
{
return buffer_;
}
@ -100,17 +111,20 @@ public:
const auto& buffer()const
{
return buffer_;
}*/
}
bool sendComplete()
{
return true;
/*int test;
MPI_Test(&sendRequest_, &test, StatusIgnore);
if(test)
return true;
int test;
if(sendRequest_ != RequestNull)
{
MPI_Test(&sendRequest_, &test, StatusIgnore);
return test;
}
else
return false;*/
{
return true;
}
}
};
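On the send side, `sendData` first calls `waitBufferForUse` so the buffer of a still-in-flight `Isend` from the previous call cannot be overwritten, then gathers the scattered field into the persistent device buffer with a `parallel_for` and posts a single non-blocking `Isend`; `sendComplete` lets the caller poll, and the destructor frees any leftover request. A hedged sketch of pairing it with the receiver (member names follow `boundaryProcessor`; `transferPoints` is the `pointFieldAccessType` built in `transferData`):

```cpp
// Hypothetical pairing, not repository code.
sender_.sendData(pFlowProcessors(), transferPoints);      // wait, gather, Isend
reciever_.recieveData(pFlowProcessors(), numToRecieve_);  // post matching Irecv

// ... overlap computation with communication ...

reciever_.waitBufferForUse();  // incoming data is now safe to read
sender_.waitBufferForUse();    // optional: outgoing buffer may be reused
```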

View File

@ -156,3 +156,9 @@ void pFlow::processorOstream::indent()
checkForPrefix();
Ostream::indent();
}
pFlow::processorOstream &pFlow::processorOstream::setColor(const char *colorCode)
{
Ostream::write(colorCode);
return *this;
}

View File

@ -139,6 +139,8 @@ public:
/// Add indentation characters
void indent() override;
processorOstream& setColor(const char* colorCode);
}; // processorOstream

View File

@ -33,7 +33,7 @@ namespace pFlow
}
#define INFORMATION pFlow::pOutput<<boldChar<<magentaColor<<"> INFO: "<<defaultColor<<magentaColor
#define INFORMATION pFlow::pOutput.setColor(boldChar).setColor(magentaColor)<<"> INFO: "<<defaultColor<<magentaColor
#define END_INFO defaultColor<<pFlow::endl
#define REPORT(n) pFlow::mOutput.space(2*n)
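The `INFORMATION` macro now pushes its color codes through the new `processorOstream::setColor`, so the escape sequences go through the same per-processor prefixing as the message itself; call sites are unchanged. For example, the message emitted by `twoPartContactSearch` earlier in this diff:

```cpp
INFORMATION << "Particle-particle contact pair container capacity increased from "
            << oldCap << " to " << ppPairs.capacity()
            << " in contact search in boundary region." << END_INFO;
```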