From 6969b71cc51e8bdbac1deadff83c344a79943264 Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Sat, 20 Jan 2024 11:30:49 -0800 Subject: [PATCH 01/14] MPI code --- .../MPIParallelization/CMakeLists.txt | 36 ++ .../MPIParallelization/boundaryProcessor.cpp | 61 +++ .../MPIParallelization/boundaryProcessor.hpp | 67 +++ .../MPIParallelization/dataIOMPI.cpp | 5 + .../MPIParallelization/dataIOMPI.hpp | 97 +++++ .../domain/MPISimulationDomain.cpp | 400 ++++++++++++++++++ .../domain/MPISimulationDomain.hpp | 116 +++++ .../MPIParallelization/gatherMaster.hpp | 105 +++++ .../MPIParallelization/mpiCommunication.hpp | 383 +++++++++++++++++ .../MPIParallelization/mpiTypes.hpp | 75 ++++ .../MPIParallelization/partitioning.cpp | 113 +++++ .../MPIParallelization/partitioning.hpp | 168 ++++++++ .../MPIParallelization/procCommunication.cpp | 30 ++ .../MPIParallelization/procCommunication.hpp | 178 ++++++++ .../MPIParallelization/procVector.hpp | 199 +++++++++ .../processorBoundaryField.cpp | 29 ++ .../processorBoundaryField.hpp | 80 ++++ .../processorBoundaryFields.cpp | 10 + .../MPIParallelization/rcb1DPartitioning.cpp | 319 ++++++++++++++ .../MPIParallelization/rcb1DPartitioning.hpp | 241 +++++++++++ .../scatteredMasterDistribute.cpp | 158 +++++++ .../scatteredMasterDistribute.hpp | 69 +++ .../scatteredMasterDistributeChar.cpp | 166 ++++++++ .../scatteredMasterDistributeChar.hpp | 67 +++ 24 files changed, 3172 insertions(+) create mode 100644 src/phasicFlow/MPIParallelization/CMakeLists.txt create mode 100644 src/phasicFlow/MPIParallelization/boundaryProcessor.cpp create mode 100644 src/phasicFlow/MPIParallelization/boundaryProcessor.hpp create mode 100644 src/phasicFlow/MPIParallelization/dataIOMPI.cpp create mode 100644 src/phasicFlow/MPIParallelization/dataIOMPI.hpp create mode 100644 src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp create mode 100644 src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp create mode 100644 src/phasicFlow/MPIParallelization/gatherMaster.hpp create mode 100644 src/phasicFlow/MPIParallelization/mpiCommunication.hpp create mode 100644 src/phasicFlow/MPIParallelization/mpiTypes.hpp create mode 100644 src/phasicFlow/MPIParallelization/partitioning.cpp create mode 100644 src/phasicFlow/MPIParallelization/partitioning.hpp create mode 100644 src/phasicFlow/MPIParallelization/procCommunication.cpp create mode 100644 src/phasicFlow/MPIParallelization/procCommunication.hpp create mode 100644 src/phasicFlow/MPIParallelization/procVector.hpp create mode 100644 src/phasicFlow/MPIParallelization/processorBoundaryField.cpp create mode 100644 src/phasicFlow/MPIParallelization/processorBoundaryField.hpp create mode 100644 src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp create mode 100644 src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp create mode 100644 src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp create mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp create mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp create mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp create mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp diff --git a/src/phasicFlow/MPIParallelization/CMakeLists.txt b/src/phasicFlow/MPIParallelization/CMakeLists.txt new file mode 100644 index 00000000..32ab1c6b --- /dev/null +++ b/src/phasicFlow/MPIParallelization/CMakeLists.txt @@ -0,0 +1,36 @@ +#add Zoltan +set(Zoltan_Install_DIR) +if(DEFINED 
ENV{Zoltan_DIR}) + set(Zoltan_Install_DIR $ENV{Zoltan_DIR}) +else() + set(Zoltan_Install_DIR $ENV{HOME}/PhasicFlow/Zoltan) +endif() +message(STATUS "Zoltan install directory is ${Zoltan_Install_DIR}") + +set(ZOLTAN_PREFIX "${Zoltan_Install_DIR}" CACHE STRING "Zoltan install directory") + +find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include") + +message(STATUS "Zoltan include path: ${ZOLTAN_INCLUDE_DIR}") + +find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib") +message(STATUS "Zoltan lib path: ${ZOLTAN_LIBRARY}") + + +set(SourceFiles + partitioning.cpp + rcb1DPartitioning.cpp + domain/MPISimulationDomain.cpp) + +set(link_libs Kokkos::kokkos phasicFlow PRIVATE MPI::MPI_CXX ${ZOLTAN_LIBRARY} -lm ) + +pFlow_add_library_install(MPIParallelization SourceFiles link_libs) +target_include_directories(MPIParallelization PUBLIC ${ZOLTAN_INCLUDE_DIR}) + + + + + + + + diff --git a/src/phasicFlow/MPIParallelization/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/boundaryProcessor.cpp new file mode 100644 index 00000000..a5622691 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/boundaryProcessor.cpp @@ -0,0 +1,61 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "boundaryProcessor.hpp" +#include "dictionary.hpp" + +pFlow::boundaryProcessor::boundaryProcessor +( + const dictionary& dict, + const plane& bplane, + internalPoints& internal +) +: + boundaryBase(dict, bplane, internal) +{ + +} + +bool pFlow::boundaryProcessor::beforeIteratoin +( + uint32 iterNum, + real t +) +{ + return true; +} + +bool pFlow::boundaryProcessor::iterate +( + uint32 iterNum, + real t +) +{ + return true; +} + +bool pFlow::boundaryProcessor::afterIteration +( + uint32 iterNum, + real t +) +{ + return true; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/boundaryProcessor.hpp b/src/phasicFlow/MPIParallelization/boundaryProcessor.hpp new file mode 100644 index 00000000..66b3b468 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/boundaryProcessor.hpp @@ -0,0 +1,67 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. 
You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __boundaryProcessor_hpp__ +#define __boundaryProcessor_hpp__ + + +#include "boundaryBase.hpp" + +namespace pFlow +{ + +class boundaryProcessor +: + public boundaryBase +{ +protected: + + +public: + + TypeInfo("boundary"); + + boundaryProcessor( + const dictionary& dict, + const plane& bplane, + internalPoints& internal); + + virtual + ~boundaryProcessor() = default; + + add_vCtor + ( + boundaryBase, + boundaryProcessor, + dictionary + ); + + bool beforeIteratoin(uint32 iterNum, real t) override; + + bool iterate(uint32 iterNum, real t) override; + + bool afterIteration(uint32 iterNum, real t) override; + + +}; + +} + +#endif //__boundaryProcessor_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI.cpp b/src/phasicFlow/MPIParallelization/dataIOMPI.cpp new file mode 100644 index 00000000..30fd93cf --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI.cpp @@ -0,0 +1,5 @@ + +#include "gatherMaster.hpp" + + + diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI.hpp b/src/phasicFlow/MPIParallelization/dataIOMPI.hpp new file mode 100644 index 00000000..850cf69b --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI.hpp @@ -0,0 +1,97 @@ +#ifndef __datIOMPI_hpp__ +#define __datIOMPI_hpp__ + +#include "dataIO.hpp" +#include "pFlowProcessors.hpp" + +#ifdef pFlow_Build_MPI + #include "gatherMaster.hpp" +#endif + +namespace pFlow +{ + +template +class dataIOMPI +: + public dataIO +{ +protected: + + bool gatherData(span data ) override + { + + if(this->ioPattern_.isAllProcessorsDifferent()) + { + this->bufferSpan_ = data; + return true; + } + + if( this->ioPattern_.isMasterProcessorDistribute()) + { + +#ifdef pFlow_Build_MPI + + auto gatherT = pFlow::MPI::gatherMaster(pFlowProcessors()); + + if(!gatherT.gatherData(data)) + { + fatalErrorInFunction<<"Error in gathering data to master"<buffer_ = gatherT.moveData(); + + this->bufferSpan_ = makeSpan(this->buffer_); + + return true; +#else + notImplementedFunction; + fatalExit; + return false; +#endif //pFlow_Build_MPI + + } + + if( this->ioPattern_.isMasterProcessorOnly() || this->ioPattern_.isAllProcessorSimilar() ) + { + if( this->ioPattern_.isMaster() ) + { + this->bufferSpan_ = data; + } + else + { + this->bufferSpan_ = span(nullptr, 0); + return true; + } + } + + return false; + } +public: + + TypeInfo("dataIO"); + + dataIOMPI(const IOPattern& iop) + : + dataIO(iop) + {} + + dataIOMPI(const dataIOMPI&) = default; + + dataIOMPI(dataIOMPI&&) = default; + + + dataIOMPI& operator=(const dataIOMPI&) = default; + + dataIOMPI& operator=(dataIOMPI&&) = default; + + ~dataIOMPI() = default; + +}; + + +} + + +#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp new file mode 100644 index 00000000..da66b8c2 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -0,0 +1,400 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale 
modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "MPISimulationDomain.hpp" +#include "systemControl.hpp" +#include "rcb1DPartitioning.hpp" +#include "scatteredMasterDistribute.hpp" +#include "scatteredMasterDistributeChar.hpp" + +pFlow::MPISimulationDomain::MPISimulationDomain(systemControl& control) +: + simulationDomain(control), + communication_(pFlowProcessors()), + subDomains_(pFlowProcessors()), + domainPartition_( makeUnique(subDict("decomposition"), globalBox_)) +{} + +bool pFlow::MPISimulationDomain::createBoundaryDicts() +{ + auto& boundaries = this->subDict("boundaries"); + + this->addDict("MPIBoundaries", boundaries); + auto& mpiBoundaries = this->subDict("MPIBoundaries"); + + real neighborLength = boundaries.getVal("neighborLength"); + + auto neighbors = findPlaneNeighbors(); + + for(uint32 i=0; ilocalBox()); + if(!communication_.collectAllToAll(thisDomain_, subDomains_)) + { + fatalErrorInFunction<< "Failed to distributed domains"< pFlow::MPISimulationDomain::findPlaneNeighbors() const +{ + + std::vector neighbors(sizeOfBoundaries(), -2); + domain gDomain(globalBox_); + + // left + if( thisDomain_.left().parallelTouch( gDomain.left() ) ) + { + neighbors[0] = -1; + } + + for(int i=0; isubDict("MPIBoundaries"); +} + +bool pFlow::MPISimulationDomain::initialUpdateDomains(span pointPos) +{ + pFlagTypeHost flags(pointPos.size(), 0 , pointPos.size()); + initialNumPoints_ = pointPos.size(); + if( !domainPartition_->partition(pointPos, flags) ) + { + return false; + } + + if(!setThisDomain()) return false; + if(!createBoundaryDicts()) return false; + + return true; +} + +pFlow::uint32 pFlow::MPISimulationDomain::initialNumberInThis() const +{ + uint32 numImport = domainPartition_->numberImportThisProc(); + uint32 numExport = domainPartition_->numberExportThisProc(); + return initialNumPoints_+ numImport - numExport;; +} + +bool pFlow::MPISimulationDomain::initialTransferBlockData +( + span src, + span dst, + size_t sizeOfElement +)const +{ + MPI::scatteredMasterDistribute dataDist(sizeOfElement, pFlowProcessors()); + + auto lists = domainPartition_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + auto lists = domainPartition_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + + auto lists = domainPartition_->allExportLists(); + + 
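+    // register the export lists (local point indices per destination processor) with the
+    // distributor; setDataMaps turns them into MPI indexed datatypes used when the master
+    // scatters the selected elements to each processor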
if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + + auto lists = domainPartition_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + + auto lists = domainPartition_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< pointPos, + pFlagTypeHost flags) +{ + if( !domainPartition_->partition(pointPos, flags) ) + { + return false; + } + + if(!setThisDomain()) return false; + if(!createBoundaryDicts()) return false; + + return true; +}*/ + +pFlow::uint32 pFlow::MPISimulationDomain::numberToBeImported() const +{ + return domainPartition_->numberImportThisProc(); +} + +pFlow::uint32 pFlow::MPISimulationDomain::numberToBeExported() const +{ + return domainPartition_->numberExportThisProc(); +} + + + +bool pFlow::MPISimulationDomain::requiresDataTransfer() const +{ + notImplementedFunction; + return false; +} diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp new file mode 100644 index 00000000..b47a9201 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp @@ -0,0 +1,116 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __MPISimulationDomain_hpp__ +#define __MPISimulationDomain_hpp__ + +#include "simulationDomain.hpp" +#include "partitioning.hpp" +#include "procVector.hpp" +#include "procCommunication.hpp" + +namespace pFlow +{ + +class MPISimulationDomain +: + public simulationDomain +{ +protected: + MPI::procCommunication communication_; + + MPI::procVector subDomains_; + + uniquePtr domainPartition_ = nullptr; + + uint32 initialNumPoints_ = 0; + + bool createBoundaryDicts() override; + + bool setThisDomain() override; + + std::vector + findPlaneNeighbors()const; + +public: + + TypeInfo("simulationDomain"); + + MPISimulationDomain(systemControl& control); + + virtual + ~MPISimulationDomain()=default; + + add_vCtor + ( + simulationDomain, + MPISimulationDomain, + systemControl + ); + + const dictionary& thisBoundaryDict()const override; + + /// @brief + /// @param pointPos + /// @return + bool initialUpdateDomains(span pointPos)override; + + /// @brief + /// @return + uint32 initialNumberInThis()const override; + + bool initialTransferBlockData( + span src, + span dst, + size_t sizeOfElement)const override; + + bool initialTransferBlockData( + span src, + span dst) const override; + + bool initialTransferBlockData( + span src, + span dst) const override; + + bool initialTransferBlockData( + span src, + span dst) const override; + + bool initialTransferBlockData( + span src, + span dst) const override; + + + /*bool updateDomains( + span pointPos, + pFlagTypeHost flags) override;*/ + + + uint32 numberToBeImported()const override; + + uint32 numberToBeExported()const override; + + bool requiresDataTransfer() const override; + + +}; + +} + +#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/gatherMaster.hpp b/src/phasicFlow/MPIParallelization/gatherMaster.hpp new file mode 100644 index 00000000..ca1ecc77 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/gatherMaster.hpp @@ -0,0 +1,105 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __gatherMaster_hpp__ +#define __gatherMaster_hpp__ + +#include + +#include "procCommunication.hpp" + +namespace pFlow::MPI +{ + +template +class gatherMaster +: + public procCommunication +{ +protected: + + std::vector buffer_; + +public: + + gatherMaster(const localProcessors& procs) + : + procCommunication(procs) + {} + + span getData() + { + if(this->localMaster()) + return span( buffer_.data(), buffer_.size()); + else + return span(nullptr, 0); + } + + std::vector moveData() + { + return std::move(buffer_); + } + + bool gatherData(span data) + { + int thisN = data.size(); + + bool succss; + + procVector numElems(this->processors(), true); + procVector displ(this->processors(), true); + + if( !this->collectAllToMaster(thisN, numElems) ) + { + fatalErrorInFunction<< + "error in collecting number of elements from processors"<(0)); + + buffer_.resize(totalN); + + std::exclusive_scan( + numElems.begin(), + numElems.end(), + displ.begin(), + 0); + + auto bufferSpan = makeSpan(buffer_); + + return CheckMPI( + Gatherv( + data, + bufferSpan, + makeSpan(numElems), + makeSpan(displ), + this->localMasterNo(), + this->localCommunicator()), + false); + + } + + +}; +} + +#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/mpiCommunication.hpp new file mode 100644 index 00000000..4c43038d --- /dev/null +++ b/src/phasicFlow/MPIParallelization/mpiCommunication.hpp @@ -0,0 +1,383 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#ifndef __mpiCommunication_H__ +#define __mpiCommunication_H__ + + +#include "mpiTypes.hpp" +#include "types.hpp" +#include "span.hpp" + +#ifdef pFlow_Build_MPI + + + +namespace pFlow::MPI +{ + +extern DataType realx3Type__; + +extern DataType realx4Type__; + +extern DataType int32x3Type__; + +template +auto constexpr Type() +{ + return MPI_BYTE; +} + +template +auto constexpr sFactor() +{ + return sizeof(T); +} + +template +auto constexpr Type() +{ + return MPI_CHAR; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_SHORT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_UNSIGNED_SHORT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_INT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_UNSIGNED; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_LONG; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_UNSIGNED_LONG; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + + +template<> +auto constexpr Type() +{ + return MPI_FLOAT; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_DOUBLE; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return realx3Type__; +} + +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return realx4Type__; +} + +template<> +auto constexpr sFactor() +{ + return 1; +} + + +template<> +inline +auto Type() +{ + return int32x3Type__; +} + + +template<> +auto constexpr sFactor() +{ + return 1; +} + +/*inline +auto createByteSequence(int sizeOfElement) +{ + DataType newType; + MPI_Type_contiguous(sizeOfElement, MPI_CHAR, &newType); + MPI_Type_commit(&newType); + return newType; +}*/ + +inline +auto TypeCommit(DataType* type) +{ + return MPI_Type_commit(type); +} + +inline +auto TypeFree(DataType* type) +{ + return MPI_Type_free(type); + +} +template +inline auto getCount(Status* status, int& count) +{ + int lCount; + auto res = MPI_Get_count(status, Type(), &lCount); + count = lCount/sFactor(); + return res; +} + +template +inline int convertIndex(const int& ind) +{ + return ind*sFactor(); +} + +template +inline auto send(span data, int dest, int tag, Comm comm) +{ + return MPI_Send( + data.data(), + sFactor()*data().size(), + Type(), + dest, + tag, + comm); +} + + + +template +inline auto recv(span data, int source, int tag, Comm comm, Status *status) +{ + return MPI_Recv( + data.data(), + sFactor()*data.size(), + Type(), + source, + tag, + comm, + status); +} + + +template +inline auto scan(T sData, T& rData, Comm comm, Operation op = SumOp) +{ + return MPI_Scan(&sData, &rData, sFactor()*1, Type(), op , comm ); +} + +// gathering one scalar data to root processor +template +inline auto gather(T sendData, span& recvData, int root, Comm comm) +{ + return MPI_Gather( + &sendData, + sFactor()*1, + Type(), + recvData.data(), + sFactor()*1, + Type(), + root, + comm); +} + +template +inline auto allGather(T sendData, span& recvData, Comm comm) +{ + return MPI_Allgather( + &sendData, + sFactor()*1, + Type(), + recvData.data(), + sFactor()*1, + Type(), + comm); +} + +template 
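+// root scatters one element of type T to each processor in the communicator
+// (thin wrapper around MPI_Scatter using the mapped MPI datatype and size factor)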
+inline auto scatter(span sendData, T& recvData, int root, Comm comm) +{ + return MPI_Scatter( + sendData.data(), + sFactor()*1, + Type(), + &recvData, + sFactor()*1, + Type(), + root, + comm); +} + +template +inline auto Bcast(T& sendData, int root, Comm comm) +{ + return MPI_Bcast( + &sendData, sFactor()*1, Type(), root, comm); + +} + +template +bool typeCreateIndexedBlock( + span index, + DataType &newType) +{ + auto res = MPI_Type_create_indexed_block( + index.size(), + sFactor(), + index.data(), + Type(), + &newType); + + if(res == Success) + { + TypeCommit(&newType); + } + else + { + return false; + } + + return true; +} + + +template +inline auto Gatherv +( + span sendData, + span& recvData, + span recvCounts, + span displs, + int root, + Comm comm) +{ + + return MPI_Gatherv( + sendData.data(), + sendData.size()*sFactor(), + Type(), + recvData.data(), + recvCounts.data(), + displs.data(), + Type(), + root, + comm + ); + +} + +inline auto Wait(Request* request, Status* status) +{ + return MPI_Wait(request, status); +} + +inline auto typeFree(DataType& type) +{ + return MPI_Type_free(&type); +} + + +} + +#endif //pFlow_Build_MPI + + + + +#endif //__mpiCommunication_H__ diff --git a/src/phasicFlow/MPIParallelization/mpiTypes.hpp b/src/phasicFlow/MPIParallelization/mpiTypes.hpp new file mode 100644 index 00000000..c1721290 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/mpiTypes.hpp @@ -0,0 +1,75 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#ifndef __mpiTypes_H__ +#define __mpiTypes_H__ + + +#ifdef pFlow_Build_MPI + +#include + +namespace pFlow::MPI +{ + // types + using Comm = MPI_Comm; + using Group = MPI_Group; + using Status = MPI_Status; + using Offset = MPI_Offset; + using Request = MPI_Request; + using Operation = MPI_Op; + using Information = MPI_Info; + using DataType = MPI_Datatype; + + inline Comm CommWorld = MPI_COMM_WORLD; + + // all nulls + + inline auto ProcNull = MPI_PROC_NULL; + inline auto InfoNull = MPI_INFO_NULL; + inline auto RequestNull = MPI_REQUEST_NULL; + inline auto StatusIgnore = MPI_STATUS_IGNORE; + inline auto StatusesIgnore = MPI_STATUSES_IGNORE; + inline auto FileNull = MPI_FILE_NULL; + inline Comm CommNull = MPI_COMM_NULL; + inline auto TypeNull = MPI_DATATYPE_NULL; + + // errors + inline const auto Success = MPI_SUCCESS; + inline const auto ErrOp = MPI_ERR_OP; + + inline const auto SumOp = MPI_SUM; + + inline const size_t MaxNoProcessors = 2048; + +} + +#else + +namespace pFlow::MPI +{ + +} + +#endif // pFlow_Build_MPI + + + +#endif //__mpiTypes_H__ diff --git a/src/phasicFlow/MPIParallelization/partitioning.cpp b/src/phasicFlow/MPIParallelization/partitioning.cpp new file mode 100644 index 00000000..0ae5cf82 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/partitioning.cpp @@ -0,0 +1,113 @@ + + +#include "partitioning.hpp" +#include "error.hpp" +#include "streams.hpp" + +void pFlow::partitioning::freeZoltan() +{ + if(validPointers_) + { + Zoltan::LB_Free_Part(&importGlobalGids_, &importLocalGids_, + &importProcs_, &importToPart_); + + Zoltan::LB_Free_Part(&exportGlobalGids_, &exportLocalGids_, + &exportProcs_, &exportToPart_); + validPointers_ = false; + } + + zoltan_.release(); +} + + +pFlow::partitioning::partitioning +( + const dictionary& dict, + const box& globalBox +) +: + globalBox_(globalBox) +{ + if(!zoltanInitialized__) + { + auto rc = Zoltan_Initialize + ( + processors::argc(), + processors::argv(), + &version_ + ); + + if (rc != ZOLTAN_OK) + { + fatalErrorInFunction<<"Cannot initialize zoltan"<(pFlowProcessors().localCommunicator()); + + zoltan_->Set_Param("DEBUG_LEVEL", "0"); + zoltan_->Set_Param("LB_METHOD", "RCB"); + zoltan_->Set_Param("NUM_GID_ENTRIES", "1"); + zoltan_->Set_Param("NUM_LID_ENTRIES", "1"); + zoltan_->Set_Param("OBJ_WEIGHT_DIM", "0"); + zoltan_->Set_Param("RETURN_LISTS", "ALL"); + +} + +bool pFlow::partitioning::partition(span points, pFlagTypeHost flags) +{ + pointCollection pointCollctn{points, flags}; + + return partition(pointCollctn); +} +int GetObjectSize +( + void *data, + int num_gid_entries, + int num_lid_entries, + ZOLTAN_ID_PTR global_id, + ZOLTAN_ID_PTR local_id, + int *ierr +) +{ + *ierr = ZOLTAN_OK; + pFlow::uint32 s = *(static_cast(data)); + return static_cast(s); +} + +void PackObject +( + void *data, + int num_gid_entries, + int num_lid_entries, + ZOLTAN_ID_PTR global_id, + ZOLTAN_ID_PTR local_id, + int dest, + int size, + char *buf, + int *ierr +) +{ + +} + +bool pFlow::partitioning::migrateData(span src, span dst, uint32 elementSize) +{ + dataCollection data{src, dst, elementSize}; + + zoltan_->Set_Obj_Size_Fn(GetObjectSize, &elementSize); + return false; +} + +pFlow::partitioning::~partitioning() +{ + freeZoltan(); +} + +void pFlow::partitioning::printBox()const +{ + pOutput<< "localBox:" << localBox_< points_; + pFlagTypeHost pFlag_; + + uint32 numActivePoints()const + { + return pFlag_.numActive(); + } +}; + +struct dataCollection +{ + span srcData_; + 
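+    // destination buffer that receives the migrated elements; each element occupies
+    // elementSize_ bytes of raw (char) data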
span dstData_; + uint32 elementSize_; +}; + +class partitioning +{ +protected: + + float version_ = 0.0; + + std::unique_ptr zoltan_ = nullptr; + + bool validPointers_ = false; + + box globalBox_; + + box localBox_; + + int32 changes_, numImport_, numExport_; + + id_t *importGlobalGids_, *importLocalGids_, *exportGlobalGids_, *exportLocalGids_; + + int32 *importProcs_, *importToPart_, *exportProcs_, *exportToPart_; + + uint32 numBeforePartition_ = 0 ; + + static inline bool zoltanInitialized__ = false; + + void freeZoltan(); + + virtual + bool partition(pointCollection& points) = 0; + +public: + + partitioning( + const dictionary& dict, + const box& globalBox); + + virtual + ~partitioning(); + + create_vCtor( + partitioning, + dictionary, + ( + const dictionary& dict, + const box& globalBox + ), + (dict, globalBox)); + + bool partition( + span points, + pFlagTypeHost flags); + + + bool migrateData(span src, span dst, uint32 elementSize); + + inline + auto localBox()const + { + return localBox_; + } + + inline + const auto& globalBox()const + { + return globalBox_; + } + + inline + bool partitionsChanged()const + { + return changes_ == 1; + } + + + uint32 numberImportThisProc()const + { + return numImport_; + } + + uint32 numberExportThisProc()const + { + return numExport_; + } + + virtual + span exportList(int procNo)const = 0; + + virtual + pFlow::MPI::procVector> allExportLists()const=0; + + void printBox()const; + + +}; + + +} + + +#endif //__partitioning_hpp__ + + + +/*static + int getNumberOfPoints(void *data, int32 *ierr); + + static + void getPointList( + void *data, + int32 sizeGID, + int32 sizeLID, + id_t* globalID, + id_t* localID, + int32 wgt_dim, + float *obj_wgts, + int32 *ierr);*/ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/procCommunication.cpp b/src/phasicFlow/MPIParallelization/procCommunication.cpp new file mode 100644 index 00000000..81869453 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/procCommunication.cpp @@ -0,0 +1,30 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "procCommunication.hpp" + + +pFlow::MPI::procCommunication::procCommunication +( + const localProcessors& proc +) +: + processors_(proc) +{} diff --git a/src/phasicFlow/MPIParallelization/procCommunication.hpp b/src/phasicFlow/MPIParallelization/procCommunication.hpp new file mode 100644 index 00000000..db600386 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/procCommunication.hpp @@ -0,0 +1,178 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __procCommunication_hpp__ +#define __procCommunication_hpp__ + + +#include "procVector.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + + +class procCommunication +{ +protected: + + const localProcessors& processors_; + +public: + + procCommunication(const localProcessors& proc); + + ~procCommunication()=default; + + /// @brief Tell if this processor is master processor in the local + /// communicator + /// @return true if this processor is master + + inline + const auto& processors()const + { + return processors_; + } + + inline + bool localMaster()const + { + return processors_.localMaster();; + } + + inline + auto localSize()const + { + return processors_.localSize(); + } + + inline + auto localRank()const + { + return processors_.localRank(); + } + + inline + auto localCommunicator()const + { + return processors_.localCommunicator(); + } + + /// @brief return the master number in the local communicator + auto localMasterNo()const + { + return processors_.localMasterNo(); + } + + /// Send a single val to all processors including itself (local communicator) + template + std::pair distributeMasterToAll(const T& val) + { + + T retVal = val; + auto res = CheckMPI( + Bcast(retVal, localMasterNo(),localCommunicator() ), + false); + + return {retVal, res}; + } + + /// @brief Send a single value to all processor including master (in local communicator) + /// @param val value to be sent + /// @param recvVal recieved value + /// @return true if successful and false if fail + template + bool distributeMasterToAll(const T& val, T& recvVal) + { + recvVal = val; + return CheckMPI( + Bcast(recvVal, localMasterNo(), localCommunicator()), + false); + } + + /// @brief values in the vector (size is equal to number of + // processors in local communicator) to each processor + template + std::pair distributeMasterToAll(const procVector& vals) + { + T val; + auto vec = vals.getSpan(); + auto res = CheckMPI( + scatter(vec, val, localMasterNo(), localCommunicator()), + false); + + return {val, res}; + } + + /// @brief Each 
processor in the local communicator calls this funtion with a value + /// and the values are distributed among all processors + template + std::pair, bool> collectAllToAll(const T& val) + { + procVector allVec; + auto vec = allVec.getSpan(); + auto res = CheckMPI( + allGather(val, vec, localCommunicator()), + false); + return {allVec, res}; + } + + /// @brief Each processor in the local communicator calls this funtion with a value + /// and the values are distributed among all processors + template + bool collectAllToAll(const T& val, procVector& allVec) + { + auto vec = allVec.getSpan(); + return CheckMPI( + allGather(val, vec, localCommunicator()), + false); + } + + /// @brief Each processor in the local communicator calls this function with a value + /// and all values are collected in the master processor + template + std::pair,bool> collectAllToMaster(const T& val) + { + // only on master processor + procVector masterVec(processors_, true); + + auto masterSpan = masterVec.getSpan(); + auto res = CheckMPI( + gather(val,masterSpan, localMasterNo(), localCommunicator()), + false); + + return {masterVec, res}; + + } + + template + bool collectAllToMaster(const T& val, procVector& masterVec) + { + // only on master processor + auto [vec, res] = collectAllToMaster(val); + masterVec = vec; + return res; + } + +}; //procCommunication + +} // pFlow::MPI + +#endif //__procCommunication_hpp__ diff --git a/src/phasicFlow/MPIParallelization/procVector.hpp b/src/phasicFlow/MPIParallelization/procVector.hpp new file mode 100644 index 00000000..f9a80037 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/procVector.hpp @@ -0,0 +1,199 @@ +#ifndef __procVector_hpp__ +#define __procVector_hpp__ + +// from PhasicFlow + +#include "localProcessors.hpp" +#include "span.hpp" +#include "streams.hpp" +#include "IOPattern.hpp" + +#include "mpiTypes.hpp" + +namespace pFlow::MPI +{ + +template +class procVector +: + public std::vector +{ +public: + + using ProcVectorType = procVector; + + using VectorType = std::vector; + +protected: + + int rank_ = 0; + + bool isMaster_ = false; + + using VectorType::reserve; + + using VectorType::resize; + + using VectorType::assign; + + using VectorType::clear; + + using VectorType::erase; + +public: + + procVector( + const localProcessors& procs, + bool onlyMaster = false) + : + rank_(procs.localRank()), + isMaster_(procs.localMaster()) + { + + if( onlyMaster && !isMaster_ ) return; + this->reserve(procs.localSize()); + this->resize(procs.localSize()); + } + + procVector( + const T& val, + const localProcessors& procs, + bool onlyMaster = false) + : + procVector(procs, onlyMaster) + { + std::fill(this->begin(), this->end(), val); + } + + procVector(const T& val, const procVector& src) + { + this->reserve(src.size()); + this->resize(src.size()); + std::fill(this->begin(), this->end(), val); + } + + procVector(const localProcessors& procs, const VectorType& src) + : + procVector(procs) + { + if(src.size()!= this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in construction"<assign(src.begin(), src.end()); + } + + procVector(const procVector&) = default; + + procVector(procVector&&) = default; + + procVector& operator=(const procVector&) = default; + + procVector& operator=(procVector&&) = default; + + procVector& operator=(const VectorType& src) + { + if(src.size() != this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in copy assignment"<(*this).operator=(src); + return *this; + } + + 
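+    // move-assignment from a plain std::vector; the source size must match the number
+    // of local processors, otherwise it is a fatal error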
procVector& operator=(VectorType&& src) + { + if(src.size() != this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in move assignment" + <(*this).operator=(std::move(src)); + return *this; + } + + procVector(const localProcessors& procs, VectorType&& src) + : + VectorType(std::move(src)) + { + if(this->size()!= static_cast(procs.localSize())) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in move"<(this->data(), this->size()); + } + + inline + auto getSpan()const + { + return span(const_cast(this->data()), this->size()); + } + + bool write( + iOstream& os, + const IOPattern& iop ) const + { + return writeStdVector(os, *this, iop); + } + +}; + +template +inline iOstream& operator << (iOstream& os, const procVector& ovec ) +{ + if( !ovec.write(os, IOPattern::AllProcessorsDifferent) ) + { + ioErrorInFile(os.name(), os.lineNumber()); + fatalExit; + } + return os; +} + +} + + +#endif diff --git a/src/phasicFlow/MPIParallelization/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/processorBoundaryField.cpp new file mode 100644 index 00000000..5e94d0aa --- /dev/null +++ b/src/phasicFlow/MPIParallelization/processorBoundaryField.cpp @@ -0,0 +1,29 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +template + pFlow::processorBoundaryField::processorBoundaryField +( + const boundaryBase& boundary, + InternalFieldType& internal +) +: + BoundaryFieldType(boundary, internal) +{} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/processorBoundaryField.hpp new file mode 100644 index 00000000..b3e83a22 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/processorBoundaryField.hpp @@ -0,0 +1,80 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. 
+ + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __processorBoundaryField_hpp__ +#define __processorBoundaryField_hpp__ + +#include "boundaryField.hpp" + +namespace pFlow +{ + +template< class T, class MemorySpace = void> +class processorBoundaryField +: + public boundaryField +{ +public: + + using processorBoundaryFieldType = processorBoundaryField; + + using BoundaryFieldType = boundaryField; + + using InternalFieldType = typename BoundaryFieldType::InternalFieldType; + + using memory_space = typename BoundaryFieldType::memory_space; + + using execution_space = typename BoundaryFieldType::execution_space; + + + +public: + + TypeInfo("boundaryField"); + + processorBoundaryField( + const boundaryBase& boundary, + InternalFieldType& internal); + + + add_vCtor + ( + BoundaryFieldType, + processorBoundaryFieldType, + boundaryBase + ); + + + bool hearChanges + ( + const message& msg, + const anyList& varList + ) override + { + notImplementedFunction; + return false; + } + +}; + +} + +#include "processorBoundaryField.cpp" + +#endif //__processorBoundaryField_hpp__ diff --git a/src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp b/src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp new file mode 100644 index 00000000..a81b5249 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp @@ -0,0 +1,10 @@ + +//#include "Field.hpp" +#include "createBoundaryFields.hpp" +#include "processorBoundaryField.hpp" + +createBoundary(pFlow::int8, pFlow::HostSpace, processor); + +createBoundary(pFlow::real, pFlow::HostSpace, processor); + + diff --git a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp new file mode 100644 index 00000000..3a22ae1f --- /dev/null +++ b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp @@ -0,0 +1,319 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "zoltan_cpp.h" + + +#include "error.hpp" +#include "processors.hpp" +#include "rcb1DPartitioning.hpp" + +bool pFlow::rcb1DPartitioning::partition(pointCollection &points) +{ + + zoltan_->Set_Param("RCB_OUTPUT_LEVEL", "0"); + zoltan_->Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); + zoltan_->Set_Param("KEEP_CUTS", "1"); + zoltan_->Set_Param("REDUCE_DIMENSIONS", "1"); + zoltan_->Set_Param("RCB_RECOMPUTE_BOX", "1"); + zoltan_->Set_Param("AVERAGE_CUTS", "0"); + zoltan_->Set_Param("MIGRATE_ONLY_PROC_CHANGES", "0"); + + zoltan_->Set_Num_Obj_Fn(rcb1DPartitioning::getNumberOfPoints, &points); + zoltan_->Set_Obj_List_Fn(rcb1DPartitioning::getPointList, &points); + zoltan_->Set_Num_Geom_Fn(rcb1DPartitioning::getNumGeometry, &points); + switch (direction_) + { + case Direction::X: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_x, &points); + break; + case Direction::Y: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_y, &points); + break; + case Direction::Z: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_z, &points); + break; + } + + int numGidEntries_, numLidEntries_; + int rc = zoltan_->LB_Partition(changes_, numGidEntries_, numLidEntries_, + numImport_, importGlobalGids_, importLocalGids_, importProcs_, importToPart_, + numExport_, exportGlobalGids_, exportLocalGids_, exportProcs_, exportToPart_); + + if (rc != ZOLTAN_OK) + { + fatalErrorInFunction<< "Zoltan faild to perform partitioning."< thisProc(points.numActivePoints(),0); + + + for(auto i =0; iRCB_Box + ( + processors::globalRank(), + nDim, + x0, y0, z0, + x1, y1, z1 + ); + + localBox_ = globalBox_; + + switch (direction_) + { + case Direction::X : + localBox_.minPoint().x_ = x0; + localBox_.maxPoint().x_ = x1; + break; + + case Direction::Y : + localBox_.minPoint().y_ = x0; + localBox_.maxPoint().y_ = x1; + break; + + case Direction::Z : + localBox_.minPoint().z_ = x0; + localBox_.maxPoint().z_ = x1; + break; + } + + + localBox_.minPoint() = max(localBox_.minPoint(), globalBox_.minPoint()); + localBox_.maxPoint() = min(localBox_.maxPoint(), globalBox_.maxPoint()); + + + return true; +} + +pFlow::rcb1DPartitioning::rcb1DPartitioning +( + const dictionary &dict, + const box &globalBox +) +: + partitioning(dict, globalBox), + exportIds_(pFlowProcessors()) +{ + + word directionName = dict.getVal("direction"); + + if(toUpper(directionName)== "X") + { + direction_ = Direction::X; + dirVector_ ={1.0, 0.0, 0.0}; + } + else if( toUpper(directionName) == "Y") + { + direction_ = Direction::Y; + dirVector_ ={0.0, 1.0, 0.0}; + } + else if( toUpper(directionName) == "Z") + { + direction_ = Direction::Z; + dirVector_ ={0.0, 0.0, 1.0}; + } + else + { + fatalErrorInFunction<< "wrong direction in dictionary "<< + dict.globalName()<<". 
Directions should be one of x, y, or z."<(data); + + *ierr = ZOLTAN_OK; + + return obj->numActivePoints(); +} + +void pFlow::rcb1DPartitioning::getPointList +( + void *data, + int sizeGID, + int sizeLID, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int wgt_dim, + float *obj_wgts, + int *ierr +) +{ + auto* obj = static_cast(data); + *ierr = ZOLTAN_OK; + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + globalID[n] = i; + localID[n] = n; + n++; + } + } + +} + +void pFlow::rcb1DPartitioning::getGeometryList_x +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].x_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + +void pFlow::rcb1DPartitioning::getGeometryList_y +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].y_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + +void pFlow::rcb1DPartitioning::getGeometryList_z +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].z_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + diff --git a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp new file mode 100644 index 00000000..ad5f7693 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp @@ -0,0 +1,241 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __rcb1DPartitioning_hpp__ +#define __rcb1DPartitioning_hpp__ + +#include "partitioning.hpp" +#include "procVector.hpp" + +namespace pFlow +{ + + +class rcb1DPartitioning +: +public partitioning +{ +public: + + enum Direction + { + X = 0, + Y = 1, + Z = 2 + }; + +protected: + + /// Direction of partitioning + Direction direction_ = Direction::X; + + realx3 dirVector_ = {1.0, 0.0, 0.0}; + + word directionName_ = "x"; + + MPI::procVector> exportIds_; + + bool partition(pointCollection& points) override; + +public: + + + rcb1DPartitioning( + const dictionary& dict, + const box& globalBox); + + + virtual + ~rcb1DPartitioning()=default; + + span exportList(int procNo)const override + { + return span( + const_cast(exportIds_[procNo].data()), + exportIds_[procNo].size()); + } + + + pFlow::MPI::procVector> allExportLists()const override + { + pFlow::MPI::procVector> allList(pFlowProcessors()); + + for(int i=0; i(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + *ierr = ZOLTAN_OK; + + for (int i=0; i < num_obj ; i++) + { + geom_vec[i] = obj->pointList()[i].y_; + } + + return; + } + + + static + int getNumGeometry(void *data, int *ierr) + { + *ierr = ZOLTAN_OK; + return 1; + } + +}; + + +class RCB_x_partitioning +: +public partitioning +{ +public: + + + RCB_x_partitioning(int argc, char *argv[], pointCollection& collection, const box& gBox) + : + partitioning(argc, argv, collection, gBox) + {} + + virtual + ~RCB_x_partitioning()=default; + + + bool partition() override; + + + static + void getGeometryList( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr); + + static + int getNumGeometry(void *data, int *ierr); + + +};*/ + +} // pFlow +#endif //__rcb1DPartitioning_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp new file mode 100644 index 00000000..a771dc54 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp @@ -0,0 +1,158 @@ + + +template +pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute +( + const localProcessors& procs +) +: + procCommunication(procs), + indexedMap_(TypeNull, procs, true) +{ + +} + +template +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + std::vector index; + + freeIndexedMap(); + + for(auto proc = 0; proc< maps.size(); proc++) + { + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i( makeSpan(index), dt)) + { + fatalErrorInFunction; + return false; + } + else + { + indexedMap_[proc] = dt; + } + } + } + return true; +} + + +template +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + freeIndexedMap(); + + + for(auto proc = 0; proc< maps.size(); proc++) + { + DataType dt; + if( !typeCreateIndexedBlock(maps[proc], dt) ) + { + fatalErrorInFunction; + return false; + } + else + { + indexedMap_[proc] = dt; + } + } + } + return true; +} + +template +void 
pFlow::MPI::scatteredMasterDistribute::freeIndexedMap() +{ + for(auto i=0; i +bool pFlow::MPI::scatteredMasterDistribute::distribute +( + span& sendBuff, + span& recvb +) +{ + procVector requests(processors(), true); + procVector statuses(processors(), true); + + if(this->localMaster()) + { + bool res = true; + for(int32 i = indexedMap_.size()-1; i>=0; i--) + { + res = res&&CheckMPI( + MPI_Issend( + sendBuff.data(), + 1, + indexedMap_[i], + i, + 0, + localCommunicator(), + &requests[i]), + false); + } + + if(!res)return false; + } + + Status stat; + bool sucss = CheckMPI( + MPI_Recv( + recvb.data(), + recvb.size()*sFactor(), + Type(), + 0, + 0, + localCommunicator(), + &stat), + false); + + if(this->localMaster()) + { + CheckMPI( + MPI_Waitall(requests.size(), requests.data(), statuses.data()), + false + ); + } + + return sucss; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp new file mode 100644 index 00000000..dfffb384 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp @@ -0,0 +1,69 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
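The distribute() member above follows a common scatter pattern: the master describes, for every destination rank, which scattered elements of its send buffer belong to that rank as an indexed-block derived datatype, posts one non-blocking synchronous send (MPI_Issend) of count 1 per rank, and every rank, master included, receives its share as a contiguous buffer. A stand-alone sketch of the same pattern with plain MPI calls and double data follows; scatterScattered and the maps argument are illustrative names, not part of the library.

#include <mpi.h>
#include <vector>

// master scatters non-contiguous elements of sendBuf to each rank using one
// indexed-block datatype per destination; every rank receives contiguously
void scatterScattered(const std::vector<double>&           sendBuf,  // significant on master only
                      const std::vector<std::vector<int>>& maps,     // maps[r]: element indices owned by rank r (master only)
                      std::vector<double>&                 recvBuf,  // resized by the caller to its own count
                      MPI_Comm                             comm)
{
    int rank = 0, nProcs = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nProcs);

    std::vector<MPI_Request>  reqs;
    std::vector<MPI_Datatype> types;

    if (rank == 0)
    {
        for (int r = 0; r < nProcs; ++r)
        {
            MPI_Datatype t;
            // one block of length 1 at every listed element index
            MPI_Type_create_indexed_block(static_cast<int>(maps[r].size()), 1,
                                          maps[r].data(), MPI_DOUBLE, &t);
            MPI_Type_commit(&t);
            types.push_back(t);

            reqs.emplace_back();
            MPI_Issend(sendBuf.data(), 1, t, r, 0, comm, &reqs.back());
        }
    }

    // every rank, the master included, receives its picked elements contiguously
    MPI_Recv(recvBuf.data(), static_cast<int>(recvBuf.size()), MPI_DOUBLE,
             0, 0, comm, MPI_STATUS_IGNORE);

    if (rank == 0)
    {
        MPI_Waitall(static_cast<int>(reqs.size()), reqs.data(), MPI_STATUSES_IGNORE);
        for (auto& t : types) MPI_Type_free(&t);
    }
}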
+ +-----------------------------------------------------------------------------*/ + +#ifndef __scatteredMasterDistribute_hpp__ +#define __scatteredMasterDistribute_hpp__ + +#include "procCommunication.hpp" +#include "mpiCommunication.hpp" +#include "procVector.hpp" +#include "streams.hpp" + + +namespace pFlow::MPI +{ + +template +class scatteredMasterDistribute +: + public procCommunication +{ +protected: + + procVector indexedMap_; + + void freeIndexedMap(); + +public: + + scatteredMasterDistribute(const localProcessors& procs); + + ~scatteredMasterDistribute() + { + freeIndexedMap(); + } + + scatteredMasterDistribute(const scatteredMasterDistribute&)=delete; + + scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = delete; + + bool setDataMaps(procVector>& maps); + + bool setDataMaps(procVector>& maps); + + bool distribute(span& sendBuff, span& recvb); + +}; + +} //pFlow::MPI + +#include "scatteredMasterDistribute.cpp" + +#endif //__scatteredMasterDistribute_hpp__ diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp new file mode 100644 index 00000000..7579e8d5 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp @@ -0,0 +1,166 @@ + +#include "scatteredMasterDistributeChar.hpp" + +pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute +( + size_t sizeOfElement, + const localProcessors& procs +) +: + procCommunication(procs), + indexedMap_(TypeNull, procs, true), + sizeOfElement_(sizeOfElement) +{} + + +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + freeIndexedMap(); + + std::vector index; + + for(auto proc = 0; proc< maps.size(); proc++) + { + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + std::vector index; + freeIndexedMap(); + + for(auto proc = 0; proc< maps.size(); proc++) + { + + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i::freeIndexedMap() +{ + for(auto i=0; i::distribute +( + span& sendBuff, + span& recvb +) +{ + procVector requests(processors(), true); + procVector statuses(processors(), true); + + + if(this->localMaster()) + { + bool res = true; + for(int32 i = indexedMap_.size()-1; i>=0; i--) + { + res = res&&CheckMPI( + MPI_Issend( + sendBuff.data(), + 1, + indexedMap_[i], + i, + 0, + localCommunicator(), + &requests[i]), + false); + } + + if(!res)return false; + } + + Status stat; + bool sucss = CheckMPI( + MPI_Recv( + recvb.data(), + recvb.size(), + MPI_CHAR, + 0, + 0, + localCommunicator(), + &stat), + true); + + if(this->localMaster()) + { + CheckMPI( + MPI_Waitall(requests.size(), requests.data(), statuses.data()), + false + ); + } + + return sucss; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp new file mode 100644 index 00000000..e0cee3b4 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp @@ -0,0 +1,67 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + 
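For the char specialization introduced above, the element type is opaque: each element is just a run of sizeOfElement_ bytes, so the maps of element indices presumably have to be turned into byte offsets before an indexed-block type over MPI_CHAR can be built. A small sketch of that conversion is given below; makeByteIndexedType and its arguments are illustrative names and the approach is an assumption, not verbatim from the patch.

#include <mpi.h>
#include <vector>

// pick element i by addressing byte offset i*elemSize with block length elemSize
bool makeByteIndexedType(const std::vector<int>& elementIndices,
                         int                     elemSize,
                         MPI_Datatype&           newType)
{
    std::vector<int> byteOffsets(elementIndices.size());
    for (std::size_t i = 0; i < elementIndices.size(); ++i)
        byteOffsets[i] = elementIndices[i] * elemSize;   // element index -> byte offset

    if (MPI_Type_create_indexed_block(
            static_cast<int>(byteOffsets.size()), elemSize,
            byteOffsets.data(), MPI_CHAR, &newType) != MPI_SUCCESS)
        return false;

    return MPI_Type_commit(&newType) == MPI_SUCCESS;
}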
OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __scatteredMasterDistributeChar_hpp__ +#define __scatteredMasterDistributeChar_hpp__ + +#include "scatteredMasterDistribute.hpp" + +namespace pFlow::MPI +{ + +template<> +class scatteredMasterDistribute +: + public procCommunication +{ +protected: + + procVector indexedMap_; + + size_t sizeOfElement_; + + void freeIndexedMap(); + +public: + + scatteredMasterDistribute( + size_t sizeOfElement, + const localProcessors& procs); + + ~scatteredMasterDistribute() + { + freeIndexedMap(); + } + + scatteredMasterDistribute(const scatteredMasterDistribute&)=delete; + + scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = delete; + + bool setDataMaps(procVector>& maps); + + bool setDataMaps(procVector>& maps); + + bool distribute(span& sendBuff, span& recvb); + +}; + +} // pFlow::MPI + +#endif //__scatteredMasterDistributeChar_hpp__ From 656e03de360e345712740018d1b937416b461842 Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Sun, 21 Jan 2024 13:23:45 -0800 Subject: [PATCH 02/14] bug fix for empty domains in partitioning (tested) --- .../domain/MPISimulationDomain.cpp | 27 ++++++++++++++----- .../domain/MPISimulationDomain.hpp | 3 +++ .../MPIParallelization/rcb1DPartitioning.cpp | 6 +++++ .../MPIParallelization/rcb1DPartitioning.hpp | 5 ++-- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp index da66b8c2..93583714 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -60,16 +60,26 @@ bool pFlow::MPISimulationDomain::createBoundaryDicts() "in dictionary "<< boundaries.globalName()<numberImportThisProc(); uint32 numExport = domainPartition_->numberExportThisProc(); - return initialNumPoints_+ numImport - numExport;; + return max(initialNumPoints_+ numImport - numExport, 0u); +} + +bool pFlow::MPISimulationDomain::initialThisDomainActive() const +{ + return initialNumberInThis()>0; } bool pFlow::MPISimulationDomain::initialTransferBlockData diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp index b47a9201..feef591c 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp @@ -70,11 +70,14 @@ public: /// @param pointPos /// @return bool initialUpdateDomains(span pointPos)override; + /// @brief /// @return uint32 initialNumberInThis()const override; + bool initialThisDomainActive()const override; + bool initialTransferBlockData( span src, span dst, diff --git 
a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp index 3a22ae1f..c2345ab6 100644 --- a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp +++ b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp @@ -97,6 +97,12 @@ bool pFlow::rcb1DPartitioning::partition(pointCollection &points) localBox_ = globalBox_; + if(equal(x0, x1)) + { + x0 = x0 - 0.00001; + x1 = x1 + 0.00001; + } + switch (direction_) { case Direction::X : diff --git a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp index ad5f7693..b58532e3 100644 --- a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp +++ b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp @@ -60,9 +60,8 @@ public: const dictionary& dict, const box& globalBox); - - virtual - ~rcb1DPartitioning()=default; + + ~rcb1DPartitioning() override=default; span exportList(int procNo)const override { From 94fcc3d01bcf2b230e9b53d914e789b167f2d37d Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Sat, 27 Apr 2024 08:44:35 -0700 Subject: [PATCH 03/14] MPI devleopment with boundaries for contact search and data communication, memory leak --- .../MPIParallelization/MPI/gatherMaster.hpp | 106 +++++ .../MPI/mpiCommunication.hpp | 427 ++++++++++++++++++ .../MPIParallelization/MPI/mpiTypes.hpp | 69 +++ .../MPI/procCommunication.cpp | 30 ++ .../MPI/procCommunication.hpp | 178 ++++++++ .../MPIParallelization/MPI/procVector.hpp | 199 ++++++++ .../MPI/scatteredMasterDistribute.cpp | 158 +++++++ .../MPI/scatteredMasterDistribute.hpp | 67 +++ .../MPI/scatteredMasterDistributeChar.cpp | 166 +++++++ .../MPI/scatteredMasterDistributeChar.hpp | 66 +++ .../dataIOMPI/dataIOMPI.cpp | 52 +++ .../dataIOMPI/dataIOMPI.hpp | 58 +++ .../dataIOMPI/dataIOMPIs.cpp | 27 ++ .../domain/MPISimulationDomain.cpp | 191 ++++---- .../domain/MPISimulationDomain.hpp | 147 +++--- .../domain/partitioning/partitioning.cpp | 113 +++++ .../domain/partitioning/partitioning.hpp | 168 +++++++ .../domain/partitioning/rcb1DPartitioning.cpp | 330 ++++++++++++++ .../domain/partitioning/rcb1DPartitioning.hpp | 240 ++++++++++ .../MPIParallelization/mpiCommunication.hpp | 6 - .../pointField/processorBoundaryField.cpp | 110 +++++ .../pointField/processorBoundaryField.hpp | 113 +++++ .../pointField/processorBoundaryFields.cpp | 24 + .../boundaries/boundaryProcessor.cpp | 148 ++++++ .../boundaries/boundaryProcessor.hpp | 116 +++++ .../boundaries/dataReciever.hpp | 108 +++++ .../pointStructure/boundaries/dataSender.hpp | 120 +++++ 27 files changed, 3379 insertions(+), 158 deletions(-) create mode 100644 src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/procVector.hpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp create mode 100644 src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp create mode 100644 src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp create mode 100644 
src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp create mode 100644 src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp create mode 100644 src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp create mode 100644 src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.hpp create mode 100644 src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp create mode 100644 src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp create mode 100644 src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp create mode 100644 src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp create mode 100644 src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp create mode 100644 src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp create mode 100644 src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp create mode 100644 src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp create mode 100644 src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp diff --git a/src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp b/src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp new file mode 100644 index 00000000..dc87ec01 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp @@ -0,0 +1,106 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __gatherMaster_hpp__ +#define __gatherMaster_hpp__ + +#include + +#include "procCommunication.hpp" +#include "stdVectorHelper.hpp" + +namespace pFlow::MPI +{ + +template +class gatherMaster +: + public procCommunication +{ +protected: + + std::vector buffer_; + +public: + + gatherMaster(const localProcessors& procs) + : + procCommunication(procs) + {} + + span getData() + { + if(this->localMaster()) + return span( buffer_.data(), buffer_.size()); + else + return span(nullptr, 0); + } + + std::vector moveData() + { + return std::move(buffer_); + } + + bool gatherData(span data) + { + int thisN = data.size(); + + bool succss; + + procVector numElems(this->processors(), true); + procVector displ(this->processors(), true); + + if( !this->collectAllToMaster(thisN, numElems) ) + { + fatalErrorInFunction<< + "error in collecting number of elements from processors"<(0)); + + buffer_.resize(totalN); + + std::exclusive_scan( + numElems.begin(), + numElems.end(), + displ.begin(), + 0); + + auto bufferSpan = span(this->buffer_.data(),this->buffer_.size() ); + + return CheckMPI( + Gatherv( + data, + bufferSpan, + numElems.getSpan(), + displ.getSpan(), + this->localMasterNo(), + this->localCommunicator()), + false); + + } + + +}; +} + +#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp new file mode 100644 index 00000000..4fd5e260 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp @@ -0,0 +1,427 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
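gatherData() above relies on two pieces: the per-rank element counts collected on the master and an exclusive scan that turns those counts into displacements for Gatherv. The stand-alone sketch below shows the same mechanics with plain MPI calls and double data; gatherOnMaster and its arguments are illustrative only.

#include <mpi.h>
#include <numeric>
#include <vector>

// gather variable-sized contributions onto rank 0
std::vector<double> gatherOnMaster(const std::vector<double>& local, MPI_Comm comm)
{
    int rank = 0, nProcs = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nProcs);

    // step 1: master learns how many elements each rank contributes
    int myCount = static_cast<int>(local.size());
    std::vector<int> counts(rank == 0 ? nProcs : 0);
    MPI_Gather(&myCount, 1, MPI_INT, counts.data(), 1, MPI_INT, 0, comm);

    // step 2: exclusive scan of the counts gives the receive displacements
    std::vector<int>    displs(counts.size());
    std::vector<double> all;
    if (rank == 0)
    {
        std::exclusive_scan(counts.begin(), counts.end(), displs.begin(), 0);
        all.resize(std::accumulate(counts.begin(), counts.end(), 0));
    }

    // step 3: variable-count gather into the master buffer
    MPI_Gatherv(local.data(), myCount, MPI_DOUBLE,
                all.data(), counts.data(), displs.data(), MPI_DOUBLE, 0, comm);

    return all;   // meaningful on the master rank only
}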
+ +-----------------------------------------------------------------------------*/ + +#ifndef __mpiCommunication_H__ +#define __mpiCommunication_H__ + + +#include "mpiTypes.hpp" +#include "types.hpp" +#include "span.hpp" + + + +namespace pFlow::MPI +{ + +extern DataType realx3Type__; + +extern DataType realx4Type__; + +extern DataType int32x3Type__; + +template +auto constexpr Type() +{ + return MPI_BYTE; +} + +template +auto constexpr sFactor() +{ + return sizeof(T); +} + +template +auto constexpr Type() +{ + return MPI_CHAR; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_SHORT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_UNSIGNED_SHORT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_INT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_UNSIGNED; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_LONG; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_UNSIGNED_LONG; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + + +template<> +auto constexpr Type() +{ + return MPI_FLOAT; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_DOUBLE; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return realx3Type__; +} + +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return realx4Type__; +} + +template<> +auto constexpr sFactor() +{ + return 1; +} + + +template<> +inline +auto Type() +{ + return int32x3Type__; +} + + +template<> +auto constexpr sFactor() +{ + return 1; +} + +/*inline +auto createByteSequence(int sizeOfElement) +{ + DataType newType; + MPI_Type_contiguous(sizeOfElement, MPI_CHAR, &newType); + MPI_Type_commit(&newType); + return newType; +}*/ + +inline +auto TypeCommit(DataType* type) +{ + return MPI_Type_commit(type); +} + +inline +auto TypeFree(DataType* type) +{ + return MPI_Type_free(type); + +} +template +inline auto getCount(Status* status, int& count) +{ + int lCount; + auto res = MPI_Get_count(status, Type(), &lCount); + count = lCount/sFactor(); + return res; +} + +template +inline int convertIndex(const int& ind) +{ + return ind*sFactor(); +} + +template +inline auto send(span data, int dest, int tag, Comm comm) +{ + return MPI_Send( + data.data(), + sFactor()*data().size(), + Type(), + dest, + tag, + comm); +} + +template +inline auto Isend(span data, int dest, int tag, Comm comm, Request* req) +{ + return MPI_Isend( + data.data(), + sFactor()*data.size(), + Type(), + dest, + tag, + comm, + req); +} + +template +inline auto Isend(const T& data, int dest, int tag, Comm comm, Request* req) +{ + return MPI_Isend( + &data, + sFactor(), + Type(), + dest, + tag, + comm, + req); +} + +template +inline auto recv(span data, int source, int tag, Comm comm, Status *status) +{ + return MPI_Recv( + data.data(), + sFactor()*data.size(), + Type(), + source, + tag, + comm, + status); +} + +template +inline auto Irecv(T& data, int source, int tag, Comm comm, Request* req) +{ + return MPI_Irecv( + &data, + sFactor(), + Type(), + source, + tag, + comm, + req); +} + +template +inline auto Irecv(span data, int source, int tag, Comm comm, Request* req) +{ + return 
MPI_Irecv( + data.data(), + sFactor()*data.size(), + Type(), + source, + tag, + comm, + req); +} + +template +inline auto scan(T sData, T& rData, Comm comm, Operation op = SumOp) +{ + return MPI_Scan(&sData, &rData, sFactor()*1, Type(), op , comm ); +} + +// gathering one scalar data to root processor +template +inline auto gather(T sendData, span& recvData, int root, Comm comm) +{ + return MPI_Gather( + &sendData, + sFactor()*1, + Type(), + recvData.data(), + sFactor()*1, + Type(), + root, + comm); +} + +template +inline auto allGather(T sendData, span& recvData, Comm comm) +{ + return MPI_Allgather( + &sendData, + sFactor()*1, + Type(), + recvData.data(), + sFactor()*1, + Type(), + comm); +} + +template +inline auto scatter(span sendData, T& recvData, int root, Comm comm) +{ + return MPI_Scatter( + sendData.data(), + sFactor()*1, + Type(), + &recvData, + sFactor()*1, + Type(), + root, + comm); +} + +template +inline auto Bcast(T& sendData, int root, Comm comm) +{ + return MPI_Bcast( + &sendData, sFactor()*1, Type(), root, comm); + +} + + +template +bool typeCreateIndexedBlock( + span index, + DataType &newType) +{ + auto res = MPI_Type_create_indexed_block( + index.size(), + sFactor(), + index.data(), + Type(), + &newType); + + if(res == Success) + { + TypeCommit(&newType); + } + else + { + return false; + } + + return true; +} + + +template +inline auto Gatherv +( + span sendData, + span& recvData, + span recvCounts, + span displs, + int root, + Comm comm) +{ + + return MPI_Gatherv( + sendData.data(), + sendData.size()*sFactor(), + Type(), + recvData.data(), + recvCounts.data(), + displs.data(), + Type(), + root, + comm + ); + +} + +inline auto Wait(Request* request, Status* status) +{ + return MPI_Wait(request, status); +} + +inline auto typeFree(DataType& type) +{ + return MPI_Type_free(&type); +} + + +} + + +#endif //__mpiCommunication_H__ diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp new file mode 100644 index 00000000..873dd7eb --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp @@ -0,0 +1,69 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
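The Type()/sFactor() pair above lets one set of wrappers handle both types with a dedicated MPI datatype (scale factor 1) and arbitrary PODs, which fall back to MPI_BYTE with the count scaled by sizeof(T). A small sketch of that fallback path, spelled out with a direct MPI call, follows; contactPair and sendPairs are hypothetical names used only for illustration.

#include <mpi.h>

struct contactPair { int i; int j; double overlap; };   // hypothetical POD payload

// what the generic wrapper boils down to for an unspecialised type:
// Type<contactPair>() == MPI_BYTE and sFactor<contactPair>() == sizeof(contactPair)
inline int sendPairs(const contactPair* buf, int n, int dest, int tag, MPI_Comm comm)
{
    return MPI_Send(buf, static_cast<int>(sizeof(contactPair)) * n, MPI_BYTE,
                    dest, tag, comm);
}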
+ +-----------------------------------------------------------------------------*/ + +#ifndef __mpiTypes_H__ +#define __mpiTypes_H__ + + + +#include + +namespace pFlow::MPI +{ + // types + using Comm = MPI_Comm; + using Group = MPI_Group; + using Status = MPI_Status; + using Offset = MPI_Offset; + using Request = MPI_Request; + using Operation = MPI_Op; + using Information = MPI_Info; + using DataType = MPI_Datatype; + + inline Comm CommWorld = MPI_COMM_WORLD; + + // all nulls + + inline auto ProcNull = MPI_PROC_NULL; + inline auto InfoNull = MPI_INFO_NULL; + inline auto RequestNull = MPI_REQUEST_NULL; + inline auto StatusIgnore = MPI_STATUS_IGNORE; + inline auto StatusesIgnore = MPI_STATUSES_IGNORE; + inline auto FileNull = MPI_FILE_NULL; + inline Comm CommNull = MPI_COMM_NULL; + inline auto TypeNull = MPI_DATATYPE_NULL; + + // errors + inline const auto Success = MPI_SUCCESS; + inline const auto ErrOp = MPI_ERR_OP; + + inline const auto SumOp = MPI_SUM; + + inline const size_t MaxNoProcessors = 2048; + +} + + + + + + + +#endif //__mpiTypes_H__ diff --git a/src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp b/src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp new file mode 100644 index 00000000..81869453 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp @@ -0,0 +1,30 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "procCommunication.hpp" + + +pFlow::MPI::procCommunication::procCommunication +( + const localProcessors& proc +) +: + processors_(proc) +{} diff --git a/src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp b/src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp new file mode 100644 index 00000000..80c0f513 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp @@ -0,0 +1,178 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. 
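A tiny usage sketch combining the aliases above with the wrappers from mpiCommunication.hpp; receiveOneReal and the rank/tag values are illustrative only.

#include "mpiCommunication.hpp"   // also brings in the aliases from mpiTypes.hpp

void receiveOneReal()
{
    pFlow::MPI::Request req  = pFlow::MPI::RequestNull;
    pFlow::MPI::Status  stat;
    pFlow::real         value = 0;

    // post a non-blocking receive for a single real from rank 0 with tag 0,
    // then block until it completes
    pFlow::MPI::Irecv(value, 0, 0, pFlow::MPI::CommWorld, &req);
    pFlow::MPI::Wait(&req, &stat);
}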
+ + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __procCommunication_hpp__ +#define __procCommunication_hpp__ + + +#include "procVector.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + + +class procCommunication +{ +protected: + + const localProcessors& processors_; + +public: + + procCommunication(const localProcessors& proc); + + ~procCommunication()=default; + + /// @brief Tell if this processor is master processor in the local + /// communicator + /// @return true if this processor is master + + inline + const auto& processors()const + { + return processors_; + } + + inline + bool localMaster()const + { + return processors_.localMaster();; + } + + inline + auto localSize()const + { + return processors_.localSize(); + } + + inline + auto localRank()const + { + return processors_.localRank(); + } + + inline + auto localCommunicator()const + { + return processors_.localCommunicator(); + } + + /// @brief return the master number in the local communicator + auto localMasterNo()const + { + return processors_.localMasterNo(); + } + + /// Send a single val to all processors including itself (local communicator) + template + std::pair distributeMasterToAll(const T& val) + { + + T retVal = val; + auto res = CheckMPI( + Bcast(retVal, localMasterNo(),localCommunicator() ), + false); + + return {retVal, res}; + } + + /// @brief Send a single value to all processor including master (in local communicator) + /// @param val value to be sent + /// @param recvVal recieved value + /// @return true if successful and false if fail + template + bool distributeMasterToAll(const T& val, T& recvVal) + { + recvVal = val; + return CheckMPI( + Bcast(recvVal, localMasterNo(), localCommunicator()), + false); + } + + /// @brief values in the vector (size is equal to number of + // processors in local communicator) to each processor + template + std::pair distributeMasterToAll(const procVector& vals) + { + T val; + auto vec = vals.getSpan(); + auto res = CheckMPI( + scatter(vec, val, localMasterNo(), localCommunicator()), + false); + + return {val, res}; + } + + /// @brief Each processor in the local communicator calls this funtion with a value + /// and the values are distributed among all processors + template + std::pair, bool> collectAllToAll(const T& val) + { + procVector allVec(processors_); + auto vec = allVec.getSpan(); + auto res = CheckMPI( + allGather(val, vec, localCommunicator()), + false); + return {allVec, res}; + } + + /// @brief Each processor in the local communicator calls this funtion with a value + /// and the values are distributed among all processors + template + bool collectAllToAll(const T& val, procVector& allVec) + { + auto vec = allVec.getSpan(); + return CheckMPI( + allGather(val, vec, localCommunicator()), + false); + } + + /// @brief Each processor in the local communicator calls this function with a value + /// and all values are collected in the master processor + template + std::pair,bool> collectAllToMaster(const T& val) + { + // only on master processor + procVector masterVec(processors_, true); + + auto masterSpan = masterVec.getSpan(); + auto res = CheckMPI( + gather(val,masterSpan, localMasterNo(), localCommunicator()), + false); + + return {masterVec, res}; + 
+ } + + template + bool collectAllToMaster(const T& val, procVector& masterVec) + { + // only on master processor + auto [vec, res] = collectAllToMaster(val); + masterVec = vec; + return res; + } + +}; //procCommunication + +} // pFlow::MPI + +#endif //__procCommunication_hpp__ diff --git a/src/phasicFlow/MPIParallelization/MPI/procVector.hpp b/src/phasicFlow/MPIParallelization/MPI/procVector.hpp new file mode 100644 index 00000000..f9a80037 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/procVector.hpp @@ -0,0 +1,199 @@ +#ifndef __procVector_hpp__ +#define __procVector_hpp__ + +// from PhasicFlow + +#include "localProcessors.hpp" +#include "span.hpp" +#include "streams.hpp" +#include "IOPattern.hpp" + +#include "mpiTypes.hpp" + +namespace pFlow::MPI +{ + +template +class procVector +: + public std::vector +{ +public: + + using ProcVectorType = procVector; + + using VectorType = std::vector; + +protected: + + int rank_ = 0; + + bool isMaster_ = false; + + using VectorType::reserve; + + using VectorType::resize; + + using VectorType::assign; + + using VectorType::clear; + + using VectorType::erase; + +public: + + procVector( + const localProcessors& procs, + bool onlyMaster = false) + : + rank_(procs.localRank()), + isMaster_(procs.localMaster()) + { + + if( onlyMaster && !isMaster_ ) return; + this->reserve(procs.localSize()); + this->resize(procs.localSize()); + } + + procVector( + const T& val, + const localProcessors& procs, + bool onlyMaster = false) + : + procVector(procs, onlyMaster) + { + std::fill(this->begin(), this->end(), val); + } + + procVector(const T& val, const procVector& src) + { + this->reserve(src.size()); + this->resize(src.size()); + std::fill(this->begin(), this->end(), val); + } + + procVector(const localProcessors& procs, const VectorType& src) + : + procVector(procs) + { + if(src.size()!= this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in construction"<assign(src.begin(), src.end()); + } + + procVector(const procVector&) = default; + + procVector(procVector&&) = default; + + procVector& operator=(const procVector&) = default; + + procVector& operator=(procVector&&) = default; + + procVector& operator=(const VectorType& src) + { + if(src.size() != this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in copy assignment"<(*this).operator=(src); + return *this; + } + + procVector& operator=(VectorType&& src) + { + if(src.size() != this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in move assignment" + <(*this).operator=(std::move(src)); + return *this; + } + + procVector(const localProcessors& procs, VectorType&& src) + : + VectorType(std::move(src)) + { + if(this->size()!= static_cast(procs.localSize())) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in move"<(this->data(), this->size()); + } + + inline + auto getSpan()const + { + return span(const_cast(this->data()), this->size()); + } + + bool write( + iOstream& os, + const IOPattern& iop ) const + { + return writeStdVector(os, *this, iop); + } + +}; + +template +inline iOstream& operator << (iOstream& os, const procVector& ovec ) +{ + if( !ovec.write(os, IOPattern::AllProcessorsDifferent) ) + { + ioErrorInFile(os.name(), os.lineNumber()); + fatalExit; + } + return os; +} + +} + + +#endif diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp 
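A short usage sketch of procCommunication together with procVector, using only members shown above; exampleExchange, countsAll, and globalSeed are made-up names.

#include "procCommunication.hpp"
#include "procVector.hpp"

void exampleExchange(const pFlow::localProcessors& procs, pFlow::uint32 myNumPoints)
{
    pFlow::MPI::procCommunication comm(procs);

    // every rank learns the point count of every other rank in the local communicator
    pFlow::MPI::procVector<pFlow::uint32> countsAll(procs);
    comm.collectAllToAll(myNumPoints, countsAll);

    // the master decides a value and pushes it to all ranks (master included)
    pFlow::uint32 globalSeed = 0;
    comm.distributeMasterToAll(pFlow::uint32(42), globalSeed);
}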
new file mode 100644 index 00000000..a771dc54 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp @@ -0,0 +1,158 @@ + + +template +pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute +( + const localProcessors& procs +) +: + procCommunication(procs), + indexedMap_(TypeNull, procs, true) +{ + +} + +template +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + std::vector index; + + freeIndexedMap(); + + for(auto proc = 0; proc< maps.size(); proc++) + { + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i( makeSpan(index), dt)) + { + fatalErrorInFunction; + return false; + } + else + { + indexedMap_[proc] = dt; + } + } + } + return true; +} + + +template +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + freeIndexedMap(); + + + for(auto proc = 0; proc< maps.size(); proc++) + { + DataType dt; + if( !typeCreateIndexedBlock(maps[proc], dt) ) + { + fatalErrorInFunction; + return false; + } + else + { + indexedMap_[proc] = dt; + } + } + } + return true; +} + +template +void pFlow::MPI::scatteredMasterDistribute::freeIndexedMap() +{ + for(auto i=0; i +bool pFlow::MPI::scatteredMasterDistribute::distribute +( + span& sendBuff, + span& recvb +) +{ + procVector requests(processors(), true); + procVector statuses(processors(), true); + + if(this->localMaster()) + { + bool res = true; + for(int32 i = indexedMap_.size()-1; i>=0; i--) + { + res = res&&CheckMPI( + MPI_Issend( + sendBuff.data(), + 1, + indexedMap_[i], + i, + 0, + localCommunicator(), + &requests[i]), + false); + } + + if(!res)return false; + } + + Status stat; + bool sucss = CheckMPI( + MPI_Recv( + recvb.data(), + recvb.size()*sFactor(), + Type(), + 0, + 0, + localCommunicator(), + &stat), + false); + + if(this->localMaster()) + { + CheckMPI( + MPI_Waitall(requests.size(), requests.data(), statuses.data()), + false + ); + } + + return sucss; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp new file mode 100644 index 00000000..146ce56c --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp @@ -0,0 +1,67 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
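Side note on resource handling in the class above: the derived datatypes committed in setDataMaps() are released again in freeIndexedMap(), which the destructor calls. If one wanted to make that pairing impossible to forget, a small RAII guard along the following lines could be used; this is only a sketch of an alternative, not something the patch does.

#include <mpi.h>

// owns one committed datatype and frees it when the guard goes out of scope
class scopedDataType
{
    MPI_Datatype type_ = MPI_DATATYPE_NULL;

public:

    explicit scopedDataType(MPI_Datatype t) : type_(t) {}

    ~scopedDataType()
    {
        if (type_ != MPI_DATATYPE_NULL) MPI_Type_free(&type_);
    }

    scopedDataType(const scopedDataType&)            = delete;
    scopedDataType& operator=(const scopedDataType&) = delete;

    MPI_Datatype get() const { return type_; }
};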
+ +-----------------------------------------------------------------------------*/ + +#ifndef __scatteredMasterDistribute_hpp__ +#define __scatteredMasterDistribute_hpp__ + +#include "mpiCommunication.hpp" +#include "procCommunication.hpp" +#include "procVector.hpp" +#include "stdVectorHelper.hpp" +#include "streams.hpp" + +namespace pFlow::MPI +{ + +template +class scatteredMasterDistribute : public procCommunication +{ +protected: + + procVector indexedMap_; + + void freeIndexedMap(); + +public: + + scatteredMasterDistribute(const localProcessors& procs); + + ~scatteredMasterDistribute() + { + freeIndexedMap(); + } + + scatteredMasterDistribute(const scatteredMasterDistribute&) = delete; + + scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = + delete; + + bool setDataMaps(procVector>& maps); + + bool setDataMaps(procVector>& maps); + + bool distribute(span& sendBuff, span& recvb); +}; + +} // pFlow::MPI + +#include "scatteredMasterDistribute.cpp" + +#endif //__scatteredMasterDistribute_hpp__ diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp new file mode 100644 index 00000000..7579e8d5 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp @@ -0,0 +1,166 @@ + +#include "scatteredMasterDistributeChar.hpp" + +pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute +( + size_t sizeOfElement, + const localProcessors& procs +) +: + procCommunication(procs), + indexedMap_(TypeNull, procs, true), + sizeOfElement_(sizeOfElement) +{} + + +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + freeIndexedMap(); + + std::vector index; + + for(auto proc = 0; proc< maps.size(); proc++) + { + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + std::vector index; + freeIndexedMap(); + + for(auto proc = 0; proc< maps.size(); proc++) + { + + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i::freeIndexedMap() +{ + for(auto i=0; i::distribute +( + span& sendBuff, + span& recvb +) +{ + procVector requests(processors(), true); + procVector statuses(processors(), true); + + + if(this->localMaster()) + { + bool res = true; + for(int32 i = indexedMap_.size()-1; i>=0; i--) + { + res = res&&CheckMPI( + MPI_Issend( + sendBuff.data(), + 1, + indexedMap_[i], + i, + 0, + localCommunicator(), + &requests[i]), + false); + } + + if(!res)return false; + } + + Status stat; + bool sucss = CheckMPI( + MPI_Recv( + recvb.data(), + recvb.size(), + MPI_CHAR, + 0, + 0, + localCommunicator(), + &stat), + true); + + if(this->localMaster()) + { + CheckMPI( + MPI_Waitall(requests.size(), requests.data(), statuses.data()), + false + ); + } + + return sucss; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp new file mode 100644 index 00000000..0ea1a770 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp @@ -0,0 +1,66 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of 
+ O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __scatteredMasterDistributeChar_hpp__ +#define __scatteredMasterDistributeChar_hpp__ + +#include "scatteredMasterDistribute.hpp" + +namespace pFlow::MPI +{ + +template<> +class scatteredMasterDistribute : public procCommunication +{ +protected: + + procVector indexedMap_; + + size_t sizeOfElement_; + + void freeIndexedMap(); + +public: + + scatteredMasterDistribute( + size_t sizeOfElement, + const localProcessors& procs + ); + + ~scatteredMasterDistribute() + { + freeIndexedMap(); + } + + scatteredMasterDistribute(const scatteredMasterDistribute&) = delete; + + scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = + delete; + + bool setDataMaps(procVector>& maps); + + bool setDataMaps(procVector>& maps); + + bool distribute(span& sendBuff, span& recvb); +}; + +} // pFlow::MPI + +#endif //__scatteredMasterDistributeChar_hpp__ diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp new file mode 100644 index 00000000..eb5e074c --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp @@ -0,0 +1,52 @@ + +template +bool pFlow::MPI::dataIOMPI::gatherData(span data ) +{ + + if(this->ioPattern_.isAllProcessorsDifferent()) + { + this->bufferSpan_ = data; + return true; + } + + if( this->ioPattern_.isMasterProcessorDistribute()) + { + + auto gatherT = pFlow::MPI::gatherMaster(pFlowProcessors()); + + if(!gatherT.gatherData(data)) + { + fatalErrorInFunction<<"Error in gathering data to master"<buffer_ = gatherT.moveData(); + this->bufferSpan_ = span(this->buffer_.data(),this->buffer_.size() ); + + return true; + + } + + if( this->ioPattern_.isMasterProcessorOnly() || this->ioPattern_.isAllProcessorSimilar() ) + { + if( this->ioPattern_.isMaster() ) + { + this->bufferSpan_ = data; + return true; + } + else + { + this->bufferSpan_ = span(nullptr, 0); + return true; + } + } + + return false; + +} + +template +pFlow::MPI::dataIOMPI::dataIOMPI(const IOPattern& iop) +: + dataIO(iop) +{} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp new file mode 100644 index 00000000..1bfeb94d --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp @@ -0,0 +1,58 @@ +#ifndef __datIOMPI_hpp__ +#define __datIOMPI_hpp__ + +#include "dataIO.hpp" +#include "pFlowProcessors.hpp" +#include "gatherMaster.hpp" + + +namespace pFlow::MPI +{ + +template +class dataIOMPI +: + public dataIO +{ +public: + + using DataIOType = dataIO; + + using DataIOMPIType = dataIOMPI; + +protected: + + bool gatherData(span data ) override; + +public: + + 
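    // Summary of gatherData() as implemented in dataIOMPI.cpp above:
    //  - IOPattern::AllProcessorsDifferent : every rank keeps its own span and
    //    bufferSpan_ simply aliases the local data, no communication happens.
    //  - IOPattern::MasterProcessorDistribute : all ranks' data is gathered into
    //    the master's buffer_ through gatherMaster, other ranks end up with an
    //    empty span.
    //  - IOPattern::MasterProcessorOnly / AllProcessorSimilar : only the master
    //    exposes its local span; every other rank publishes span(nullptr, 0).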
TypeInfoTemplate111("dataIO",T,"MPI"); + + explicit dataIOMPI(const IOPattern& iop); + + dataIOMPI(const dataIOMPI&) = default; + + dataIOMPI(dataIOMPI&&) = default; + + + dataIOMPI& operator=(const dataIOMPI&) = default; + + dataIOMPI& operator=(dataIOMPI&&) = default; + + ~dataIOMPI() = default; + + add_vCtor + ( + DataIOType, + DataIOMPIType, + IOPattern + ); + +}; //dataIOMPI + + +} //namespace pFlow::MPI + +#include "dataIOMPI.cpp" + +#endif //__datIOMPI_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp new file mode 100644 index 00000000..73d307f2 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp @@ -0,0 +1,27 @@ + +#include "types.hpp" +#include "dataIOMPI.hpp" + + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp index 93583714..3e23d15f 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -24,15 +24,16 @@ Licence: #include "scatteredMasterDistribute.hpp" #include "scatteredMasterDistributeChar.hpp" -pFlow::MPISimulationDomain::MPISimulationDomain(systemControl& control) +pFlow::MPI::MPISimulationDomain::MPISimulationDomain(systemControl& control) : simulationDomain(control), communication_(pFlowProcessors()), - subDomains_(pFlowProcessors()), - domainPartition_( makeUnique(subDict("decomposition"), globalBox_)) + subDomainsAll_(pFlowProcessors()), + numPointsAll_(pFlowProcessors()), + domainPartitioning_( makeUnique(subDict("decomposition"), globalBox())) {} -bool pFlow::MPISimulationDomain::createBoundaryDicts() +bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts() { auto& boundaries = this->subDict("boundaries"); @@ -60,48 +61,94 @@ bool pFlow::MPISimulationDomain::createBoundaryDicts() "in dictionary "<< boundaries.globalName()<("type") == "periodic") + { + fatalErrorInFunction<< + "periodic is not implemented "<localBox()); - if(!communication_.collectAllToAll(thisDomain_, subDomains_)) + thisDomain_ = domain(domainPartitioning_->localBox()); + uint32 thisNumPoints = initialNumberInThis(); + + if(!communication_.collectAllToAll(thisNumPoints, numPointsAll_)) + { + fatalErrorInFunction<< + "Failed to distribute number of points."< pFlow::MPISimulationDomain::findPlaneNeighbors() const +std::vector pFlow::MPI::MPISimulationDomain::findPlaneNeighbors() const { std::vector neighbors(sizeOfBoundaries(), -2); - domain gDomain(globalBox_); + domain gDomain(globalBox()); // left if( thisDomain_.left().parallelTouch( gDomain.left() ) ) @@ -109,12 +156,12 @@ std::vector pFlow::MPISimulationDomain::findPlaneNeighbors() const neighbors[0] = -1; } - for(int i=0; i pFlow::MPISimulationDomain::findPlaneNeighbors() const neighbors[1] = -1; } - for(int i=0; i pFlow::MPISimulationDomain::findPlaneNeighbors() const neighbors[2] = -1; } - for(int 
i=0; i pFlow::MPISimulationDomain::findPlaneNeighbors() const neighbors[3] = -1; } - for(int i=0; i pFlow::MPISimulationDomain::findPlaneNeighbors() const neighbors[4] = -1; } - for(int i=0; i pFlow::MPISimulationDomain::findPlaneNeighbors() const neighbors[5] = -1; } - for(int i=0; i pFlow::MPISimulationDomain::findPlaneNeighbors() const } const pFlow::dictionary & -pFlow::MPISimulationDomain::thisBoundaryDict() const +pFlow::MPI::MPISimulationDomain::thisBoundaryDict() const { return this->subDict("MPIBoundaries"); } -bool pFlow::MPISimulationDomain::initialUpdateDomains(span pointPos) +bool pFlow::MPI::MPISimulationDomain::initialUpdateDomains(span pointPos) { pFlagTypeHost flags(pointPos.size(), 0 , pointPos.size()); initialNumPoints_ = pointPos.size(); - if( !domainPartition_->partition(pointPos, flags) ) + if( !domainPartitioning_->partition(pointPos, flags) ) { + fatalErrorInFunction<< + "Point partitioning failed."<numberImportThisProc(); - uint32 numExport = domainPartition_->numberExportThisProc(); + uint32 numImport = domainPartitioning_->numberImportThisProc(); + uint32 numExport = domainPartitioning_->numberExportThisProc(); return max(initialNumPoints_+ numImport - numExport, 0u); } -bool pFlow::MPISimulationDomain::initialThisDomainActive() const -{ - return initialNumberInThis()>0; -} - -bool pFlow::MPISimulationDomain::initialTransferBlockData +bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData ( span src, span dst, @@ -256,7 +301,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData { MPI::scatteredMasterDistribute dataDist(sizeOfElement, pFlowProcessors()); - auto lists = domainPartition_->allExportLists(); + auto lists = domainPartitioning_->allExportLists(); if(!dataDist.setDataMaps( lists )) { @@ -273,7 +318,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData return true; } -bool pFlow::MPISimulationDomain::initialTransferBlockData +bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData ( span src, span dst @@ -282,8 +327,8 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData MPI::scatteredMasterDistribute dataDist(pFlowProcessors()); - auto lists = domainPartition_->allExportLists(); - + auto lists = domainPartitioning_->allExportLists(); + if(!dataDist.setDataMaps( lists )) { fatalErrorInFunction; @@ -300,7 +345,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData return true; } -bool pFlow::MPISimulationDomain::initialTransferBlockData +bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData ( span src, span dst @@ -309,7 +354,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData MPI::scatteredMasterDistribute dataDist(pFlowProcessors()); - auto lists = domainPartition_->allExportLists(); + auto lists = domainPartitioning_->allExportLists(); if(!dataDist.setDataMaps( lists )) { @@ -327,7 +372,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData return true; } -bool pFlow::MPISimulationDomain::initialTransferBlockData +bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData ( span src, span dst @@ -336,7 +381,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData MPI::scatteredMasterDistribute dataDist(pFlowProcessors()); - auto lists = domainPartition_->allExportLists(); + auto lists = domainPartitioning_->allExportLists(); if(!dataDist.setDataMaps( lists )) { @@ -354,7 +399,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData return true; } -bool pFlow::MPISimulationDomain::initialTransferBlockData +bool 
pFlow::MPI::MPISimulationDomain::initialTransferBlockData ( span src, span dst @@ -363,7 +408,7 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData MPI::scatteredMasterDistribute dataDist(pFlowProcessors()); - auto lists = domainPartition_->allExportLists(); + auto lists = domainPartitioning_->allExportLists(); if(!dataDist.setDataMaps( lists )) { @@ -381,35 +426,25 @@ bool pFlow::MPISimulationDomain::initialTransferBlockData return true; } -/*bool pFlow::MPISimulationDomain::updateDomains( - span pointPos, - pFlagTypeHost flags) -{ - if( !domainPartition_->partition(pointPos, flags) ) - { - return false; - } - - if(!setThisDomain()) return false; - if(!createBoundaryDicts()) return false; - - return true; -}*/ -pFlow::uint32 pFlow::MPISimulationDomain::numberToBeImported() const +pFlow::uint32 pFlow::MPI::MPISimulationDomain::numberToBeImported() const { - return domainPartition_->numberImportThisProc(); + return domainPartitioning_->numberImportThisProc(); } -pFlow::uint32 pFlow::MPISimulationDomain::numberToBeExported() const +pFlow::uint32 pFlow::MPI::MPISimulationDomain::numberToBeExported() const { - return domainPartition_->numberExportThisProc(); + return domainPartitioning_->numberExportThisProc(); } - - -bool pFlow::MPISimulationDomain::requiresDataTransfer() const +bool +pFlow::MPI::MPISimulationDomain::domainActive() const { - notImplementedFunction; - return false; + return thisDomainActive_; +} + +const pFlow::domain& +pFlow::MPI::MPISimulationDomain::thisDomain() const +{ + return thisDomain_; } diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp index feef591c..bab83611 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp @@ -2,17 +2,17 @@ O C enter of O O E ngineering and O O M ultiscale modeling of - OOOOOOO F luid flow + OOOOOOO F luid flow ------------------------------------------------------------------------------ Copyright (C): www.cemf.ir email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- +------------------------------------------------------------------------------ Licence: - This file is part of phasicFlow code. It is a free software for simulating + This file is part of phasicFlow code. It is a free software for simulating granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
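The typed initialTransferBlockData overloads above all repeat one pattern: construct a scatteredMasterDistribute for the element type, feed it the export lists from the partitioner, and distribute the spans. As a sketch only, assuming the includes already present in this translation unit and that pFlowProcessors() lives in the pFlow namespace, that pattern could be expressed once as a helper template along these lines (transferTyped is an illustrative name, not part of the patch):

template<typename T>
bool transferTyped(pFlow::partitioning& partitioner, pFlow::span<T> src, pFlow::span<T> dst)
{
    // per-element-type scatter helper built on the local processor group
    pFlow::MPI::scatteredMasterDistribute<T> dataDist(pFlow::pFlowProcessors());

    // export lists describe which elements go to which processor
    auto lists = partitioner.allExportLists();

    if (!dataDist.setDataMaps(lists))
    {
        return false;
    }

    if (!dataDist.distribute(src, dst))
    {
        return false;
    }

    return true;
}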
@@ -20,100 +20,99 @@ Licence: #ifndef __MPISimulationDomain_hpp__ #define __MPISimulationDomain_hpp__ -#include "simulationDomain.hpp" #include "partitioning.hpp" -#include "procVector.hpp" #include "procCommunication.hpp" +#include "procVector.hpp" +#include "simulationDomain.hpp" -namespace pFlow +namespace pFlow::MPI { -class MPISimulationDomain -: - public simulationDomain +class MPISimulationDomain : public simulationDomain { -protected: - MPI::procCommunication communication_; +private: - MPI::procVector subDomains_; + /// a processor communcator for simulation domain + procCommunication communication_; - uniquePtr domainPartition_ = nullptr; + /// sub-domain (thisDomain_ for all processors) + procVector subDomainsAll_; - uint32 initialNumPoints_ = 0; + /// number of points in all processors + procVector numPointsAll_; - bool createBoundaryDicts() override; + /// partitioning object + uniquePtr domainPartitioning_ = nullptr; - bool setThisDomain() override; + /// the acutal limits of the simulation domain in this processor + domain thisDomain_; - std::vector - findPlaneNeighbors()const; + uint32 initialNumPoints_ = 0; + + bool thisDomainActive_ = false; + + bool createBoundaryDicts() final; + + bool setThisDomain() final; + + std::vector findPlaneNeighbors() const; public: - TypeInfo("simulationDomain"); + TypeInfo("simulationDomain"); - MPISimulationDomain(systemControl& control); + explicit MPISimulationDomain(systemControl& control); - virtual - ~MPISimulationDomain()=default; + ~MPISimulationDomain() final = default; - add_vCtor - ( - simulationDomain, - MPISimulationDomain, - systemControl - ); + add_vCtor + ( + simulationDomain, + MPISimulationDomain, + systemControl + ); - const dictionary& thisBoundaryDict()const override; - - /// @brief - /// @param pointPos - /// @return - bool initialUpdateDomains(span pointPos)override; - + const dictionary& thisBoundaryDict() const final; - /// @brief - /// @return - uint32 initialNumberInThis()const override; + /// @brief + /// @param pointPos + /// @return + bool initialUpdateDomains(span pointPos) final; - bool initialThisDomainActive()const override; + /// @brief + /// @return + uint32 initialNumberInThis() const final; - bool initialTransferBlockData( - span src, - span dst, - size_t sizeOfElement)const override; - - bool initialTransferBlockData( - span src, - span dst) const override; - - bool initialTransferBlockData( - span src, - span dst) const override; - - bool initialTransferBlockData( - span src, - span dst) const override; - - bool initialTransferBlockData( - span src, - span dst) const override; - + bool initialTransferBlockData( + span src, + span dst, + size_t sizeOfElement + ) const final; - /*bool updateDomains( - span pointPos, - pFlagTypeHost flags) override;*/ + bool initialTransferBlockData(span src, span dst) + const final; + bool initialTransferBlockData(span src, span dst) + const final; - uint32 numberToBeImported()const override; - - uint32 numberToBeExported()const override; - - bool requiresDataTransfer() const override; - + bool initialTransferBlockData(span src, span dst) + const final; + bool initialTransferBlockData(span src, span dst) + const final; + + uint32 numberToBeImported() const final; + + uint32 numberToBeExported() const final; + + /// @brief Is this domain active? 
+ /// Active mean, there is particle in it and + /// boundaries and other entities of simulation domains are valid + bool domainActive() const final; + + const domain& thisDomain()const final; }; -} +} // namespace pFlow::MPI -#endif \ No newline at end of file +#endif // \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp b/src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp new file mode 100644 index 00000000..0ae5cf82 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp @@ -0,0 +1,113 @@ + + +#include "partitioning.hpp" +#include "error.hpp" +#include "streams.hpp" + +void pFlow::partitioning::freeZoltan() +{ + if(validPointers_) + { + Zoltan::LB_Free_Part(&importGlobalGids_, &importLocalGids_, + &importProcs_, &importToPart_); + + Zoltan::LB_Free_Part(&exportGlobalGids_, &exportLocalGids_, + &exportProcs_, &exportToPart_); + validPointers_ = false; + } + + zoltan_.release(); +} + + +pFlow::partitioning::partitioning +( + const dictionary& dict, + const box& globalBox +) +: + globalBox_(globalBox) +{ + if(!zoltanInitialized__) + { + auto rc = Zoltan_Initialize + ( + processors::argc(), + processors::argv(), + &version_ + ); + + if (rc != ZOLTAN_OK) + { + fatalErrorInFunction<<"Cannot initialize zoltan"<(pFlowProcessors().localCommunicator()); + + zoltan_->Set_Param("DEBUG_LEVEL", "0"); + zoltan_->Set_Param("LB_METHOD", "RCB"); + zoltan_->Set_Param("NUM_GID_ENTRIES", "1"); + zoltan_->Set_Param("NUM_LID_ENTRIES", "1"); + zoltan_->Set_Param("OBJ_WEIGHT_DIM", "0"); + zoltan_->Set_Param("RETURN_LISTS", "ALL"); + +} + +bool pFlow::partitioning::partition(span points, pFlagTypeHost flags) +{ + pointCollection pointCollctn{points, flags}; + + return partition(pointCollctn); +} +int GetObjectSize +( + void *data, + int num_gid_entries, + int num_lid_entries, + ZOLTAN_ID_PTR global_id, + ZOLTAN_ID_PTR local_id, + int *ierr +) +{ + *ierr = ZOLTAN_OK; + pFlow::uint32 s = *(static_cast(data)); + return static_cast(s); +} + +void PackObject +( + void *data, + int num_gid_entries, + int num_lid_entries, + ZOLTAN_ID_PTR global_id, + ZOLTAN_ID_PTR local_id, + int dest, + int size, + char *buf, + int *ierr +) +{ + +} + +bool pFlow::partitioning::migrateData(span src, span dst, uint32 elementSize) +{ + dataCollection data{src, dst, elementSize}; + + zoltan_->Set_Obj_Size_Fn(GetObjectSize, &elementSize); + return false; +} + +pFlow::partitioning::~partitioning() +{ + freeZoltan(); +} + +void pFlow::partitioning::printBox()const +{ + pOutput<< "localBox:" << localBox_< points_; + pFlagTypeHost pFlag_; + + uint32 numActivePoints()const + { + return pFlag_.numActive(); + } +}; + +struct dataCollection +{ + span srcData_; + span dstData_; + uint32 elementSize_; +}; + +class partitioning +{ +protected: + + float version_ = 0.0; + + std::unique_ptr zoltan_ = nullptr; + + bool validPointers_ = false; + + box globalBox_; + + box localBox_; + + int32 changes_, numImport_, numExport_; + + id_t *importGlobalGids_, *importLocalGids_, *exportGlobalGids_, *exportLocalGids_; + + int32 *importProcs_, *importToPart_, *exportProcs_, *exportToPart_; + + uint32 numBeforePartition_ = 0 ; + + static inline bool zoltanInitialized__ = false; + + void freeZoltan(); + + virtual + bool partition(pointCollection& points) = 0; + +public: + + partitioning( + const dictionary& dict, + const box& globalBox); + + virtual + ~partitioning(); + + create_vCtor( + partitioning, + dictionary, + ( + const dictionary& dict, + const 
box& globalBox + ), + (dict, globalBox)); + + bool partition( + span points, + pFlagTypeHost flags); + + + bool migrateData(span src, span dst, uint32 elementSize); + + inline + auto localBox()const + { + return localBox_; + } + + inline + const auto& globalBox()const + { + return globalBox_; + } + + inline + bool partitionsChanged()const + { + return changes_ == 1; + } + + + uint32 numberImportThisProc()const + { + return numImport_; + } + + uint32 numberExportThisProc()const + { + return numExport_; + } + + virtual + span exportList(int procNo)const = 0; + + virtual + pFlow::MPI::procVector> allExportLists()const=0; + + void printBox()const; + + +}; + + +} + + +#endif //__partitioning_hpp__ + + + +/*static + int getNumberOfPoints(void *data, int32 *ierr); + + static + void getPointList( + void *data, + int32 sizeGID, + int32 sizeLID, + id_t* globalID, + id_t* localID, + int32 wgt_dim, + float *obj_wgts, + int32 *ierr);*/ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp new file mode 100644 index 00000000..ba147512 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp @@ -0,0 +1,330 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
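To show how the partitioning interface above is intended to be driven by a caller such as MPISimulationDomain, here is a minimal usage sketch. The variables domainDict, globalBox, pointPositions and pointFlags are placeholders invented for illustration; uniquePtr and makeUnique are the smart-pointer helpers already used elsewhere in the code, and rcb1DPartitioning is the concrete partitioner added in the next file.

// Illustrative only: exercising the abstract partitioning API through a
// concrete 1-D RCB partitioner (variable names are hypothetical).
uniquePtr<partitioning> partitioner =
    makeUnique<rcb1DPartitioning>(domainDict, globalBox);

if( partitioner->partition(pointPositions, pointFlags) )
{
    // number of points leaving/entering this processor after re-partitioning
    uint32 nExport = partitioner->numberExportThisProc();
    uint32 nImport = partitioner->numberImportThisProc();

    // the sub-domain box assigned to this processor
    auto thisProcBox = partitioner->localBox();

    partitioner->printBox();
}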
+ +-----------------------------------------------------------------------------*/ + +#include "zoltan_cpp.h" + + +#include "error.hpp" +#include "processors.hpp" +#include "rcb1DPartitioning.hpp" + +bool pFlow::rcb1DPartitioning::partition(pointCollection &points) +{ + + zoltan_->Set_Param("RCB_OUTPUT_LEVEL", "0"); + zoltan_->Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); + zoltan_->Set_Param("KEEP_CUTS", "1"); + zoltan_->Set_Param("REDUCE_DIMENSIONS", "1"); + zoltan_->Set_Param("RCB_RECOMPUTE_BOX", "1"); + zoltan_->Set_Param("AVERAGE_CUTS", "0"); + zoltan_->Set_Param("MIGRATE_ONLY_PROC_CHANGES", "0"); + + zoltan_->Set_Num_Obj_Fn(rcb1DPartitioning::getNumberOfPoints, &points); + zoltan_->Set_Obj_List_Fn(rcb1DPartitioning::getPointList, &points); + zoltan_->Set_Num_Geom_Fn(rcb1DPartitioning::getNumGeometry, &points); + switch (direction_) + { + case Direction::X: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_x, &points); + break; + case Direction::Y: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_y, &points); + break; + case Direction::Z: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_z, &points); + break; + } + + int numGidEntries_, numLidEntries_; + int rc = zoltan_->LB_Partition(changes_, numGidEntries_, numLidEntries_, + numImport_, importGlobalGids_, importLocalGids_, importProcs_, importToPart_, + numExport_, exportGlobalGids_, exportLocalGids_, exportProcs_, exportToPart_); + + + if (rc != ZOLTAN_OK) + { + fatalErrorInFunction<< "Zoltan faild to perform partitioning."< thisProc(points.numActivePoints(),-1); + + for(auto i =0; iRCB_Box + ( + processors::globalRank(), + nDim, + x0, y0, z0, + x1, y1, z1 + ); + + localBox_ = globalBox_; + + if(equal(x0, x1)) + { + x0 = x0 - 0.00001; + x1 = x1 + 0.00001; + } + + switch (direction_) + { + case Direction::X : + localBox_.minPoint().x_ = x0; + localBox_.maxPoint().x_ = x1; + break; + + case Direction::Y : + localBox_.minPoint().y_ = x0; + localBox_.maxPoint().y_ = x1; + break; + + case Direction::Z : + localBox_.minPoint().z_ = x0; + localBox_.maxPoint().z_ = x1; + break; + } + + + localBox_.minPoint() = max(localBox_.minPoint(), globalBox_.minPoint()); + localBox_.maxPoint() = min(localBox_.maxPoint(), globalBox_.maxPoint()); + + + return true; +} + +pFlow::rcb1DPartitioning::rcb1DPartitioning +( + const dictionary &dict, + const box &globalBox +) +: + partitioning(dict, globalBox), + exportIds_(pFlowProcessors()) +{ + + word directionName = dict.getVal("direction"); + + if(toUpper(directionName)== "X") + { + direction_ = Direction::X; + dirVector_ ={1.0, 0.0, 0.0}; + } + else if( toUpper(directionName) == "Y") + { + direction_ = Direction::Y; + dirVector_ ={0.0, 1.0, 0.0}; + } + else if( toUpper(directionName) == "Z") + { + direction_ = Direction::Z; + dirVector_ ={0.0, 0.0, 1.0}; + } + else + { + fatalErrorInFunction<< "wrong direction in dictionary "<< + dict.globalName()<<". 
Directions should be one of x, y, or z."<(data); + + *ierr = ZOLTAN_OK; + + return obj->numActivePoints(); +} + +void pFlow::rcb1DPartitioning::getPointList +( + void *data, + int sizeGID, + int sizeLID, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int wgt_dim, + float *obj_wgts, + int *ierr +) +{ + auto* obj = static_cast(data); + *ierr = ZOLTAN_OK; + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + globalID[n] = i; + localID[n] = n; + n++; + } + } + +} + +void pFlow::rcb1DPartitioning::getGeometryList_x +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].x_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + +void pFlow::rcb1DPartitioning::getGeometryList_y +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].y_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + +void pFlow::rcb1DPartitioning::getGeometryList_z +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].z_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + diff --git a/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp new file mode 100644 index 00000000..b58532e3 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp @@ -0,0 +1,240 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __rcb1DPartitioning_hpp__ +#define __rcb1DPartitioning_hpp__ + +#include "partitioning.hpp" +#include "procVector.hpp" + +namespace pFlow +{ + + +class rcb1DPartitioning +: +public partitioning +{ +public: + + enum Direction + { + X = 0, + Y = 1, + Z = 2 + }; + +protected: + + /// Direction of partitioning + Direction direction_ = Direction::X; + + realx3 dirVector_ = {1.0, 0.0, 0.0}; + + word directionName_ = "x"; + + MPI::procVector> exportIds_; + + bool partition(pointCollection& points) override; + +public: + + + rcb1DPartitioning( + const dictionary& dict, + const box& globalBox); + + + ~rcb1DPartitioning() override=default; + + span exportList(int procNo)const override + { + return span( + const_cast(exportIds_[procNo].data()), + exportIds_[procNo].size()); + } + + + pFlow::MPI::procVector> allExportLists()const override + { + pFlow::MPI::procVector> allList(pFlowProcessors()); + + for(int i=0; i(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + *ierr = ZOLTAN_OK; + + for (int i=0; i < num_obj ; i++) + { + geom_vec[i] = obj->pointList()[i].y_; + } + + return; + } + + + static + int getNumGeometry(void *data, int *ierr) + { + *ierr = ZOLTAN_OK; + return 1; + } + +}; + + +class RCB_x_partitioning +: +public partitioning +{ +public: + + + RCB_x_partitioning(int argc, char *argv[], pointCollection& collection, const box& gBox) + : + partitioning(argc, argv, collection, gBox) + {} + + virtual + ~RCB_x_partitioning()=default; + + + bool partition() override; + + + static + void getGeometryList( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr); + + static + int getNumGeometry(void *data, int *ierr); + + +};*/ + +} // pFlow +#endif //__rcb1DPartitioning_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/mpiCommunication.hpp index 4c43038d..05a41fd5 100644 --- a/src/phasicFlow/MPIParallelization/mpiCommunication.hpp +++ b/src/phasicFlow/MPIParallelization/mpiCommunication.hpp @@ -26,9 +26,6 @@ Licence: #include "types.hpp" #include "span.hpp" -#ifdef pFlow_Build_MPI - - namespace pFlow::MPI { @@ -375,9 +372,6 @@ inline auto typeFree(DataType& type) } -#endif //pFlow_Build_MPI - - #endif //__mpiCommunication_H__ diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp new file mode 100644 index 00000000..2595ebaa --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp @@ -0,0 +1,110 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. 
+ + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +template +void +pFlow::MPI::processorBoundaryField::checkDataRecieved() const +{ + if (!dataRecieved_) + { + //uint32 nRecv = reciever_.waitComplete(); + dataRecieved_ = true; + /*if (nRecv != this->neighborProcSize()) + { + fatalErrorInFunction; + fatalExit; + }*/ + } +} + +template +bool +pFlow::MPI::processorBoundaryField::updateBoundary( + int step, + DataDirection direction +) +{ + /*if (step == 1) + { + // Isend + if (direction == DataDirection::TwoWay || + ( this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) || + (!this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster)) + { + sender_.sendData(pFlowProcessors(), this->thisField()); + dataRecieved_ = false; + } + } + else if (step == 2) + { + // Irecv + if (direction == DataDirection::TwoWay || + (!this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) || + ( this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster)) + { + reciever_.recieveData(pFlowProcessors(), this->neighborProcSize()); + dataRecieved_ = false; + } + } + else + { + fatalErrorInFunction << "Invalid step number " << step << endl; + return false; + }*/ + + return true; +} + +template +pFlow::MPI::processorBoundaryField::processorBoundaryField( + const boundaryBase& boundary, + const pointStructure& pStruct, + InternalFieldType& internal +) + : BoundaryFieldType(boundary, pStruct, internal), + sender_( + groupNames("sendBufferField", boundary.name()), + boundary.neighborProcessorNo(), + boundary.thisBoundaryIndex() + ), + reciever_( + groupNames("neighborProcField", boundary.name()), + boundary.neighborProcessorNo(), + boundary.mirrorBoundaryIndex() + ) +{ +} + +template +typename pFlow::MPI::processorBoundaryField::ProcVectorType& +pFlow::MPI::processorBoundaryField::neighborProcField() +{ + checkDataRecieved(); + return reciever_.buffer(); +} + +template +const typename pFlow::MPI::processorBoundaryField:: + ProcVectorType& + pFlow::MPI::processorBoundaryField::neighborProcField() const +{ + checkDataRecieved(); + return reciever_.buffer(); +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp new file mode 100644 index 00000000..5fb0780a --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp @@ -0,0 +1,113 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. 
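The commented-out updateBoundary body above encodes the intended two-step exchange: step 1 packs the field values and posts non-blocking sends, step 2 posts the matching receives, and the received data is consumed lazily when neighborProcField() is first accessed. The same two-step pattern is documented for boundaryProcessor::updataBoundary later in this patch series, where boundaryList is named as the caller. Conceptually the driver performs the equivalent of the loops below; the container name boundaries is illustrative, and the real call goes through the boundary base-class interface rather than directly as written here.

// Conceptual sketch of the two-phase update (names illustrative).
for(auto& bndry : boundaries)
{
    bndry.updateBoundary(1, DataDirection::TwoWay); // pack and Isend
}

for(auto& bndry : boundaries)
{
    bndry.updateBoundary(2, DataDirection::TwoWay); // post the matching receive
}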
+ + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __processorBoundaryField_hpp__ +#define __processorBoundaryField_hpp__ + +#include "boundaryField.hpp" +#include "dataSender.hpp" +#include "dataReciever.hpp" + +namespace pFlow::MPI +{ + +template< class T, class MemorySpace = void> +class processorBoundaryField +: + public boundaryField +{ +public: + + using processorBoundaryFieldType = processorBoundaryField; + + using BoundaryFieldType = boundaryField; + + using InternalFieldType = typename BoundaryFieldType::InternalFieldType; + + using memory_space = typename BoundaryFieldType::memory_space; + + using execution_space = typename BoundaryFieldType::execution_space; + + using FieldAccessType = typename BoundaryFieldType::FieldAccessType; + + using ProcVectorType = typename BoundaryFieldType::ProcVectorType; + +private: + + dataSender sender_; + + mutable dataReciever reciever_; + + mutable bool dataRecieved_ = true; + + void checkDataRecieved()const; + + bool updateBoundary(int step, DataDirection direction); + + +public: + + TypeInfoTemplate211("boundaryField","processor", T, memory_space::name()); + + processorBoundaryField( + const boundaryBase& boundary, + const pointStructure& pStruct, + InternalFieldType& internal); + + + ~processorBoundaryField()override = default; + + add_vCtor + ( + BoundaryFieldType, + processorBoundaryFieldType, + boundaryBase + ); + + ProcVectorType& neighborProcField() override; + + + const ProcVectorType& neighborProcField()const override; + + bool hearChanges + ( + real t, + real dt, + uint32 iter, + const message& msg, + const anyList& varList + ) override + { + BoundaryFieldType::hearChanges(t,dt,iter, msg,varList); + + if(msg.equivalentTo(message::BNDR_DELETE)) + { + // do nothing; + } + + return true; + } + +}; + +} + +#include "processorBoundaryField.cpp" + +#endif //__processorBoundaryField_hpp__ diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp new file mode 100644 index 00000000..f07f20d9 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp @@ -0,0 +1,24 @@ + +//#include "Field.hpp" + +#include "processorBoundaryField.hpp" + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + + diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp new file mode 100644 index 00000000..50098e0a --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -0,0 +1,148 @@ +/*------------------------------- phasicFlow 
--------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "boundaryProcessor.hpp" +#include "dictionary.hpp" +#include "mpiCommunication.hpp" + +void +pFlow::MPI::boundaryProcessor::checkSize() const +{ + if (!sizeObtained_) + { + //MPI_Wait(&sizeRequest_, StatusIgnore); + sizeObtained_ = true; + } +} + +void +pFlow::MPI::boundaryProcessor::checkDataRecieved() const +{ + if (!dataRecieved_) + { + //uint32 nRecv = reciever_.waitComplete(); + dataRecieved_ = true; + /*if (nRecv != neighborProcSize()) + { + fatalErrorInFunction; + fatalExit; + }*/ + } +} + +pFlow::MPI::boundaryProcessor::boundaryProcessor( + const dictionary& dict, + const plane& bplane, + internalPoints& internal, + boundaryList& bndrs, + uint32 thisIndex +) + : boundaryBase(dict, bplane, internal, bndrs, thisIndex), + sender_( + groupNames("sendBuffer", name()), + neighborProcessorNo(), + thisBoundaryIndex() + ), + reciever_( + groupNames("neighborProcPoints", name()), + neighborProcessorNo(), + mirrorBoundaryIndex() + ) +{ +} + +bool +pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt) +{ + thisNumPoints_ = size(); + + auto req = MPI_REQUEST_NULL; + MPI_Isend( + &thisNumPoints_, + 1, + MPI_UNSIGNED, + neighborProcessorNo(), + thisBoundaryIndex(), + pFlowProcessors().localCommunicator(), + &req); + + MPI_Recv( + &neighborProcNumPoints_, + 1, + MPI_UNSIGNED, + neighborProcessorNo(), + mirrorBoundaryIndex(), + pFlowProcessors().localCommunicator(), + MPI_STATUS_IGNORE + ); + + sizeObtained_ = false; + + return true; +} + +pFlow::uint32 +pFlow::MPI::boundaryProcessor::neighborProcSize() const +{ + checkSize(); + return neighborProcNumPoints_; +} + +pFlow::realx3Vector_D& +pFlow::MPI::boundaryProcessor::neighborProcPoints() +{ + checkDataRecieved(); + return reciever_.buffer(); +} + +const pFlow::realx3Vector_D& +pFlow::MPI::boundaryProcessor::neighborProcPoints() const +{ + checkDataRecieved(); + return reciever_.buffer(); +} + +bool +pFlow::MPI::boundaryProcessor::updataBoundary(int step) +{ + if (step == 1) + { + sender_.sendData(pFlowProcessors(), thisPoints()); + dataRecieved_ = false; + } + else if (step == 2) + { + reciever_.recieveData(pFlowProcessors(), neighborProcSize()); + dataRecieved_ = false; + } + return true; +} + +bool +pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt) +{ + return true; +} + +bool +pFlow::MPI::boundaryProcessor::afterIteration(uint32 iterNum, real t, real dt) +{ + return true; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp new file mode 
100644 index 00000000..cb278461 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp @@ -0,0 +1,116 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __boundaryProcessor_hpp__ +#define __boundaryProcessor_hpp__ + + +#include "boundaryBase.hpp" +#include "mpiTypes.hpp" +#include "dataSender.hpp" +#include "dataReciever.hpp" + +namespace pFlow::MPI +{ + +class boundaryProcessor +: + public boundaryBase +{ +private: + + uint32 neighborProcNumPoints_ = 0; + + uint32 thisNumPoints_; + + realx3Vector_D neighborProcPoints_; + + mutable Request sizeRequest_; + + mutable Request sSizeRequest_; + + int req_=0; + + mutable bool sizeObtained_ = true; + + mutable dataSender sender_; + + mutable dataReciever reciever_; + + mutable bool dataRecieved_ = true; + + void checkSize()const; + + void checkDataRecieved()const; + + /// @brief Update processor boundary data for this processor + /// @param step It is either 1 or 2 in the input to indicate + /// the update step + /// @return true if successful + /// @details This method is called by boundaryList two times to + /// allow processor boundaries to exchange data in two steps. + /// The first step is a buffered non-blocking send and the second + /// step is non-blocking recieve to get data. + bool updataBoundary(int step)override; + +public: + + TypeInfo("boundary"); + + boundaryProcessor( + const dictionary& dict, + const plane& bplane, + internalPoints& internal, + boundaryList& bndrs, + uint32 thisIndex + ); + + ~boundaryProcessor() override = default; + + add_vCtor + ( + boundaryBase, + boundaryProcessor, + dictionary + ); + + bool beforeIteration(uint32 iterNum, real t, real dt) override; + + bool iterate(uint32 iterNum, real t, real dt) override; + + bool afterIteration(uint32 iterNum, real t, real dt) override; + + /// @brief Return number of points in the neighbor processor boundary. + /// This is overriden from boundaryBase. + uint32 neighborProcSize() const override; + + /// @brief Return a reference to point positions in the neighbor + /// processor boundary. + realx3Vector_D& neighborProcPoints() override; + + /// @brief Return a const reference to point positions in the + /// neighbor processor boundary. 
+ const realx3Vector_D& neighborProcPoints() const override; + +}; + +} // namespace pFlow::MPI + +#endif //__boundaryProcessor_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp new file mode 100644 index 00000000..13069b2a --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp @@ -0,0 +1,108 @@ + +#ifndef __dataReciever_hpp__ +#define __dataReciever_hpp__ + + +#include "span.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + +template +class dataReciever +{ +public: + + using BufferVectorType = VectorSingle; + + using BufferVectorTypeHost = VectorSingle; + + using memory_space = typename BufferVectorType::memory_space; + + using execution_space = typename BufferVectorType::execution_space; + +private: + + BufferVectorType buffer_; + + std::vector buffer0_; + + int fromProc_; + + int tag_; + + Request recvRequest_; + +public: + + dataReciever(const word& name, int from, int tag) + : + buffer_(name), + fromProc_(from), + tag_(tag) + {} + + ~dataReciever()=default; + + void recieveData( + const localProcessors& processors, + uint32 numToRecv + ) + { + + buffer0_.clear(); + buffer0_.resize(numToRecv); + MPI_Status status; + + /*CheckMPI(recv( + buffer_.getSpan(), + fromProc_, + tag_, + processors.localCommunicator(), + &status), true);*/ + MPI_Recv( + buffer0_.data(), + buffer0_.size(), + realx3Type__, + fromProc_, + tag_, + processors.localCommunicator(), + &status + ); + int c; + getCount(&status, c); + pOutput<<"Number of data recieved "<(&status, count), true); + + return static_cast(count);*/ + return buffer_.size(); + } + +}; + +} + + +#endif //__dataReciever_hpp__ diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp new file mode 100644 index 00000000..11c1782f --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp @@ -0,0 +1,120 @@ +#ifndef __dataSender_hpp__ +#define __dataSender_hpp__ + +#include "VectorSingles.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + +template +class dataSender +{ +public: + + using BufferVectorType = VectorSingle; + + using BufferVectorTypeHost = VectorSingle; + + using memory_space = typename BufferVectorType::memory_space; + + using execution_space = typename BufferVectorType::execution_space; + + // This is device vector + + +private: + + //BufferVectorType buffer_; + + std::vector buffer_; + + int toProc_; + + int tag_; + + Request sendRequest_ = RequestNull; + +public: + + dataSender(const word& name, int toProc, int tag) + : + toProc_(toProc), + tag_(tag) + {} + + ~dataSender()=default; + + void sendData( + const localProcessors& processors, + const scatteredFieldAccess& scatterField + ) + { + using RPolicy = Kokkos::RangePolicy< + DefaultExecutionSpace, + Kokkos::Schedule, + Kokkos::IndexType>; + + uint32 n = scatterField.size(); + + // clear the buffer to prevent data copy if capacity increases + buffer_.clear(); + buffer_.resize(n); + + auto* buffView = buffer_.data(); + + Kokkos::parallel_for( + "dataSender::sendData", + RPolicy(0,n), + LAMBDA_HD(uint32 i) + { + buffView[i] = scatterField[i]; + } + ); + Kokkos::fence(); + auto req = MPI_REQUEST_NULL; + + MPI_Isend( + buffer_.data(), + buffer_.size(), + 
realx3Type__, + toProc_, + tag_, + processors.localCommunicator(), + &req); + + /*CheckMPI(send( + buffer_.getSpan(), + toProc_, + tag_, + processors.localCommunicator(), + MPI_STATUS_IGNORE), true);*/ + } + + /*auto& buffer() + { + return buffer_; + } + + const auto& buffer()const + { + return buffer_; + }*/ + + bool sendComplete() + { + return true; + /*int test; + MPI_Test(&sendRequest_, &test, StatusIgnore); + if(test) + return true; + else + return false;*/ + } + +}; + +} + +#endif //__dataSender_hpp__ \ No newline at end of file From 5f90605a4135a4fd3f3bf99429aba036c54bee51 Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Sat, 27 Apr 2024 08:55:00 -0700 Subject: [PATCH 04/14] MPI-boundaries for processor --- .../processorBoundaryContactSearch.cpp | 108 ++++++++++ .../processorBoundaryContactSearch.hpp | 74 +++++++ .../twoPartContactSearch.cpp | 160 +++++++++++++++ .../twoPartContactSearch.hpp | 103 ++++++++++ .../twoPartContactSearchKernels.cpp | 188 ++++++++++++++++++ .../twoPartContactSearchKernels.hpp | 49 +++++ .../processorBoundarySIKernels.hpp | 131 ++++++++++++ .../processorBoundarySphereInteraction.cpp | 73 +++++++ .../processorBoundarySphereInteraction.hpp | 90 +++++++++ .../processorBoundarySphereInteractions.cpp | 17 ++ 10 files changed, 993 insertions(+) create mode 100644 src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp create mode 100644 src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.hpp create mode 100644 src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp create mode 100644 src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.hpp create mode 100644 src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp create mode 100644 src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp create mode 100644 src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp create mode 100644 src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp create mode 100644 src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp create mode 100644 src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp diff --git a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp new file mode 100644 index 00000000..9f9384e9 --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp @@ -0,0 +1,108 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. 
You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "processorBoundaryContactSearch.hpp" +#include "contactSearch.hpp" +#include "particles.hpp" +//#include "pointStructure.hpp" +//#include "geometry.hpp" + + +void pFlow::processorBoundaryContactSearch::setSearchBox() +{ + + auto l = boundary().neighborLength(); + auto n = boundary().boundaryPlane().normal(); + auto pp1 = boundary().boundaryPlane().parallelPlane(l); + auto pp2 = boundary().boundaryPlane().parallelPlane(-l); + + realx3 minP1 = min(min(min(pp1.p1(), pp1.p2()), pp1.p3()), pp1.p4()); + realx3 maxP1 = max(max(max(pp1.p1(), pp1.p2()), pp1.p3()), pp1.p4()); + + realx3 minP2 = min(min(min(pp2.p1(), pp2.p2()), pp2.p3()), pp2.p4()); + realx3 maxP2 = max(max(max(pp2.p1(), pp2.p2()), pp2.p3()), pp2.p4()); + + auto minP = min(minP1, minP2) - l*(realx3(1.0)-abs(n)); + auto maxP = max(maxP1, maxP2) + l*(realx3(1.0)-abs(n)); + + searchBox_={minP, maxP}; +} + +pFlow::processorBoundaryContactSearch::processorBoundaryContactSearch( + const dictionary &dict, + const boundaryBase &boundary, + const contactSearch &cSearch) +: + boundaryContactSearch(dict, boundary, cSearch), + diameter_(cSearch.Particles().boundingSphere()), + masterSearch_(this->isBoundaryMaster()) +{ + + if(masterSearch_) + { + setSearchBox(); + + real minD; + real maxD; + cSearch.Particles().boundingSphereMinMax(minD, maxD); + + ppContactSearch_ = makeUnique( + searchBox_, + maxD); + } + else + { + searchBox_={{0,0,0},{0,0,0}}; + } +} + +bool pFlow::processorBoundaryContactSearch::broadSearch +( + uint32 iter, + real t, + real dt, + csPairContainerType &ppPairs, + csPairContainerType &pwPairs, + bool force +) +{ + if(masterSearch_) + { + /*const auto thisPoints = boundary().thisPoints(); + const auto& neighborProcPoints = boundary().neighborProcPoints(); + const auto& bDiams = diameter_.BoundaryField(thisBoundaryIndex()); + const auto thisDiams = bDiams.thisField(); + const auto& neighborProcDiams = bDiams.neighborProcField(); + + ppContactSearch_().broadSearchPP( + ppPairs, + thisPoints, + thisDiams, + neighborProcPoints, + neighborProcDiams); + + pOutput<<"ppPairs size in boundary"<< ppPairs.size()< ppContactSearch_ = nullptr; + + const realPointField_D& diameter_; + + bool masterSearch_; + + void setSearchBox(); + +public: + + TypeInfo("boundaryContactSearch") + + processorBoundaryContactSearch( + const dictionary& dict, + const boundaryBase& boundary, + const contactSearch& cSearch + ); + + ~processorBoundaryContactSearch() override = default; + + add_vCtor( + boundaryContactSearch, + processorBoundaryContactSearch, + boundaryBase + ); + + bool broadSearch( + uint32 iter, + real t, + real dt, + csPairContainerType& ppPairs, + csPairContainerType& pwPairs, + bool force = false + ) override; +}; + +} + +#endif //__processorBoundaryContactSearch_hpp__ \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp new file mode 100644 index 00000000..2f0e4089 --- /dev/null +++ 
b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp @@ -0,0 +1,160 @@ + +#include "twoPartContactSearch.hpp" +#include "twoPartContactSearchKernels.hpp" +#include "phasicFlowKokkos.hpp" +#include "streams.hpp" + +void pFlow::twoPartContactSearch::checkAllocateNext(uint32 n) +{ + if( nextCapacity_ < n) + { + nextCapacity_ = n; + reallocNoInit(next_, n); + } +} + +void pFlow::twoPartContactSearch::nullifyHead() +{ + fill(head_, static_cast(-1)); +} + +void pFlow::twoPartContactSearch::nullifyNext(uint32 n) +{ + fill(next_, 0u, n, static_cast(-1)); +} + +void pFlow::twoPartContactSearch::buildList( + const deviceScatteredFieldAccess &points) +{ + if(points.empty())return; + uint32 n = points.size(); + checkAllocateNext(n); + nullifyNext(n); + nullifyHead(); + + pFlow::twoPartContactSearchKernels::buildNextHead( + points, + searchCells_, + head_, + next_ + ); +} + +pFlow::twoPartContactSearch::twoPartContactSearch +( + const box &domain, + real cellSize, + real sizeRatio +) +: + searchCells_(domain, cellSize), + head_("periodic:head",searchCells_.nx(), searchCells_.ny(), searchCells_.nz()), + sizeRatio_(sizeRatio) +{ + +} + +bool pFlow::twoPartContactSearch::broadSearchPP +( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points1, + const deviceScatteredFieldAccess& diams1, + const deviceScatteredFieldAccess &points2, + const deviceScatteredFieldAccess& diams2, + const realx3& transferVec +) +{ + + buildList(points1); + + uint32 nNotInserted = 1; + + // loop until the container size fits the numebr of contact pairs + while (nNotInserted > 0) + { + + nNotInserted = pFlow::twoPartContactSearchKernels::broadSearchPP + ( + ppPairs, + points1, + diams1, + points2, + diams2, + transferVec, + head_, + next_, + searchCells_, + sizeRatio_ + ); + + + if(nNotInserted) + { + // - resize the container + // note that getFull now shows the number of failed insertions. + uint32 len = max(nNotInserted,100u) ; + + auto oldCap = ppPairs.capacity(); + + ppPairs.increaseCapacityBy(len); + + INFORMATION<< "Particle-particle contact pair container capacity increased from "<< + oldCap << " to "< &points1, + const deviceScatteredFieldAccess &diams1, + const realx3Vector_D& points2, + const realVector_D& diams2 +) +{ + buildList(points1); + + uint32 nNotInserted = 1; + + // loop until the container size fits the numebr of contact pairs + while (nNotInserted > 0) + { + + nNotInserted = pFlow::twoPartContactSearchKernels::broadSearchPP + ( + ppPairs, + points1, + diams1, + points2, + diams2, + head_, + next_, + searchCells_, + sizeRatio_ + ); + + + if(nNotInserted) + { + // - resize the container + // note that getFull now shows the number of failed insertions. + uint32 len = max(nNotInserted,100u) ; + + auto oldCap = ppPairs.capacity(); + + ppPairs.increaseCapacityBy(len); + + INFORMATION<< "Particle-particle contact pair container capacity increased from "<< + oldCap << " to "<; + + using NextType = deviceViewType1D; + +private: + + cells searchCells_; + + HeadType head_{ "periodic::head", 1, 1, 1 }; + + NextType next_{ "periodic::next", 1 }; + + real sizeRatio_ = 1.0; + + uint32 nextCapacity_ = 0; + + void checkAllocateNext(uint32 n); + + void nullifyHead(); + + void nullifyNext(uint32 n); + + void buildList( + const deviceScatteredFieldAccess &points); + +public: + twoPartContactSearch( + const box &domain, + real cellSize, + real sizeRatio = 1.0); + + /// @brief Perform a broad-search for spheres in two adjacent regions. 
+ /// Region 1 is considered as the master (primary) region and region 2 as slave + /// @param ppPairs pairs container which holds i and j + /// @param points1 point positions in region 1 + /// @param diams1 diameter of spheres in region 1 + /// @param points2 point positions in region 2 + /// @param diams2 diameter of spheres in region 2 + /// @param transferVec a vector to transfer points from region 2 to region 1 + /// @return true if it is successful + bool broadSearchPP( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points1, + const deviceScatteredFieldAccess &diams1, + const deviceScatteredFieldAccess &points2, + const deviceScatteredFieldAccess &diams2, + const realx3 &transferVec); + + bool broadSearchPP( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points1, + const deviceScatteredFieldAccess &diams1, + const realx3Vector_D& points2, + const realVector_D& diams2); + + const auto& searchCells()const + { + return searchCells_; + } + + real sizeRatio()const + { + return sizeRatio_; + } +}; + +} + +#endif //__twoPartContactSearch_hpp__ \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp new file mode 100644 index 00000000..56f1885d --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp @@ -0,0 +1,188 @@ +#include "twoPartContactSearchKernels.hpp" + +INLINE_FUNCTION_HD +bool +sphereSphereCheckB( + const pFlow::realx3& p1, + const pFlow::realx3 p2, + pFlow::real d1, + pFlow::real d2 +) +{ + return pFlow::length(p2 - p1) < 0.5 * (d2 + d1); +} + +void +pFlow::twoPartContactSearchKernels::buildNextHead( + const deviceScatteredFieldAccess& points, + const cells& searchCells, + deviceViewType3D& head, + deviceViewType1D& next +) +{ + if (points.empty()) + return; + + uint32 n = points.size(); + + Kokkos::parallel_for( + "pFlow::ppwBndryContactSearch::buildList", + deviceRPolicyStatic(0, n), + LAMBDA_HD(uint32 i) { + int32x3 ind; + if (searchCells.pointIndexInDomain(points[i], ind)) + { + // discards points out of searchCell + uint32 old = + Kokkos::atomic_exchange(&head(ind.x(), ind.y(), ind.z()), i); + next[i] = old; + } + } + ); + Kokkos::fence(); +} + +pFlow::uint32 +pFlow::twoPartContactSearchKernels::broadSearchPP( + csPairContainerType& ppPairs, + const deviceScatteredFieldAccess& points, + const deviceScatteredFieldAccess& diams, + const deviceScatteredFieldAccess& mirrorPoints, + const deviceScatteredFieldAccess& mirrorDiams, + const realx3& transferVec, + const deviceViewType3D& head, + const deviceViewType1D& next, + const cells& searchCells, + const real sizeRatio +) +{ + if (points.empty()) + return 0; + if (mirrorPoints.empty()) + return 0; + + auto nMirror = mirrorPoints.size(); + + uint32 getFull = 0; + + Kokkos::parallel_reduce( + "pFlow::twoPartContactSearchKernels::broadSearchPP", + deviceRPolicyStatic(0, nMirror), + LAMBDA_HD(const uint32 mrrI, uint32& getFullUpdate) { + realx3 p_m = mirrorPoints(mrrI) + transferVec; + + int32x3 ind_m; + if (!searchCells.pointIndexInDomain(p_m, ind_m)) + return; + + real d_m = sizeRatio * mirrorDiams[mrrI]; + + for (int ii = -1; ii < 2; ii++) + { + for (int jj = -1; jj < 2; jj++) + { + for (int kk = -1; kk < 2; kk++) + { + auto ind = ind_m + int32x3{ ii, jj, kk }; + + if (!searchCells.inCellRange(ind)) + continue; + + uint32 thisI = head(ind.x(), ind.y(), 
ind.z()); + while (thisI != -1) + { + auto d_n = sizeRatio * diams[thisI]; + + // first item is for this boundary and second itme, + // for mirror + if(sphereSphereCheckB(p_m, points[thisI], d_m, d_n)&& + ppPairs.insert(thisI,mrrI) == -1) + { + getFullUpdate++; + } + + thisI = next(thisI); + } + } + } + } + }, + getFull + ); + + return getFull; +} + +pFlow::uint32 +pFlow::twoPartContactSearchKernels::broadSearchPP( + csPairContainerType& ppPairs, + const deviceScatteredFieldAccess& points1, + const deviceScatteredFieldAccess& diams1, + const realx3Vector_D& points2, + const realVector_D& diams2, + const deviceViewType3D& head, + const deviceViewType1D& next, + const cells& searchCells, + real sizeRatio +) +{ + if (points1.empty()) + return 0; + if (points2.empty()) + return 0; + + auto nP2 = points2.size(); + auto points2View = points2.deviceView(); + auto diams2View = diams2.deviceView(); + + uint32 getFull = 0; + + Kokkos::parallel_reduce( + "pFlow::twoPartContactSearchKernels::broadSearchPP", + deviceRPolicyStatic(0, nP2), + LAMBDA_HD(const uint32 i2, uint32& getFullUpdate) { + realx3 p_m = points2View(i2); + + int32x3 ind_m; + if (!searchCells.pointIndexInDomain(p_m, ind_m)) + return; + + real d_m = sizeRatio * diams2View[i2]; + + for (int ii = -1; ii < 2; ii++) + { + for (int jj = -1; jj < 2; jj++) + { + for (int kk = -1; kk < 2; kk++) + { + auto ind = ind_m + int32x3{ ii, jj, kk }; + + if (!searchCells.inCellRange(ind)) + { + continue; + } + + uint32 i1 = head(ind.x(), ind.y(), ind.z()); + while (i1 != -1) + { + auto d_n = sizeRatio * diams1[i1]; + + // first item is for this boundary and second itme, + // for mirror + if(sphereSphereCheckB(p_m, points1[i1], d_m, d_n)&& + ppPairs.insert(i1,i2) == -1) + { + getFullUpdate++; + } + + i1 = next(i1); + } + } + } + } + }, + getFull + ); + + return getFull; +} \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp new file mode 100644 index 00000000..42f7cda1 --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp @@ -0,0 +1,49 @@ +#ifndef __twoPartContactSearchKernels_hpp__ +#define __twoPartContactSearchKernels_hpp__ + +#include "contactSearchGlobals.hpp" +#include "cells.hpp" +#include "contactSearchFunctions.hpp" +#include "scatteredFieldAccess.hpp" +#include "VectorSingles.hpp" + +namespace pFlow::twoPartContactSearchKernels +{ + +void buildNextHead( + const deviceScatteredFieldAccess &points, + const cells &searchCells, + deviceViewType3D &head, + deviceViewType1D &next ); + + +uint32 broadSearchPP +( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points, + const deviceScatteredFieldAccess &diams, + const deviceScatteredFieldAccess &mirrorPoints, + const deviceScatteredFieldAccess &mirrorDiams, + const realx3 &transferVec, + const deviceViewType3D &head, + const deviceViewType1D &next, + const cells &searchCells, + real sizeRatio +); + +uint32 +broadSearchPP( + csPairContainerType& ppPairs, + const deviceScatteredFieldAccess& points1, + const deviceScatteredFieldAccess& diams1, + const realx3Vector_D& points2, + const realVector_D& diams2, + const deviceViewType3D& head, + const deviceViewType1D& next, + const cells& searchCells, + real sizeRatio +); +} + + +#endif //__twoPartContactSearchKernels_hpp__ \ No newline at end of file diff --git 
a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp new file mode 100644 index 00000000..a62f3166 --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp @@ -0,0 +1,131 @@ + +#ifndef __processorBoundarySIKernels_hpp__ +#define __processorBoundarySIKernels_hpp__ + +namespace pFlow::MPI::processorBoundarySIKernels +{ + +template +inline +void sphereSphereInteraction +( + real dt, + const ContactListType& cntctList, + const ContactForceModel& forceModel, + const deviceScatteredFieldAccess& thisPoints, + const deviceViewType1D& thisDiam, + const deviceViewType1D& thisPropId, + const deviceViewType1D& thisVel, + const deviceViewType1D& thisRVel, + const deviceViewType1D& thisCForce, + const deviceViewType1D& thisCTorque, + const deviceViewType1D& neighborPoints, + const deviceViewType1D& neighborDiam, + const deviceViewType1D& neighborPropId, + const deviceViewType1D& neighborVel, + const deviceViewType1D& neighborRVel, + const deviceViewType1D& neighborCForce, + const deviceViewType1D& neighborCTorque +) +{ + + using ValueType = typename ContactListType::ValueType; + uint32 ss = cntctList.size(); + if(ss == 0u)return; + + uint32 lastItem = cntctList.loopCount(); + + Kokkos::parallel_for( + "pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction", + deviceRPolicyDynamic(0,lastItem), + LAMBDA_HD(uint32 n) + { + + if(!cntctList.isValid(n))return; + + auto [i,j] = cntctList.getPair(n); + uint32 ind_i = thisPoints.index(i); + uint32 ind_j = j; + + real Ri = 0.5*thisDiam[ind_i]; + real Rj = 0.5*neighborDiam[ind_j]; + realx3 xi = thisPoints.field()[ind_i]; + realx3 xj = neighborPoints[ind_j]; + + real dist = length(xj-xi); + real ovrlp = (Ri+Rj) - dist; + + if( ovrlp >0.0 ) + { + auto Nij = (xj-xi)/max(dist,smallValue); + auto wi = thisRVel[ind_i]; + auto wj = neighborRVel[ind_j]; + auto Vr = thisVel[ind_i] - neighborVel[ind_j] + cross((Ri*wi+Rj*wj), Nij); + + auto history = cntctList.getValue(n); + + int32 propId_i = thisPropId[ind_i]; + int32 propId_j = neighborPropId[ind_j]; + + realx3 FCn, FCt, Mri, Mrj, Mij, Mji; + + // calculates contact force + forceModel.contactForce( + dt, i, j, + propId_i, propId_j, + Ri, Rj, + ovrlp, + Vr, Nij, + history, + FCn, FCt); + + forceModel.rollingFriction( + dt, i, j, + propId_i, propId_j, + Ri, Rj, + wi, wj, + Nij, + FCn, + Mri, Mrj); + + auto M = cross(Nij,FCt); + Mij = Ri*M+Mri; + Mji = Rj*M+Mrj; + + auto FC = FCn + FCt; + + + Kokkos::atomic_add(&thisCForce[ind_i].x_,FC.x_); + Kokkos::atomic_add(&thisCForce[ind_i].y_,FC.y_); + Kokkos::atomic_add(&thisCForce[ind_i].z_,FC.z_); + + Kokkos::atomic_add(&neighborCForce[ind_j].x_,-FC.x_); + Kokkos::atomic_add(&neighborCForce[ind_j].y_,-FC.y_); + Kokkos::atomic_add(&neighborCForce[ind_j].z_,-FC.z_); + + Kokkos::atomic_add(&thisCTorque[ind_i].x_, Mij.x_); + Kokkos::atomic_add(&thisCTorque[ind_i].y_, Mij.y_); + Kokkos::atomic_add(&thisCTorque[ind_i].z_, Mij.z_); + + Kokkos::atomic_add(&neighborCTorque[ind_j].x_, Mji.x_); + Kokkos::atomic_add(&neighborCTorque[ind_j].y_, Mji.y_); + Kokkos::atomic_add(&neighborCTorque[ind_j].z_, Mji.z_); + + + cntctList.setValue(n,history); + + } + else + { + cntctList.setValue(n, ValueType()); + } + + }); + Kokkos::fence(); +} + + +} //pFlow::MPI::processorBoundarySIKernels + + +#endif //__processorBoundarySIKernels_hpp__ \ No newline 
at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp new file mode 100644 index 00000000..ef09f0b5 --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp @@ -0,0 +1,73 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "processorBoundarySIKernels.hpp" + +template +pFlow::MPI::processorBoundarySphereInteraction::processorBoundarySphereInteraction( + const boundaryBase &boundary, + const sphereParticles &sphPrtcls, + const GeometryMotionModel &geomMotion) +: + boundarySphereInteraction( + boundary, + sphPrtcls, + geomMotion + ), + masterInteraction_(boundary.isBoundaryMaster()) +{ + pOutput<<"Processor boundayrCondition for "<< boundary.name()< +bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction +( + real dt, + const ContactForceModel &cfModel +) +{ + if(!masterInteraction_) return true; + + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& a = sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(); + + /*pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + dt, + this->ppPairs(), + cfModel, + this->boundary().thisPoints(), + sphPar.diameter().deviceViewAll(), + sphPar.propertyId().deviceViewAll(), + sphPar.velocity().deviceViewAll(), + sphPar.rVelocity().deviceViewAll(), + sphPar.contactForce().deviceViewAll(), + sphPar.contactTorque().deviceViewAll(), + this->boundary().neighborProcPoints().deviceViewAll(), + sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.propertyId().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.velocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.contactForce().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.contactTorque().BoundaryField(thisIndex).neighborProcField().deviceViewAll() + );*/ + + return true; +} \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp new file mode 100644 index 00000000..d3c56c04 --- /dev/null +++ 
b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp @@ -0,0 +1,90 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __processorBoundarySphereInteraction_hpp__ +#define __processorBoundarySphereInteraction_hpp__ + +#include "boundarySphereInteraction.hpp" + +namespace pFlow::MPI +{ + +template +class processorBoundarySphereInteraction +: + public boundarySphereInteraction +{ +public: + + using PBSInteractionType = + processorBoundarySphereInteraction; + + using BSInteractionType = + boundarySphereInteraction; + + using GeometryMotionModel = typename BSInteractionType::GeometryMotionModel; + + using ContactForceModel = typename BSInteractionType::ContactForceModel; + + using MotionModel = typename geometryMotionModel::MotionModel; + + using ModelStorage = typename ContactForceModel::contactForceStorage; + + using IdType = typename BSInteractionType::IdType; + + using IndexType = typename BSInteractionType::IndexType; + + using ContactListType = typename BSInteractionType::ContactListType; + +private: + + bool masterInteraction_; + +public: + + TypeInfoTemplate22("boundarySphereInteraction", "processor",ContactForceModel, MotionModel); + + + processorBoundarySphereInteraction( + const boundaryBase& boundary, + const sphereParticles& sphPrtcls, + const GeometryMotionModel& geomMotion + ); + + add_vCtor + ( + BSInteractionType, + PBSInteractionType, + boundaryBase + ); + + ~processorBoundarySphereInteraction()override = default; + + bool sphereSphereInteraction( + real dt, + const ContactForceModel& cfModel)override; + +}; + +} + +#include "processorBoundarySphereInteraction.cpp" + + +#endif //__processorBoundarySphereInteraction_hpp__ \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp new file mode 100644 index 00000000..25347d61 --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp @@ -0,0 +1,17 @@ + +#include "processorBoundarySphereInteraction.hpp" +#include "geometryMotions.hpp" +#include "contactForceModels.hpp" + + +template class pFlow::MPI::processorBoundarySphereInteraction +< + pFlow::cfModels::limitedNonLinearNormalRolling, + pFlow::rotationAxisMotionGeometry +>; + +template class pFlow::MPI::processorBoundarySphereInteraction +< + pFlow::cfModels::nonLimitedNonLinearNormalRolling, + 
pFlow::rotationAxisMotionGeometry +>; \ No newline at end of file From 6241fa6dd3e064146f7d2240f6ac6c806e8fbdb2 Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Sat, 27 Apr 2024 08:59:13 -0700 Subject: [PATCH 05/14] MPI particle id handler --- .../MPIParticleIdHandler.cpp | 70 +++++++++++++++++++ .../MPIParticleIdHandler.hpp | 60 ++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp create mode 100644 src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.hpp diff --git a/src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp b/src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp new file mode 100644 index 00000000..056d314c --- /dev/null +++ b/src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp @@ -0,0 +1,70 @@ +#include "MPIParticleIdHandler.hpp" +#include "procCommunication.hpp" + +pFlow::MPI::MPIParticleIdHandler::MPIParticleIdHandler +( + pointStructure& pStruct +) +: + particleIdHandler(pStruct) +{ + initialIdCheck(); +} + +pFlow::Pair + pFlow::MPI::MPIParticleIdHandler::getIdRange(uint32 nNewParticles) +{ + uint32 startId; + if(maxId_==-1) + { + startId = 0; + } + else + { + startId = maxId_+1; + } + uint32 endId = startId+nNewParticles-1; + maxId_ = endId; + return {startId, endId}; +} + +bool pFlow::MPI::MPIParticleIdHandler::initialIdCheck() +{ + /// empty point structure / no particles in simulation + uint32 maxId = -1; + if( !pStruct().empty() ) + { + maxId = max( *this ); + } + + auto maxIdAll = procVector(pFlowProcessors()); + auto numAll = procVector(pFlowProcessors()); + auto comm = procCommunication(pFlowProcessors()); + + comm.collectAllToAll(maxId, maxIdAll); + comm.collectAllToAll(size(),numAll); + + uint32 n = 0; + for(uint32 i=0; i"); + + explicit MPIParticleIdHandler(pointStructure& pStruct); + + ~MPIParticleIdHandler() override = default; + + add_vCtor( + particleIdHandler, + MPIParticleIdHandler, + pointStructure + ); + + Pair getIdRange(uint32 nNewParticles) override; + + uint32 maxId() const override + { + return maxId_; + } +}; + +} + +#endif //__MPIParticleIdHandler_hpp__ \ No newline at end of file From d0798dfc0b09605537c7df585d70bc3934e9cf14 Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Sat, 27 Apr 2024 09:11:09 -0700 Subject: [PATCH 06/14] clean up of un-used codes --- .../MPIParallelization/CMakeLists.txt | 36 -- .../MPIParallelization/boundaryProcessor.cpp | 61 --- .../MPIParallelization/boundaryProcessor.hpp | 67 ---- .../MPIParallelization/dataIOMPI.cpp | 5 - .../MPIParallelization/dataIOMPI.hpp | 97 ----- .../MPIParallelization/gatherMaster.hpp | 105 ----- .../MPIParallelization/mpiCommunication.hpp | 377 ------------------ .../MPIParallelization/mpiTypes.hpp | 75 ---- .../MPIParallelization/partitioning.cpp | 113 ------ .../MPIParallelization/partitioning.hpp | 168 -------- .../MPIParallelization/procCommunication.cpp | 30 -- .../MPIParallelization/procCommunication.hpp | 178 --------- .../MPIParallelization/procVector.hpp | 199 --------- .../processorBoundaryField.cpp | 29 -- .../processorBoundaryField.hpp | 80 ---- .../processorBoundaryFields.cpp | 10 - .../MPIParallelization/rcb1DPartitioning.cpp | 325 --------------- .../MPIParallelization/rcb1DPartitioning.hpp | 240 ----------- .../scatteredMasterDistribute.cpp | 158 -------- .../scatteredMasterDistribute.hpp | 69 ---- .../scatteredMasterDistributeChar.cpp | 166 -------- .../scatteredMasterDistributeChar.hpp | 67 ---- 22 files changed, 
2655 deletions(-) delete mode 100644 src/phasicFlow/MPIParallelization/CMakeLists.txt delete mode 100644 src/phasicFlow/MPIParallelization/boundaryProcessor.cpp delete mode 100644 src/phasicFlow/MPIParallelization/boundaryProcessor.hpp delete mode 100644 src/phasicFlow/MPIParallelization/dataIOMPI.cpp delete mode 100644 src/phasicFlow/MPIParallelization/dataIOMPI.hpp delete mode 100644 src/phasicFlow/MPIParallelization/gatherMaster.hpp delete mode 100644 src/phasicFlow/MPIParallelization/mpiCommunication.hpp delete mode 100644 src/phasicFlow/MPIParallelization/mpiTypes.hpp delete mode 100644 src/phasicFlow/MPIParallelization/partitioning.cpp delete mode 100644 src/phasicFlow/MPIParallelization/partitioning.hpp delete mode 100644 src/phasicFlow/MPIParallelization/procCommunication.cpp delete mode 100644 src/phasicFlow/MPIParallelization/procCommunication.hpp delete mode 100644 src/phasicFlow/MPIParallelization/procVector.hpp delete mode 100644 src/phasicFlow/MPIParallelization/processorBoundaryField.cpp delete mode 100644 src/phasicFlow/MPIParallelization/processorBoundaryField.hpp delete mode 100644 src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp delete mode 100644 src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp delete mode 100644 src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp delete mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp delete mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp delete mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp delete mode 100644 src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp diff --git a/src/phasicFlow/MPIParallelization/CMakeLists.txt b/src/phasicFlow/MPIParallelization/CMakeLists.txt deleted file mode 100644 index 32ab1c6b..00000000 --- a/src/phasicFlow/MPIParallelization/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -#add Zoltan -set(Zoltan_Install_DIR) -if(DEFINED ENV{Zoltan_DIR}) - set(Zoltan_Install_DIR $ENV{Zoltan_DIR}) -else() - set(Zoltan_Install_DIR $ENV{HOME}/PhasicFlow/Zoltan) -endif() -message(STATUS "Zoltan install directory is ${Zoltan_Install_DIR}") - -set(ZOLTAN_PREFIX "${Zoltan_Install_DIR}" CACHE STRING "Zoltan install directory") - -find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include") - -message(STATUS "Zoltan include path: ${ZOLTAN_INCLUDE_DIR}") - -find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib") -message(STATUS "Zoltan lib path: ${ZOLTAN_LIBRARY}") - - -set(SourceFiles - partitioning.cpp - rcb1DPartitioning.cpp - domain/MPISimulationDomain.cpp) - -set(link_libs Kokkos::kokkos phasicFlow PRIVATE MPI::MPI_CXX ${ZOLTAN_LIBRARY} -lm ) - -pFlow_add_library_install(MPIParallelization SourceFiles link_libs) -target_include_directories(MPIParallelization PUBLIC ${ZOLTAN_INCLUDE_DIR}) - - - - - - - - diff --git a/src/phasicFlow/MPIParallelization/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/boundaryProcessor.cpp deleted file mode 100644 index a5622691..00000000 --- a/src/phasicFlow/MPIParallelization/boundaryProcessor.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file 
is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - ------------------------------------------------------------------------------*/ - -#include "boundaryProcessor.hpp" -#include "dictionary.hpp" - -pFlow::boundaryProcessor::boundaryProcessor -( - const dictionary& dict, - const plane& bplane, - internalPoints& internal -) -: - boundaryBase(dict, bplane, internal) -{ - -} - -bool pFlow::boundaryProcessor::beforeIteratoin -( - uint32 iterNum, - real t -) -{ - return true; -} - -bool pFlow::boundaryProcessor::iterate -( - uint32 iterNum, - real t -) -{ - return true; -} - -bool pFlow::boundaryProcessor::afterIteration -( - uint32 iterNum, - real t -) -{ - return true; -} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/boundaryProcessor.hpp b/src/phasicFlow/MPIParallelization/boundaryProcessor.hpp deleted file mode 100644 index 66b3b468..00000000 --- a/src/phasicFlow/MPIParallelization/boundaryProcessor.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ - -#ifndef __boundaryProcessor_hpp__ -#define __boundaryProcessor_hpp__ - - -#include "boundaryBase.hpp" - -namespace pFlow -{ - -class boundaryProcessor -: - public boundaryBase -{ -protected: - - -public: - - TypeInfo("boundary"); - - boundaryProcessor( - const dictionary& dict, - const plane& bplane, - internalPoints& internal); - - virtual - ~boundaryProcessor() = default; - - add_vCtor - ( - boundaryBase, - boundaryProcessor, - dictionary - ); - - bool beforeIteratoin(uint32 iterNum, real t) override; - - bool iterate(uint32 iterNum, real t) override; - - bool afterIteration(uint32 iterNum, real t) override; - - -}; - -} - -#endif //__boundaryProcessor_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI.cpp b/src/phasicFlow/MPIParallelization/dataIOMPI.cpp deleted file mode 100644 index 30fd93cf..00000000 --- a/src/phasicFlow/MPIParallelization/dataIOMPI.cpp +++ /dev/null @@ -1,5 +0,0 @@ - -#include "gatherMaster.hpp" - - - diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI.hpp b/src/phasicFlow/MPIParallelization/dataIOMPI.hpp deleted file mode 100644 index 850cf69b..00000000 --- a/src/phasicFlow/MPIParallelization/dataIOMPI.hpp +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef __datIOMPI_hpp__ -#define __datIOMPI_hpp__ - -#include "dataIO.hpp" -#include "pFlowProcessors.hpp" - -#ifdef pFlow_Build_MPI - #include "gatherMaster.hpp" -#endif - -namespace pFlow -{ - -template -class dataIOMPI -: - public dataIO -{ -protected: - - bool gatherData(span data ) override - { - - if(this->ioPattern_.isAllProcessorsDifferent()) - { - this->bufferSpan_ = data; - return true; - } - - if( this->ioPattern_.isMasterProcessorDistribute()) - { - -#ifdef pFlow_Build_MPI - - auto gatherT = pFlow::MPI::gatherMaster(pFlowProcessors()); - - if(!gatherT.gatherData(data)) - { - fatalErrorInFunction<<"Error in gathering data to master"<buffer_ = gatherT.moveData(); - - this->bufferSpan_ = makeSpan(this->buffer_); - - return true; -#else - notImplementedFunction; - fatalExit; - return false; -#endif //pFlow_Build_MPI - - } - - if( this->ioPattern_.isMasterProcessorOnly() || this->ioPattern_.isAllProcessorSimilar() ) - { - if( this->ioPattern_.isMaster() ) - { - this->bufferSpan_ = data; - } - else - { - this->bufferSpan_ = span(nullptr, 0); - return true; - } - } - - return false; - } -public: - - TypeInfo("dataIO"); - - dataIOMPI(const IOPattern& iop) - : - dataIO(iop) - {} - - dataIOMPI(const dataIOMPI&) = default; - - dataIOMPI(dataIOMPI&&) = default; - - - dataIOMPI& operator=(const dataIOMPI&) = default; - - dataIOMPI& operator=(dataIOMPI&&) = default; - - ~dataIOMPI() = default; - -}; - - -} - - -#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/gatherMaster.hpp b/src/phasicFlow/MPIParallelization/gatherMaster.hpp deleted file mode 100644 index ca1ecc77..00000000 --- a/src/phasicFlow/MPIParallelization/gatherMaster.hpp +++ /dev/null @@ -1,105 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. 
You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - ------------------------------------------------------------------------------*/ -#ifndef __gatherMaster_hpp__ -#define __gatherMaster_hpp__ - -#include - -#include "procCommunication.hpp" - -namespace pFlow::MPI -{ - -template -class gatherMaster -: - public procCommunication -{ -protected: - - std::vector buffer_; - -public: - - gatherMaster(const localProcessors& procs) - : - procCommunication(procs) - {} - - span getData() - { - if(this->localMaster()) - return span( buffer_.data(), buffer_.size()); - else - return span(nullptr, 0); - } - - std::vector moveData() - { - return std::move(buffer_); - } - - bool gatherData(span data) - { - int thisN = data.size(); - - bool succss; - - procVector numElems(this->processors(), true); - procVector displ(this->processors(), true); - - if( !this->collectAllToMaster(thisN, numElems) ) - { - fatalErrorInFunction<< - "error in collecting number of elements from processors"<(0)); - - buffer_.resize(totalN); - - std::exclusive_scan( - numElems.begin(), - numElems.end(), - displ.begin(), - 0); - - auto bufferSpan = makeSpan(buffer_); - - return CheckMPI( - Gatherv( - data, - bufferSpan, - makeSpan(numElems), - makeSpan(displ), - this->localMasterNo(), - this->localCommunicator()), - false); - - } - - -}; -} - -#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/mpiCommunication.hpp deleted file mode 100644 index 05a41fd5..00000000 --- a/src/phasicFlow/MPIParallelization/mpiCommunication.hpp +++ /dev/null @@ -1,377 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ - -#ifndef __mpiCommunication_H__ -#define __mpiCommunication_H__ - - -#include "mpiTypes.hpp" -#include "types.hpp" -#include "span.hpp" - - -namespace pFlow::MPI -{ - -extern DataType realx3Type__; - -extern DataType realx4Type__; - -extern DataType int32x3Type__; - -template -auto constexpr Type() -{ - return MPI_BYTE; -} - -template -auto constexpr sFactor() -{ - return sizeof(T); -} - -template -auto constexpr Type() -{ - return MPI_CHAR; -} -template -auto constexpr sFactor() -{ - return 1; -} - -template -auto constexpr Type() -{ - return MPI_SHORT; -} -template -auto constexpr sFactor() -{ - return 1; -} - -template -auto constexpr Type() -{ - return MPI_UNSIGNED_SHORT; -} -template -auto constexpr sFactor() -{ - return 1; -} - -template -auto constexpr Type() -{ - return MPI_INT; -} -template -auto constexpr sFactor() -{ - return 1; -} - -template<> -auto constexpr Type() -{ - return MPI_UNSIGNED; -} -template<> -auto constexpr sFactor() -{ - return 1; -} - -template<> -auto constexpr Type() -{ - return MPI_LONG; -} -template<> -auto constexpr sFactor() -{ - return 1; -} - -template<> -auto constexpr Type() -{ - return MPI_UNSIGNED_LONG; -} -template<> -auto constexpr sFactor() -{ - return 1; -} - - -template<> -auto constexpr Type() -{ - return MPI_FLOAT; -} -template<> -auto constexpr sFactor() -{ - return 1; -} - -template<> -auto constexpr Type() -{ - return MPI_DOUBLE; -} -template<> -auto constexpr sFactor() -{ - return 1; -} - -template<> -inline -auto Type() -{ - return realx3Type__; -} - -template<> -auto constexpr sFactor() -{ - return 1; -} - -template<> -inline -auto Type() -{ - return realx4Type__; -} - -template<> -auto constexpr sFactor() -{ - return 1; -} - - -template<> -inline -auto Type() -{ - return int32x3Type__; -} - - -template<> -auto constexpr sFactor() -{ - return 1; -} - -/*inline -auto createByteSequence(int sizeOfElement) -{ - DataType newType; - MPI_Type_contiguous(sizeOfElement, MPI_CHAR, &newType); - MPI_Type_commit(&newType); - return newType; -}*/ - -inline -auto TypeCommit(DataType* type) -{ - return MPI_Type_commit(type); -} - -inline -auto TypeFree(DataType* type) -{ - return MPI_Type_free(type); - -} -template -inline auto getCount(Status* status, int& count) -{ - int lCount; - auto res = MPI_Get_count(status, Type(), &lCount); - count = lCount/sFactor(); - return res; -} - -template -inline int convertIndex(const int& ind) -{ - return ind*sFactor(); -} - -template -inline auto send(span data, int dest, int tag, Comm comm) -{ - return MPI_Send( - data.data(), - sFactor()*data().size(), - Type(), - dest, - tag, - comm); -} - - - -template -inline auto recv(span data, int source, int tag, Comm comm, Status *status) -{ - return MPI_Recv( - data.data(), - sFactor()*data.size(), - Type(), - source, - tag, - comm, - status); -} - - -template -inline auto scan(T sData, T& rData, Comm comm, Operation op = SumOp) -{ - return MPI_Scan(&sData, &rData, sFactor()*1, Type(), op , comm ); -} - -// gathering one scalar data to root processor -template -inline auto gather(T sendData, span& recvData, int root, Comm comm) -{ - return MPI_Gather( - &sendData, - sFactor()*1, - Type(), - recvData.data(), - sFactor()*1, - Type(), - root, - comm); -} - -template -inline auto allGather(T sendData, span& recvData, Comm comm) -{ - return MPI_Allgather( - &sendData, - sFactor()*1, - Type(), - recvData.data(), - sFactor()*1, - Type(), - comm); -} - -template -inline auto scatter(span 
sendData, T& recvData, int root, Comm comm) -{ - return MPI_Scatter( - sendData.data(), - sFactor()*1, - Type(), - &recvData, - sFactor()*1, - Type(), - root, - comm); -} - -template -inline auto Bcast(T& sendData, int root, Comm comm) -{ - return MPI_Bcast( - &sendData, sFactor()*1, Type(), root, comm); - -} - -template -bool typeCreateIndexedBlock( - span index, - DataType &newType) -{ - auto res = MPI_Type_create_indexed_block( - index.size(), - sFactor(), - index.data(), - Type(), - &newType); - - if(res == Success) - { - TypeCommit(&newType); - } - else - { - return false; - } - - return true; -} - - -template -inline auto Gatherv -( - span sendData, - span& recvData, - span recvCounts, - span displs, - int root, - Comm comm) -{ - - return MPI_Gatherv( - sendData.data(), - sendData.size()*sFactor(), - Type(), - recvData.data(), - recvCounts.data(), - displs.data(), - Type(), - root, - comm - ); - -} - -inline auto Wait(Request* request, Status* status) -{ - return MPI_Wait(request, status); -} - -inline auto typeFree(DataType& type) -{ - return MPI_Type_free(&type); -} - - -} - - - -#endif //__mpiCommunication_H__ diff --git a/src/phasicFlow/MPIParallelization/mpiTypes.hpp b/src/phasicFlow/MPIParallelization/mpiTypes.hpp deleted file mode 100644 index c1721290..00000000 --- a/src/phasicFlow/MPIParallelization/mpiTypes.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ - -#ifndef __mpiTypes_H__ -#define __mpiTypes_H__ - - -#ifdef pFlow_Build_MPI - -#include - -namespace pFlow::MPI -{ - // types - using Comm = MPI_Comm; - using Group = MPI_Group; - using Status = MPI_Status; - using Offset = MPI_Offset; - using Request = MPI_Request; - using Operation = MPI_Op; - using Information = MPI_Info; - using DataType = MPI_Datatype; - - inline Comm CommWorld = MPI_COMM_WORLD; - - // all nulls - - inline auto ProcNull = MPI_PROC_NULL; - inline auto InfoNull = MPI_INFO_NULL; - inline auto RequestNull = MPI_REQUEST_NULL; - inline auto StatusIgnore = MPI_STATUS_IGNORE; - inline auto StatusesIgnore = MPI_STATUSES_IGNORE; - inline auto FileNull = MPI_FILE_NULL; - inline Comm CommNull = MPI_COMM_NULL; - inline auto TypeNull = MPI_DATATYPE_NULL; - - // errors - inline const auto Success = MPI_SUCCESS; - inline const auto ErrOp = MPI_ERR_OP; - - inline const auto SumOp = MPI_SUM; - - inline const size_t MaxNoProcessors = 2048; - -} - -#else - -namespace pFlow::MPI -{ - -} - -#endif // pFlow_Build_MPI - - - -#endif //__mpiTypes_H__ diff --git a/src/phasicFlow/MPIParallelization/partitioning.cpp b/src/phasicFlow/MPIParallelization/partitioning.cpp deleted file mode 100644 index 0ae5cf82..00000000 --- a/src/phasicFlow/MPIParallelization/partitioning.cpp +++ /dev/null @@ -1,113 +0,0 @@ - - -#include "partitioning.hpp" -#include "error.hpp" -#include "streams.hpp" - -void pFlow::partitioning::freeZoltan() -{ - if(validPointers_) - { - Zoltan::LB_Free_Part(&importGlobalGids_, &importLocalGids_, - &importProcs_, &importToPart_); - - Zoltan::LB_Free_Part(&exportGlobalGids_, &exportLocalGids_, - &exportProcs_, &exportToPart_); - validPointers_ = false; - } - - zoltan_.release(); -} - - -pFlow::partitioning::partitioning -( - const dictionary& dict, - const box& globalBox -) -: - globalBox_(globalBox) -{ - if(!zoltanInitialized__) - { - auto rc = Zoltan_Initialize - ( - processors::argc(), - processors::argv(), - &version_ - ); - - if (rc != ZOLTAN_OK) - { - fatalErrorInFunction<<"Cannot initialize zoltan"<(pFlowProcessors().localCommunicator()); - - zoltan_->Set_Param("DEBUG_LEVEL", "0"); - zoltan_->Set_Param("LB_METHOD", "RCB"); - zoltan_->Set_Param("NUM_GID_ENTRIES", "1"); - zoltan_->Set_Param("NUM_LID_ENTRIES", "1"); - zoltan_->Set_Param("OBJ_WEIGHT_DIM", "0"); - zoltan_->Set_Param("RETURN_LISTS", "ALL"); - -} - -bool pFlow::partitioning::partition(span points, pFlagTypeHost flags) -{ - pointCollection pointCollctn{points, flags}; - - return partition(pointCollctn); -} -int GetObjectSize -( - void *data, - int num_gid_entries, - int num_lid_entries, - ZOLTAN_ID_PTR global_id, - ZOLTAN_ID_PTR local_id, - int *ierr -) -{ - *ierr = ZOLTAN_OK; - pFlow::uint32 s = *(static_cast(data)); - return static_cast(s); -} - -void PackObject -( - void *data, - int num_gid_entries, - int num_lid_entries, - ZOLTAN_ID_PTR global_id, - ZOLTAN_ID_PTR local_id, - int dest, - int size, - char *buf, - int *ierr -) -{ - -} - -bool pFlow::partitioning::migrateData(span src, span dst, uint32 elementSize) -{ - dataCollection data{src, dst, elementSize}; - - zoltan_->Set_Obj_Size_Fn(GetObjectSize, &elementSize); - return false; -} - -pFlow::partitioning::~partitioning() -{ - freeZoltan(); -} - -void pFlow::partitioning::printBox()const -{ - pOutput<< "localBox:" << localBox_< points_; - pFlagTypeHost pFlag_; - - uint32 numActivePoints()const - { - return pFlag_.numActive(); - } -}; - -struct dataCollection -{ - span 
srcData_; - span dstData_; - uint32 elementSize_; -}; - -class partitioning -{ -protected: - - float version_ = 0.0; - - std::unique_ptr zoltan_ = nullptr; - - bool validPointers_ = false; - - box globalBox_; - - box localBox_; - - int32 changes_, numImport_, numExport_; - - id_t *importGlobalGids_, *importLocalGids_, *exportGlobalGids_, *exportLocalGids_; - - int32 *importProcs_, *importToPart_, *exportProcs_, *exportToPart_; - - uint32 numBeforePartition_ = 0 ; - - static inline bool zoltanInitialized__ = false; - - void freeZoltan(); - - virtual - bool partition(pointCollection& points) = 0; - -public: - - partitioning( - const dictionary& dict, - const box& globalBox); - - virtual - ~partitioning(); - - create_vCtor( - partitioning, - dictionary, - ( - const dictionary& dict, - const box& globalBox - ), - (dict, globalBox)); - - bool partition( - span points, - pFlagTypeHost flags); - - - bool migrateData(span src, span dst, uint32 elementSize); - - inline - auto localBox()const - { - return localBox_; - } - - inline - const auto& globalBox()const - { - return globalBox_; - } - - inline - bool partitionsChanged()const - { - return changes_ == 1; - } - - - uint32 numberImportThisProc()const - { - return numImport_; - } - - uint32 numberExportThisProc()const - { - return numExport_; - } - - virtual - span exportList(int procNo)const = 0; - - virtual - pFlow::MPI::procVector> allExportLists()const=0; - - void printBox()const; - - -}; - - -} - - -#endif //__partitioning_hpp__ - - - -/*static - int getNumberOfPoints(void *data, int32 *ierr); - - static - void getPointList( - void *data, - int32 sizeGID, - int32 sizeLID, - id_t* globalID, - id_t* localID, - int32 wgt_dim, - float *obj_wgts, - int32 *ierr);*/ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/procCommunication.cpp b/src/phasicFlow/MPIParallelization/procCommunication.cpp deleted file mode 100644 index 81869453..00000000 --- a/src/phasicFlow/MPIParallelization/procCommunication.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ - -#include "procCommunication.hpp" - - -pFlow::MPI::procCommunication::procCommunication -( - const localProcessors& proc -) -: - processors_(proc) -{} diff --git a/src/phasicFlow/MPIParallelization/procCommunication.hpp b/src/phasicFlow/MPIParallelization/procCommunication.hpp deleted file mode 100644 index db600386..00000000 --- a/src/phasicFlow/MPIParallelization/procCommunication.hpp +++ /dev/null @@ -1,178 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - ------------------------------------------------------------------------------*/ -#ifndef __procCommunication_hpp__ -#define __procCommunication_hpp__ - - -#include "procVector.hpp" -#include "localProcessors.hpp" -#include "mpiCommunication.hpp" - -namespace pFlow::MPI -{ - - -class procCommunication -{ -protected: - - const localProcessors& processors_; - -public: - - procCommunication(const localProcessors& proc); - - ~procCommunication()=default; - - /// @brief Tell if this processor is master processor in the local - /// communicator - /// @return true if this processor is master - - inline - const auto& processors()const - { - return processors_; - } - - inline - bool localMaster()const - { - return processors_.localMaster();; - } - - inline - auto localSize()const - { - return processors_.localSize(); - } - - inline - auto localRank()const - { - return processors_.localRank(); - } - - inline - auto localCommunicator()const - { - return processors_.localCommunicator(); - } - - /// @brief return the master number in the local communicator - auto localMasterNo()const - { - return processors_.localMasterNo(); - } - - /// Send a single val to all processors including itself (local communicator) - template - std::pair distributeMasterToAll(const T& val) - { - - T retVal = val; - auto res = CheckMPI( - Bcast(retVal, localMasterNo(),localCommunicator() ), - false); - - return {retVal, res}; - } - - /// @brief Send a single value to all processor including master (in local communicator) - /// @param val value to be sent - /// @param recvVal recieved value - /// @return true if successful and false if fail - template - bool distributeMasterToAll(const T& val, T& recvVal) - { - recvVal = val; - return CheckMPI( - Bcast(recvVal, localMasterNo(), localCommunicator()), - false); - } - - /// @brief values in the vector (size is equal to number of - // processors in local communicator) to each processor - template - std::pair distributeMasterToAll(const procVector& vals) - { - T val; - auto vec = vals.getSpan(); - auto res = CheckMPI( - scatter(vec, val, localMasterNo(), localCommunicator()), - false); - - return {val, res}; - } - - /// @brief Each 
processor in the local communicator calls this funtion with a value - /// and the values are distributed among all processors - template - std::pair, bool> collectAllToAll(const T& val) - { - procVector allVec; - auto vec = allVec.getSpan(); - auto res = CheckMPI( - allGather(val, vec, localCommunicator()), - false); - return {allVec, res}; - } - - /// @brief Each processor in the local communicator calls this funtion with a value - /// and the values are distributed among all processors - template - bool collectAllToAll(const T& val, procVector& allVec) - { - auto vec = allVec.getSpan(); - return CheckMPI( - allGather(val, vec, localCommunicator()), - false); - } - - /// @brief Each processor in the local communicator calls this function with a value - /// and all values are collected in the master processor - template - std::pair,bool> collectAllToMaster(const T& val) - { - // only on master processor - procVector masterVec(processors_, true); - - auto masterSpan = masterVec.getSpan(); - auto res = CheckMPI( - gather(val,masterSpan, localMasterNo(), localCommunicator()), - false); - - return {masterVec, res}; - - } - - template - bool collectAllToMaster(const T& val, procVector& masterVec) - { - // only on master processor - auto [vec, res] = collectAllToMaster(val); - masterVec = vec; - return res; - } - -}; //procCommunication - -} // pFlow::MPI - -#endif //__procCommunication_hpp__ diff --git a/src/phasicFlow/MPIParallelization/procVector.hpp b/src/phasicFlow/MPIParallelization/procVector.hpp deleted file mode 100644 index f9a80037..00000000 --- a/src/phasicFlow/MPIParallelization/procVector.hpp +++ /dev/null @@ -1,199 +0,0 @@ -#ifndef __procVector_hpp__ -#define __procVector_hpp__ - -// from PhasicFlow - -#include "localProcessors.hpp" -#include "span.hpp" -#include "streams.hpp" -#include "IOPattern.hpp" - -#include "mpiTypes.hpp" - -namespace pFlow::MPI -{ - -template -class procVector -: - public std::vector -{ -public: - - using ProcVectorType = procVector; - - using VectorType = std::vector; - -protected: - - int rank_ = 0; - - bool isMaster_ = false; - - using VectorType::reserve; - - using VectorType::resize; - - using VectorType::assign; - - using VectorType::clear; - - using VectorType::erase; - -public: - - procVector( - const localProcessors& procs, - bool onlyMaster = false) - : - rank_(procs.localRank()), - isMaster_(procs.localMaster()) - { - - if( onlyMaster && !isMaster_ ) return; - this->reserve(procs.localSize()); - this->resize(procs.localSize()); - } - - procVector( - const T& val, - const localProcessors& procs, - bool onlyMaster = false) - : - procVector(procs, onlyMaster) - { - std::fill(this->begin(), this->end(), val); - } - - procVector(const T& val, const procVector& src) - { - this->reserve(src.size()); - this->resize(src.size()); - std::fill(this->begin(), this->end(), val); - } - - procVector(const localProcessors& procs, const VectorType& src) - : - procVector(procs) - { - if(src.size()!= this->size()) - { - fatalErrorInFunction<< - "Size of std::vector and procVector does not match in construction"<assign(src.begin(), src.end()); - } - - procVector(const procVector&) = default; - - procVector(procVector&&) = default; - - procVector& operator=(const procVector&) = default; - - procVector& operator=(procVector&&) = default; - - procVector& operator=(const VectorType& src) - { - if(src.size() != this->size()) - { - fatalErrorInFunction<< - "Size of std::vector and procVector does not match in copy assignment"<(*this).operator=(src); - return *this; - } - 
- procVector& operator=(VectorType&& src) - { - if(src.size() != this->size()) - { - fatalErrorInFunction<< - "Size of std::vector and procVector does not match in move assignment" - <(*this).operator=(std::move(src)); - return *this; - } - - procVector(const localProcessors& procs, VectorType&& src) - : - VectorType(std::move(src)) - { - if(this->size()!= static_cast(procs.localSize())) - { - fatalErrorInFunction<< - "Size of std::vector and procVector does not match in move"<(this->data(), this->size()); - } - - inline - auto getSpan()const - { - return span(const_cast(this->data()), this->size()); - } - - bool write( - iOstream& os, - const IOPattern& iop ) const - { - return writeStdVector(os, *this, iop); - } - -}; - -template -inline iOstream& operator << (iOstream& os, const procVector& ovec ) -{ - if( !ovec.write(os, IOPattern::AllProcessorsDifferent) ) - { - ioErrorInFile(os.name(), os.lineNumber()); - fatalExit; - } - return os; -} - -} - - -#endif diff --git a/src/phasicFlow/MPIParallelization/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/processorBoundaryField.cpp deleted file mode 100644 index 5e94d0aa..00000000 --- a/src/phasicFlow/MPIParallelization/processorBoundaryField.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - ------------------------------------------------------------------------------*/ - -template - pFlow::processorBoundaryField::processorBoundaryField -( - const boundaryBase& boundary, - InternalFieldType& internal -) -: - BoundaryFieldType(boundary, internal) -{} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/processorBoundaryField.hpp deleted file mode 100644 index b3e83a22..00000000 --- a/src/phasicFlow/MPIParallelization/processorBoundaryField.hpp +++ /dev/null @@ -1,80 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. 
- - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - ------------------------------------------------------------------------------*/ -#ifndef __processorBoundaryField_hpp__ -#define __processorBoundaryField_hpp__ - -#include "boundaryField.hpp" - -namespace pFlow -{ - -template< class T, class MemorySpace = void> -class processorBoundaryField -: - public boundaryField -{ -public: - - using processorBoundaryFieldType = processorBoundaryField; - - using BoundaryFieldType = boundaryField; - - using InternalFieldType = typename BoundaryFieldType::InternalFieldType; - - using memory_space = typename BoundaryFieldType::memory_space; - - using execution_space = typename BoundaryFieldType::execution_space; - - - -public: - - TypeInfo("boundaryField"); - - processorBoundaryField( - const boundaryBase& boundary, - InternalFieldType& internal); - - - add_vCtor - ( - BoundaryFieldType, - processorBoundaryFieldType, - boundaryBase - ); - - - bool hearChanges - ( - const message& msg, - const anyList& varList - ) override - { - notImplementedFunction; - return false; - } - -}; - -} - -#include "processorBoundaryField.cpp" - -#endif //__processorBoundaryField_hpp__ diff --git a/src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp b/src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp deleted file mode 100644 index a81b5249..00000000 --- a/src/phasicFlow/MPIParallelization/processorBoundaryFields.cpp +++ /dev/null @@ -1,10 +0,0 @@ - -//#include "Field.hpp" -#include "createBoundaryFields.hpp" -#include "processorBoundaryField.hpp" - -createBoundary(pFlow::int8, pFlow::HostSpace, processor); - -createBoundary(pFlow::real, pFlow::HostSpace, processor); - - diff --git a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp deleted file mode 100644 index c2345ab6..00000000 --- a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.cpp +++ /dev/null @@ -1,325 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ - -#include "zoltan_cpp.h" - - -#include "error.hpp" -#include "processors.hpp" -#include "rcb1DPartitioning.hpp" - -bool pFlow::rcb1DPartitioning::partition(pointCollection &points) -{ - - zoltan_->Set_Param("RCB_OUTPUT_LEVEL", "0"); - zoltan_->Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); - zoltan_->Set_Param("KEEP_CUTS", "1"); - zoltan_->Set_Param("REDUCE_DIMENSIONS", "1"); - zoltan_->Set_Param("RCB_RECOMPUTE_BOX", "1"); - zoltan_->Set_Param("AVERAGE_CUTS", "0"); - zoltan_->Set_Param("MIGRATE_ONLY_PROC_CHANGES", "0"); - - zoltan_->Set_Num_Obj_Fn(rcb1DPartitioning::getNumberOfPoints, &points); - zoltan_->Set_Obj_List_Fn(rcb1DPartitioning::getPointList, &points); - zoltan_->Set_Num_Geom_Fn(rcb1DPartitioning::getNumGeometry, &points); - switch (direction_) - { - case Direction::X: - zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_x, &points); - break; - case Direction::Y: - zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_y, &points); - break; - case Direction::Z: - zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_z, &points); - break; - } - - int numGidEntries_, numLidEntries_; - int rc = zoltan_->LB_Partition(changes_, numGidEntries_, numLidEntries_, - numImport_, importGlobalGids_, importLocalGids_, importProcs_, importToPart_, - numExport_, exportGlobalGids_, exportLocalGids_, exportProcs_, exportToPart_); - - if (rc != ZOLTAN_OK) - { - fatalErrorInFunction<< "Zoltan faild to perform partitioning."< thisProc(points.numActivePoints(),0); - - - for(auto i =0; iRCB_Box - ( - processors::globalRank(), - nDim, - x0, y0, z0, - x1, y1, z1 - ); - - localBox_ = globalBox_; - - if(equal(x0, x1)) - { - x0 = x0 - 0.00001; - x1 = x1 + 0.00001; - } - - switch (direction_) - { - case Direction::X : - localBox_.minPoint().x_ = x0; - localBox_.maxPoint().x_ = x1; - break; - - case Direction::Y : - localBox_.minPoint().y_ = x0; - localBox_.maxPoint().y_ = x1; - break; - - case Direction::Z : - localBox_.minPoint().z_ = x0; - localBox_.maxPoint().z_ = x1; - break; - } - - - localBox_.minPoint() = max(localBox_.minPoint(), globalBox_.minPoint()); - localBox_.maxPoint() = min(localBox_.maxPoint(), globalBox_.maxPoint()); - - - return true; -} - -pFlow::rcb1DPartitioning::rcb1DPartitioning -( - const dictionary &dict, - const box &globalBox -) -: - partitioning(dict, globalBox), - exportIds_(pFlowProcessors()) -{ - - word directionName = dict.getVal("direction"); - - if(toUpper(directionName)== "X") - { - direction_ = Direction::X; - dirVector_ ={1.0, 0.0, 0.0}; - } - else if( toUpper(directionName) == "Y") - { - direction_ = Direction::Y; - dirVector_ ={0.0, 1.0, 0.0}; - } - else if( toUpper(directionName) == "Z") - { - direction_ = Direction::Z; - dirVector_ ={0.0, 0.0, 1.0}; - } - else - { - fatalErrorInFunction<< "wrong direction in dictionary "<< - dict.globalName()<<". 
Directions should be one of x, y, or z."<(data); - - *ierr = ZOLTAN_OK; - - return obj->numActivePoints(); -} - -void pFlow::rcb1DPartitioning::getPointList -( - void *data, - int sizeGID, - int sizeLID, - ZOLTAN_ID_PTR globalID, - ZOLTAN_ID_PTR localID, - int wgt_dim, - float *obj_wgts, - int *ierr -) -{ - auto* obj = static_cast(data); - *ierr = ZOLTAN_OK; - - auto activeRange = obj->pFlag_.activeRange(); - uint32 n = 0; - for (auto i=activeRange.start(); ipFlag_.isActive(i) ) - { - globalID[n] = i; - localID[n] = n; - n++; - } - } - -} - -void pFlow::rcb1DPartitioning::getGeometryList_x -( - void *data, - int sizeGID, - int sizeLID, - int num_obj, - ZOLTAN_ID_PTR globalID, - ZOLTAN_ID_PTR localID, - int num_dim, - double *geom_vec, - int *ierr -) -{ - - auto* obj = static_cast(data); - - if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) - { - *ierr = ZOLTAN_FATAL; - return; - } - - auto activeRange = obj->pFlag_.activeRange(); - uint32 n = 0; - for (auto i=activeRange.start(); ipFlag_.isActive(i) ) - { - geom_vec[n] = obj->points_[i].x_; - n++; - } - } - - *ierr = ZOLTAN_OK; - - return; -} - -void pFlow::rcb1DPartitioning::getGeometryList_y -( - void *data, - int sizeGID, - int sizeLID, - int num_obj, - ZOLTAN_ID_PTR globalID, - ZOLTAN_ID_PTR localID, - int num_dim, - double *geom_vec, - int *ierr -) -{ - - auto* obj = static_cast(data); - - if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) - { - *ierr = ZOLTAN_FATAL; - return; - } - - auto activeRange = obj->pFlag_.activeRange(); - uint32 n = 0; - for (auto i=activeRange.start(); ipFlag_.isActive(i) ) - { - geom_vec[n] = obj->points_[i].y_; - n++; - } - } - - *ierr = ZOLTAN_OK; - - return; -} - -void pFlow::rcb1DPartitioning::getGeometryList_z -( - void *data, - int sizeGID, - int sizeLID, - int num_obj, - ZOLTAN_ID_PTR globalID, - ZOLTAN_ID_PTR localID, - int num_dim, - double *geom_vec, - int *ierr -) -{ - - auto* obj = static_cast(data); - - if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) - { - *ierr = ZOLTAN_FATAL; - return; - } - - auto activeRange = obj->pFlag_.activeRange(); - uint32 n = 0; - for (auto i=activeRange.start(); ipFlag_.isActive(i) ) - { - geom_vec[n] = obj->points_[i].z_; - n++; - } - } - - *ierr = ZOLTAN_OK; - - return; -} - diff --git a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp b/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp deleted file mode 100644 index b58532e3..00000000 --- a/src/phasicFlow/MPIParallelization/rcb1DPartitioning.hpp +++ /dev/null @@ -1,240 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ -#ifndef __rcb1DPartitioning_hpp__ -#define __rcb1DPartitioning_hpp__ - -#include "partitioning.hpp" -#include "procVector.hpp" - -namespace pFlow -{ - - -class rcb1DPartitioning -: -public partitioning -{ -public: - - enum Direction - { - X = 0, - Y = 1, - Z = 2 - }; - -protected: - - /// Direction of partitioning - Direction direction_ = Direction::X; - - realx3 dirVector_ = {1.0, 0.0, 0.0}; - - word directionName_ = "x"; - - MPI::procVector> exportIds_; - - bool partition(pointCollection& points) override; - -public: - - - rcb1DPartitioning( - const dictionary& dict, - const box& globalBox); - - - ~rcb1DPartitioning() override=default; - - span exportList(int procNo)const override - { - return span( - const_cast(exportIds_[procNo].data()), - exportIds_[procNo].size()); - } - - - pFlow::MPI::procVector> allExportLists()const override - { - pFlow::MPI::procVector> allList(pFlowProcessors()); - - for(int i=0; i(data); - - if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) - { - *ierr = ZOLTAN_FATAL; - return; - } - - *ierr = ZOLTAN_OK; - - for (int i=0; i < num_obj ; i++) - { - geom_vec[i] = obj->pointList()[i].y_; - } - - return; - } - - - static - int getNumGeometry(void *data, int *ierr) - { - *ierr = ZOLTAN_OK; - return 1; - } - -}; - - -class RCB_x_partitioning -: -public partitioning -{ -public: - - - RCB_x_partitioning(int argc, char *argv[], pointCollection& collection, const box& gBox) - : - partitioning(argc, argv, collection, gBox) - {} - - virtual - ~RCB_x_partitioning()=default; - - - bool partition() override; - - - static - void getGeometryList( - void *data, - int sizeGID, - int sizeLID, - int num_obj, - ZOLTAN_ID_PTR globalID, - ZOLTAN_ID_PTR localID, - int num_dim, - double *geom_vec, - int *ierr); - - static - int getNumGeometry(void *data, int *ierr); - - -};*/ - -} // pFlow -#endif //__rcb1DPartitioning_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp deleted file mode 100644 index a771dc54..00000000 --- a/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.cpp +++ /dev/null @@ -1,158 +0,0 @@ - - -template -pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute -( - const localProcessors& procs -) -: - procCommunication(procs), - indexedMap_(TypeNull, procs, true) -{ - -} - -template -bool pFlow::MPI::scatteredMasterDistribute::setDataMaps -( - procVector>& maps -) -{ - if(this->localMaster()) - { - if(maps.size() != this->localSize() ) - { - fatalErrorInFunction<<"size mismatch"; - return false; - } - - std::vector index; - - freeIndexedMap(); - - for(auto proc = 0; proc< maps.size(); proc++) - { - auto m = maps[proc]; - index.resize(m.size()); - for(auto i=0; i( makeSpan(index), dt)) - { - fatalErrorInFunction; - return false; - } - else - { - indexedMap_[proc] = dt; - } - } - } - return true; -} - - -template -bool pFlow::MPI::scatteredMasterDistribute::setDataMaps -( - procVector>& maps -) -{ - if(this->localMaster()) - { - if(maps.size() != this->localSize() ) - { - fatalErrorInFunction<<"size mismatch"; - return false; - } - - freeIndexedMap(); - - - for(auto proc = 0; proc< maps.size(); proc++) - { - DataType dt; - if( !typeCreateIndexedBlock(maps[proc], dt) ) - { - fatalErrorInFunction; - return false; - } - else - { - indexedMap_[proc] = dt; - } - } - } - return true; -} - -template -void 
pFlow::MPI::scatteredMasterDistribute::freeIndexedMap() -{ - for(auto i=0; i -bool pFlow::MPI::scatteredMasterDistribute::distribute -( - span& sendBuff, - span& recvb -) -{ - procVector requests(processors(), true); - procVector statuses(processors(), true); - - if(this->localMaster()) - { - bool res = true; - for(int32 i = indexedMap_.size()-1; i>=0; i--) - { - res = res&&CheckMPI( - MPI_Issend( - sendBuff.data(), - 1, - indexedMap_[i], - i, - 0, - localCommunicator(), - &requests[i]), - false); - } - - if(!res)return false; - } - - Status stat; - bool sucss = CheckMPI( - MPI_Recv( - recvb.data(), - recvb.size()*sFactor(), - Type(), - 0, - 0, - localCommunicator(), - &stat), - false); - - if(this->localMaster()) - { - CheckMPI( - MPI_Waitall(requests.size(), requests.data(), statuses.data()), - false - ); - } - - return sucss; -} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp deleted file mode 100644 index dfffb384..00000000 --- a/src/phasicFlow/MPIParallelization/scatteredMasterDistribute.hpp +++ /dev/null @@ -1,69 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- ------------------------------------------------------------------------------*/ - -#ifndef __scatteredMasterDistribute_hpp__ -#define __scatteredMasterDistribute_hpp__ - -#include "procCommunication.hpp" -#include "mpiCommunication.hpp" -#include "procVector.hpp" -#include "streams.hpp" - - -namespace pFlow::MPI -{ - -template -class scatteredMasterDistribute -: - public procCommunication -{ -protected: - - procVector indexedMap_; - - void freeIndexedMap(); - -public: - - scatteredMasterDistribute(const localProcessors& procs); - - ~scatteredMasterDistribute() - { - freeIndexedMap(); - } - - scatteredMasterDistribute(const scatteredMasterDistribute&)=delete; - - scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = delete; - - bool setDataMaps(procVector>& maps); - - bool setDataMaps(procVector>& maps); - - bool distribute(span& sendBuff, span& recvb); - -}; - -} //pFlow::MPI - -#include "scatteredMasterDistribute.cpp" - -#endif //__scatteredMasterDistribute_hpp__ diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp deleted file mode 100644 index 7579e8d5..00000000 --- a/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.cpp +++ /dev/null @@ -1,166 +0,0 @@ - -#include "scatteredMasterDistributeChar.hpp" - -pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute -( - size_t sizeOfElement, - const localProcessors& procs -) -: - procCommunication(procs), - indexedMap_(TypeNull, procs, true), - sizeOfElement_(sizeOfElement) -{} - - -bool pFlow::MPI::scatteredMasterDistribute::setDataMaps -( - procVector>& maps -) -{ - if(this->localMaster()) - { - if(maps.size() != this->localSize() ) - { - fatalErrorInFunction<<"size mismatch"; - return false; - } - - freeIndexedMap(); - - std::vector index; - - for(auto proc = 0; proc< maps.size(); proc++) - { - auto m = maps[proc]; - index.resize(m.size()); - for(auto i=0; i::setDataMaps -( - procVector>& maps -) -{ - if(this->localMaster()) - { - if(maps.size() != this->localSize() ) - { - fatalErrorInFunction<<"size mismatch"; - return false; - } - - std::vector index; - freeIndexedMap(); - - for(auto proc = 0; proc< maps.size(); proc++) - { - - auto m = maps[proc]; - index.resize(m.size()); - for(auto i=0; i::freeIndexedMap() -{ - for(auto i=0; i::distribute -( - span& sendBuff, - span& recvb -) -{ - procVector requests(processors(), true); - procVector statuses(processors(), true); - - - if(this->localMaster()) - { - bool res = true; - for(int32 i = indexedMap_.size()-1; i>=0; i--) - { - res = res&&CheckMPI( - MPI_Issend( - sendBuff.data(), - 1, - indexedMap_[i], - i, - 0, - localCommunicator(), - &requests[i]), - false); - } - - if(!res)return false; - } - - Status stat; - bool sucss = CheckMPI( - MPI_Recv( - recvb.data(), - recvb.size(), - MPI_CHAR, - 0, - 0, - localCommunicator(), - &stat), - true); - - if(this->localMaster()) - { - CheckMPI( - MPI_Waitall(requests.size(), requests.data(), statuses.data()), - false - ); - } - - return sucss; -} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp b/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp deleted file mode 100644 index e0cee3b4..00000000 --- a/src/phasicFlow/MPIParallelization/scatteredMasterDistributeChar.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/*------------------------------- phasicFlow --------------------------------- - O C enter of - O O E ngineering and - O O M ultiscale 
modeling of - OOOOOOO F luid flow ------------------------------------------------------------------------------- - Copyright (C): www.cemf.ir - email: hamid.r.norouzi AT gmail.com ------------------------------------------------------------------------------- -Licence: - This file is part of phasicFlow code. It is a free software for simulating - granular and multiphase flows. You can redistribute it and/or modify it under - the terms of GNU General Public License v3 or any other later versions. - - phasicFlow is distributed to help others in their research in the field of - granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the - implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - ------------------------------------------------------------------------------*/ - -#ifndef __scatteredMasterDistributeChar_hpp__ -#define __scatteredMasterDistributeChar_hpp__ - -#include "scatteredMasterDistribute.hpp" - -namespace pFlow::MPI -{ - -template<> -class scatteredMasterDistribute -: - public procCommunication -{ -protected: - - procVector indexedMap_; - - size_t sizeOfElement_; - - void freeIndexedMap(); - -public: - - scatteredMasterDistribute( - size_t sizeOfElement, - const localProcessors& procs); - - ~scatteredMasterDistribute() - { - freeIndexedMap(); - } - - scatteredMasterDistribute(const scatteredMasterDistribute&)=delete; - - scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = delete; - - bool setDataMaps(procVector>& maps); - - bool setDataMaps(procVector>& maps); - - bool distribute(span& sendBuff, span& recvb); - -}; - -} // pFlow::MPI - -#endif //__scatteredMasterDistributeChar_hpp__ From 6f48eca95b07ea4758a665e6567fd5cb17f1d4e3 Mon Sep 17 00:00:00 2001 From: HRN Date: Tue, 30 Apr 2024 00:28:29 +0330 Subject: [PATCH 07/14] The problem with memory leak in MPI data transfer fixed and tested. 
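
The leak comes from non-blocking MPI calls whose MPI_Request handles were
never completed or released before their buffers were reused. The fix keeps
each request next to the buffer it refers to and waits on it before the
buffer is touched again. Below is a minimal standalone sketch of that
wait-before-reuse pattern; the class and member names are illustrative only
and are not phasicFlow APIs.

#include <mpi.h>
#include <vector>

// Illustrative only: a sender that owns both its buffer and its request,
// and always completes the previous Isend before reusing either of them.
class nonBlockingSender
{
    std::vector<double> buffer_;
    MPI_Request         request_ = MPI_REQUEST_NULL;
    int                 toProc_;
    int                 tag_;

public:

    nonBlockingSender(int toProc, int tag)
    :
        toProc_(toProc),
        tag_(tag)
    {}

    // Complete the previous send (if any); MPI_Wait also releases the
    // request object, so nothing is leaked.
    void waitBufferForUse()
    {
        if(request_ != MPI_REQUEST_NULL)
        {
            MPI_Wait(&request_, MPI_STATUS_IGNORE);
        }
    }

    void sendData(const std::vector<double>& data, MPI_Comm comm)
    {
        waitBufferForUse();   // buffer and request are free from here on
        buffer_ = data;       // copy into a buffer that outlives this call
        MPI_Isend(buffer_.data(),
                  static_cast<int>(buffer_.size()),
                  MPI_DOUBLE,
                  toProc_,
                  tag_,
                  comm,
                  &request_);
    }

    ~nonBlockingSender()
    {
        waitBufferForUse();   // never destroy the buffer of a pending send
    }
};

Note that calling MPI_Request_free on an active request only marks it for
deallocation and makes its completion unobservable, which is why the sender
stores the request and waits on it instead of freeing it immediately.
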
--- .../processorBoundaryContactSearch.cpp | 8 +- .../twoPartContactSearch.cpp | 2 +- .../processorBoundarySphereInteraction.cpp | 9 ++- .../pointField/processorBoundaryField.cpp | 10 +-- .../pointField/processorBoundaryField.hpp | 7 +- .../boundaries/boundaryProcessor.cpp | 15 ++-- .../boundaries/boundaryProcessor.hpp | 14 +--- .../boundaries/dataReciever.hpp | 66 +++++++--------- .../pointStructure/boundaries/dataSender.hpp | 76 +++++++++++-------- 9 files changed, 99 insertions(+), 108 deletions(-) diff --git a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp index 9f9384e9..8ab8e61d 100644 --- a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp +++ b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp @@ -85,7 +85,7 @@ bool pFlow::processorBoundaryContactSearch::broadSearch { if(masterSearch_) { - /*const auto thisPoints = boundary().thisPoints(); + const auto thisPoints = boundary().thisPoints(); const auto& neighborProcPoints = boundary().neighborProcPoints(); const auto& bDiams = diameter_.BoundaryField(thisBoundaryIndex()); const auto thisDiams = bDiams.thisField(); @@ -96,9 +96,9 @@ bool pFlow::processorBoundaryContactSearch::broadSearch thisPoints, thisDiams, neighborProcPoints, - neighborProcDiams); - - pOutput<<"ppPairs size in boundary"<< ppPairs.size()<::sphereSphereInter const auto & sphPar = this->sphParticles(); uint32 thisIndex = this->boundary().thisBoundaryIndex(); - const auto& a = sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(); - - /*pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + pOutput<<"beofre sphereSphereInteraction"<ppPairs(), cfModel, @@ -67,7 +66,9 @@ bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInter sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), sphPar.contactForce().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), sphPar.contactTorque().BoundaryField(thisIndex).neighborProcField().deviceViewAll() - );*/ + ); + + pOutput<<"after sphereSphereInteraction"<::checkDataRecieved() const { if (!dataRecieved_) { - //uint32 nRecv = reciever_.waitComplete(); + uint32 nRecv = reciever_.waitBufferForUse(); dataRecieved_ = true; - /*if (nRecv != this->neighborProcSize()) + if (nRecv != this->neighborProcSize()) { fatalErrorInFunction; fatalExit; - }*/ + } } } @@ -41,7 +41,7 @@ pFlow::MPI::processorBoundaryField::updateBoundary( DataDirection direction ) { - /*if (step == 1) + if (step == 1) { // Isend if (direction == DataDirection::TwoWay || @@ -67,7 +67,7 @@ pFlow::MPI::processorBoundaryField::updateBoundary( { fatalErrorInFunction << "Invalid step number " << step << endl; return false; - }*/ + } return true; } diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp index 5fb0780a..0a6bad28 100644 --- a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp @@ -50,11 +50,11 @@ public: private: - dataSender sender_; + dataSender sender_; - mutable dataReciever reciever_; + dataReciever reciever_; - mutable bool dataRecieved_ = true; + mutable bool dataRecieved_ = true; void 
checkDataRecieved()const; @@ -82,7 +82,6 @@ public: ProcVectorType& neighborProcField() override; - const ProcVectorType& neighborProcField()const override; bool hearChanges diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp index 50098e0a..2648cc04 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -25,11 +25,7 @@ Licence: void pFlow::MPI::boundaryProcessor::checkSize() const { - if (!sizeObtained_) - { - //MPI_Wait(&sizeRequest_, StatusIgnore); - sizeObtained_ = true; - } + } void @@ -37,13 +33,13 @@ pFlow::MPI::boundaryProcessor::checkDataRecieved() const { if (!dataRecieved_) { - //uint32 nRecv = reciever_.waitComplete(); + uint32 nRecv = reciever_.waitBufferForUse(); dataRecieved_ = true; - /*if (nRecv != neighborProcSize()) + if (nRecv != neighborProcSize()) { fatalErrorInFunction; fatalExit; - }*/ + } } } @@ -92,8 +88,7 @@ pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt) pFlowProcessors().localCommunicator(), MPI_STATUS_IGNORE ); - - sizeObtained_ = false; + MPI_Request_free(&req); return true; } diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp index cb278461..1f96263d 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp @@ -38,21 +38,13 @@ private: uint32 neighborProcNumPoints_ = 0; - uint32 thisNumPoints_; + uint32 thisNumPoints_ = 0; realx3Vector_D neighborProcPoints_; - mutable Request sizeRequest_; + dataSender sender_; - mutable Request sSizeRequest_; - - int req_=0; - - mutable bool sizeObtained_ = true; - - mutable dataSender sender_; - - mutable dataReciever reciever_; + dataReciever reciever_; mutable bool dataRecieved_ = true; diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp index 13069b2a..962146eb 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp @@ -27,13 +27,11 @@ private: BufferVectorType buffer_; - std::vector buffer0_; - int fromProc_; int tag_; - Request recvRequest_; + mutable Request recvRequest_ = RequestNull; public: @@ -46,34 +44,40 @@ public: ~dataReciever()=default; + uint32 waitBufferForUse()const + { + if(recvRequest_ != RequestNull) + { + Status status; + MPI_Wait(&recvRequest_, &status); + int count; + CheckMPI(getCount(&status, count), true); + + return static_cast(count); + } + else + return buffer_.size(); + } + void recieveData( const localProcessors& processors, uint32 numToRecv ) { - - buffer0_.clear(); - buffer0_.resize(numToRecv); - MPI_Status status; + waitBufferForUse(); + buffer_.clear(); + buffer_.resize(numToRecv); - /*CheckMPI(recv( - buffer_.getSpan(), - fromProc_, - tag_, - processors.localCommunicator(), - &status), true);*/ - MPI_Recv( - buffer0_.data(), - buffer0_.size(), - realx3Type__, - fromProc_, - tag_, - processors.localCommunicator(), - &status + CheckMPI( + Irecv( + buffer_.getSpan(), + fromProc_, + tag_, + processors.localCommunicator(), + 
&recvRequest_ + ), + true ); - int c; - getCount(&status, c); - pOutput<<"Number of data recieved "<(&status, count), true); - - return static_cast(count);*/ - return buffer_.size(); - } - }; } diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp index 11c1782f..6342009b 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp @@ -26,15 +26,13 @@ public: private: - //BufferVectorType buffer_; - - std::vector buffer_; + BufferVectorType buffer_; int toProc_; int tag_; - Request sendRequest_ = RequestNull; + mutable Request sendRequest_ = RequestNull; public: @@ -44,7 +42,22 @@ public: tag_(tag) {} - ~dataSender()=default; + ~dataSender() + { + if(sendRequest_ != RequestNull) + { + MPI_Request_free(&sendRequest_); + } + } + + bool waitBufferForUse()const + { + if(sendRequest_ != RequestNull) + { + MPI_Wait(&sendRequest_, StatusesIgnore); + } + return true; + } void sendData( const localProcessors& processors, @@ -52,17 +65,21 @@ public: ) { using RPolicy = Kokkos::RangePolicy< - DefaultExecutionSpace, + execution_space, Kokkos::Schedule, Kokkos::IndexType>; uint32 n = scatterField.size(); + + // make sure the buffer is ready to be used and free + // the previous request (if any). + waitBufferForUse(); // clear the buffer to prevent data copy if capacity increases buffer_.clear(); buffer_.resize(n); - auto* buffView = buffer_.data(); + const auto& buffView = buffer_.deviceViewAll(); Kokkos::parallel_for( "dataSender::sendData", @@ -73,26 +90,20 @@ public: } ); Kokkos::fence(); - auto req = MPI_REQUEST_NULL; - - MPI_Isend( - buffer_.data(), - buffer_.size(), - realx3Type__, - toProc_, - tag_, - processors.localCommunicator(), - &req); - /*CheckMPI(send( - buffer_.getSpan(), - toProc_, - tag_, - processors.localCommunicator(), - MPI_STATUS_IGNORE), true);*/ + CheckMPI( + Isend(buffer_.getSpan(), + toProc_, + tag_, + processors.localCommunicator(), + &sendRequest_ + ), + true + ); + } - /*auto& buffer() + auto& buffer() { return buffer_; } @@ -100,17 +111,20 @@ public: const auto& buffer()const { return buffer_; - }*/ + } bool sendComplete() { - return true; - /*int test; - MPI_Test(&sendRequest_, &test, StatusIgnore); - if(test) - return true; + int test; + if(sendRequest_ != RequestNull) + { + MPI_Test(&sendRequest_, &test, StatusIgnore); + return test; + } else - return false;*/ + { + return true; + } } }; From 68b7d141fa26af1d3130b75464bebd6430232b12 Mon Sep 17 00:00:00 2001 From: HRN Date: Sun, 5 May 2024 22:54:12 +0330 Subject: [PATCH 08/14] boundaryProcessor -> transferData - point data is being transferred (no notification yet). 
- field data should be transferred --- .../processorBoundarySphereInteraction.cpp | 5 +- .../MPI/mpiCommunication.hpp | 25 +++ .../pointField/processorBoundaryField.cpp | 2 + .../boundaries/boundaryProcessor.cpp | 150 ++++++++++++++++++ .../boundaries/boundaryProcessor.hpp | 127 +++++++-------- 5 files changed, 244 insertions(+), 65 deletions(-) diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp index 56243ae6..809b3c6f 100644 --- a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp @@ -32,9 +32,7 @@ pFlow::MPI::processorBoundarySphereInteraction::processorBoundarySpher geomMotion ), masterInteraction_(boundary.isBoundaryMaster()) -{ - pOutput<<"Processor boundayrCondition for "<< boundary.name()< bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction @@ -43,6 +41,7 @@ bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInter const ContactForceModel &cfModel ) { + return true; if(!masterInteraction_) return true; const auto & sphPar = this->sphParticles(); diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp index 4fd5e260..27d259eb 100644 --- a/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp +++ b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp @@ -238,6 +238,18 @@ inline auto send(span data, int dest, int tag, Comm comm) comm); } +template +inline auto send(const T& data, int dest, int tag, Comm comm) +{ + return MPI_Send( + &data, + sFactor(), + Type(), + dest, + tag, + comm); +} + template inline auto Isend(span data, int dest, int tag, Comm comm, Request* req) { @@ -277,6 +289,19 @@ inline auto recv(span data, int source, int tag, Comm comm, Status *status) status); } +template +inline auto recv(T& data, int source, int tag, Comm comm, Status *status) +{ + return MPI_Recv( + &data, + sFactor(), + Type(), + source, + tag, + comm, + status); +} + template inline auto Irecv(T& data, int source, int tag, Comm comm, Request* req) { diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp index ab0225e3..164a2fe6 100644 --- a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp @@ -90,6 +90,8 @@ pFlow::MPI::processorBoundaryField::processorBoundaryField( boundary.mirrorBoundaryIndex() ) { + this->addEvent(message::BNDR_PROCTRANS1). 
+ addEvent(message::BNDR_PROCTRANS2); } template diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp index 2648cc04..246959b1 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -21,6 +21,8 @@ Licence: #include "boundaryProcessor.hpp" #include "dictionary.hpp" #include "mpiCommunication.hpp" +#include "boundaryBaseKernels.hpp" +#include "internalPoints.hpp" void pFlow::MPI::boundaryProcessor::checkSize() const @@ -130,6 +132,105 @@ pFlow::MPI::boundaryProcessor::updataBoundary(int step) return true; } +bool pFlow::MPI::boundaryProcessor::transferData(int step) +{ + if(step==1) + { + uint32 s = size(); + uint32Vector_D transferFlags("transferFlags",s+1, s+1, RESERVE()); + transferFlags.fill(0u); + + const auto& transferD = transferFlags.deviceViewAll(); + auto points = thisPoints(); + auto p = boundaryPlane().infPlane(); + + numToTransfer_ = 0; + + Kokkos::parallel_reduce + ( + "boundaryProcessor::afterIteration", + deviceRPolicyStatic(0,s), + LAMBDA_HD(uint32 i, uint32& transferToUpdate) + { + if(p.pointInNegativeSide(points(i))) + { + transferD(i)=1; + transferToUpdate++; + } + }, + numToTransfer_ + ); + + uint32Vector_D keepIndices("keepIndices"); + if(numToTransfer_ != 0u) + { + pFlow::boundaryBaseKernels::createRemoveKeepIndices + ( + indexList(), + numToTransfer_, + transferFlags, + transferIndices_, + keepIndices, + false + ); + // delete transfer point from this processor + if( !setRemoveKeepIndices(transferIndices_, keepIndices)) + { + fatalErrorInFunction<< + "error in setting transfer and keep points in boundary "<< name()< sender_; + dataSender sender_; - dataReciever reciever_; + dataReciever reciever_; - mutable bool dataRecieved_ = true; + mutable bool dataRecieved_ = true; - void checkSize()const; + uint32 numToTransfer_ = 0; - void checkDataRecieved()const; - - /// @brief Update processor boundary data for this processor - /// @param step It is either 1 or 2 in the input to indicate - /// the update step - /// @return true if successful - /// @details This method is called by boundaryList two times to - /// allow processor boundaries to exchange data in two steps. - /// The first step is a buffered non-blocking send and the second - /// step is non-blocking recieve to get data. - bool updataBoundary(int step)override; + uint32 numToRecieve_ = 0; -public: + uint32Vector_D transferIndices_{"transferIndices"}; - TypeInfo("boundary"); + void checkSize() const; - boundaryProcessor( - const dictionary& dict, - const plane& bplane, - internalPoints& internal, - boundaryList& bndrs, - uint32 thisIndex - ); + void checkDataRecieved() const; - ~boundaryProcessor() override = default; + /// @brief Update processor boundary data for this processor + /// @param step It is either 1 or 2 in the input to indicate + /// the update step + /// @return true if successful + /// @details This method is called by boundaryList two times to + /// allow processor boundaries to exchange data in two steps. + /// The first step is a buffered non-blocking send and the second + /// step is non-blocking recieve to get data. 
+ bool updataBoundary(int step) override; - add_vCtor - ( - boundaryBase, - boundaryProcessor, - dictionary - ); + bool transferData(int step) override; - bool beforeIteration(uint32 iterNum, real t, real dt) override; + public: + TypeInfo("boundary"); - bool iterate(uint32 iterNum, real t, real dt) override; + boundaryProcessor( + const dictionary &dict, + const plane &bplane, + internalPoints &internal, + boundaryList &bndrs, + uint32 thisIndex); - bool afterIteration(uint32 iterNum, real t, real dt) override; + ~boundaryProcessor() override = default; - /// @brief Return number of points in the neighbor processor boundary. - /// This is overriden from boundaryBase. - uint32 neighborProcSize() const override; + add_vCtor( + boundaryBase, + boundaryProcessor, + dictionary); - /// @brief Return a reference to point positions in the neighbor - /// processor boundary. - realx3Vector_D& neighborProcPoints() override; + bool beforeIteration(uint32 iterNum, real t, real dt) override; - /// @brief Return a const reference to point positions in the - /// neighbor processor boundary. - const realx3Vector_D& neighborProcPoints() const override; + bool iterate(uint32 iterNum, real t, real dt) override; -}; + bool afterIteration(uint32 iterNum, real t, real dt) override; + + /// @brief Return number of points in the neighbor processor boundary. + /// This is overriden from boundaryBase. + uint32 neighborProcSize() const override; + + /// @brief Return a reference to point positions in the neighbor + /// processor boundary. + realx3Vector_D &neighborProcPoints() override; + + /// @brief Return a const reference to point positions in the + /// neighbor processor boundary. + const realx3Vector_D &neighborProcPoints() const override; + }; } // namespace pFlow::MPI From 665879f8caebc2af6c2742ad1fd33a7d1b7f7a62 Mon Sep 17 00:00:00 2001 From: HRN Date: Sun, 12 May 2024 19:06:53 +0330 Subject: [PATCH 09/14] Data transfer between processors - Data transfer is done - contact search at processor boundary is done - contact force calculation at processor boundary is done - tests have been done on rotating drum using serial and openMP --- .../processorBoundaryContactSearch.cpp | 9 +- .../processorBoundaryContactSearch.hpp | 2 + .../twoPartContactSearch.cpp | 11 +- .../twoPartContactSearch.hpp | 3 +- .../twoPartContactSearchKernels.cpp | 4 +- .../processorBoundarySphereInteraction.cpp | 140 ++++++++++++++---- .../processorBoundarySphereInteraction.hpp | 10 +- .../domain/MPISimulationDomain.cpp | 12 ++ .../pointField/processorBoundaryField.cpp | 126 +++++++++++++++- .../pointField/processorBoundaryField.hpp | 26 ++-- .../boundaries/boundaryProcessor.cpp | 103 +++++++++++-- .../boundaries/boundaryProcessor.hpp | 20 ++- .../boundaries/dataReciever.hpp | 44 +++++- .../pointStructure/boundaries/dataSender.hpp | 50 ++++++- 14 files changed, 483 insertions(+), 77 deletions(-) diff --git a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp index 8ab8e61d..8281c55c 100644 --- a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp +++ b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp @@ -52,7 +52,8 @@ pFlow::processorBoundaryContactSearch::processorBoundaryContactSearch( : boundaryContactSearch(dict, boundary, cSearch), 
diameter_(cSearch.Particles().boundingSphere()), - masterSearch_(this->isBoundaryMaster()) + masterSearch_(this->isBoundaryMaster()), + sizeRatio_(dict.getVal("sizeRatio")) { if(masterSearch_) @@ -65,7 +66,8 @@ pFlow::processorBoundaryContactSearch::processorBoundaryContactSearch( ppContactSearch_ = makeUnique( searchBox_, - maxD); + maxD, + sizeRatio_); } else { @@ -96,7 +98,8 @@ bool pFlow::processorBoundaryContactSearch::broadSearch thisPoints, thisDiams, neighborProcPoints, - neighborProcDiams + neighborProcDiams, + name() ); //pOutput<<"ppSize "<< ppPairs.size()< &points1, const deviceScatteredFieldAccess &diams1, const realx3Vector_D& points2, - const realVector_D& diams2 + const realVector_D& diams2, + const word& name ) { buildList(points1); @@ -148,9 +151,9 @@ bool pFlow::twoPartContactSearch::broadSearchPP auto oldCap = ppPairs.capacity(); ppPairs.increaseCapacityBy(len); - + INFORMATION<< "Particle-particle contact pair container capacity increased from "<< - oldCap << " to "< &points1, const deviceScatteredFieldAccess &diams1, const realx3Vector_D& points2, - const realVector_D& diams2); + const realVector_D& diams2, + const word& name); const auto& searchCells()const { diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp index 56f1885d..515e5af1 100644 --- a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp @@ -20,9 +20,7 @@ pFlow::twoPartContactSearchKernels::buildNextHead( deviceViewType1D& next ) { - if (points.empty()) - return; - + uint32 n = points.size(); Kokkos::parallel_for( diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp index 809b3c6f..f0e2a9a1 100644 --- a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp @@ -32,42 +32,126 @@ pFlow::MPI::processorBoundarySphereInteraction::processorBoundarySpher geomMotion ), masterInteraction_(boundary.isBoundaryMaster()) -{} + , + inter_("inter"), + send_("send"), + recv_("recv"), + add_("add") +{ + if(masterInteraction_) + { + this->allocatePPPairs(); + this->allocatePWPairs(); + } + +} template bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction ( real dt, - const ContactForceModel &cfModel + const ContactForceModel &cfModel, + uint32 step ) { - return true; - if(!masterInteraction_) return true; - const auto & sphPar = this->sphParticles(); - uint32 thisIndex = this->boundary().thisBoundaryIndex(); - pOutput<<"beofre sphereSphereInteraction"<ppPairs(), - cfModel, - this->boundary().thisPoints(), - sphPar.diameter().deviceViewAll(), - sphPar.propertyId().deviceViewAll(), - sphPar.velocity().deviceViewAll(), - sphPar.rVelocity().deviceViewAll(), - sphPar.contactForce().deviceViewAll(), - sphPar.contactTorque().deviceViewAll(), - this->boundary().neighborProcPoints().deviceViewAll(), - sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), - 
sphPar.propertyId().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), - sphPar.velocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), - sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), - sphPar.contactForce().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), - sphPar.contactTorque().BoundaryField(thisIndex).neighborProcField().deviceViewAll() - ); + // master processor calculates the contact force/torque and sends data back to the + // neighbor processor (slave processor). + // slave processor recieves the data and adds the data to the internalField + if(masterInteraction_) + { + if(step==1)return true; - pOutput<<"after sphereSphereInteraction"<sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + + const auto& cfBndry = static_cast&> ( + sphPar.contactForce().BoundaryField(thisIndex)); - return true; + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + if(step == 2 ) + { + iter++; + inter_.start(); + pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + dt, + this->ppPairs(), + cfModel, + this->boundary().thisPoints(), + sphPar.diameter().deviceViewAll(), + sphPar.propertyId().deviceViewAll(), + sphPar.velocity().deviceViewAll(), + sphPar.rVelocity().deviceViewAll(), + sphPar.contactForce().deviceViewAll(), + sphPar.contactTorque().deviceViewAll(), + this->boundary().neighborProcPoints().deviceViewAll(), + sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.propertyId().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.velocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + cfBndry.neighborProcField().deviceViewAll(), + ctBndry.neighborProcField().deviceViewAll() + ); + inter_.end(); + return true; + } + else if(step == 3 ) + { + send_.start(); + cfBndry.sendBackData(); + ctBndry.sendBackData(); + send_.end(); + return true; + } + + if(iter % 1000 == 0u) + { + pOutput<<"inter "<< inter_.totalTime()<sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + if(step==1) + { + recv_.start(); + cfBndry.recieveBackData(); + ctBndry.recieveBackData(); + recv_.end(); + return false; + } + else if(step == 2) + { + iter++; + return true; + } + else if(step == 3) + { + add_.start(); + cfBndry.addBufferToInternalField(); + ctBndry.addBufferToInternalField(); + add_.end(); + return true; + } + + if(iter % 1000 == 0u) + { + pOutput<<"recive "<< recv_.totalTime()<subDict("MPIBoundaries"); real neighborLength = boundaries.getVal("neighborLength"); + auto boundaryExtntionLengthRatio = + boundaries.getValOrSet("boundaryExtntionLengthRatio", 0.1); + auto updateIntercal = boundaries.getValOrSet("updateInterval", 1u); auto neighbors = findPlaneNeighbors(); @@ -61,6 +64,15 @@ bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts() "in dictionary "<< boundaries.globalName()<::processorBoundaryField( boundary.mirrorBoundaryIndex() ) { - this->addEvent(message::BNDR_PROCTRANS1). - addEvent(message::BNDR_PROCTRANS2); + this->addEvent(message::BNDR_PROCTRANSFER_SEND). + addEvent(message::BNDR_PROCTRANSFER_RECIEVE). + addEvent(message::BNDR_PROCTRANSFER_WAITFILL). 
+ addEvent(message::BNDR_PROC_SIZE_CHANGED); } template @@ -109,4 +112,123 @@ const typename pFlow::MPI::processorBoundaryField:: { checkDataRecieved(); return reciever_.buffer(); +} + +template +bool pFlow::MPI::processorBoundaryField::hearChanges( + real t, + real dt, + uint32 iter, + const message& msg, + const anyList& varList +) +{ + BoundaryFieldType::hearChanges(t,dt,iter, msg,varList); + if(msg.equivalentTo(message::BNDR_PROC_SIZE_CHANGED)) + { + auto newProcSize = varList.getObject("size"); + reciever_.resize(newProcSize); + } + + if(msg.equivalentTo(message::BNDR_PROCTRANSFER_SEND)) + { + const auto& indices = varList.getObject( + message::eventName(message::BNDR_PROCTRANSFER_SEND) + ); + + FieldAccessType transferData( + indices.size(), + indices.deviceViewAll(), + this->internal().deviceViewAll() + ); + sender_.sendData(pFlowProcessors(),transferData); + } + else if(msg.equivalentTo(message::BNDR_PROCTRANSFER_RECIEVE)) + { + uint32 numRecieved = varList.getObject( + message::eventName(message::BNDR_PROCTRANSFER_RECIEVE) + ); + reciever_.recieveData(pFlowProcessors(), numRecieved); + } + else if(msg.equivalentTo(message::BNDR_PROCTRANSFER_WAITFILL)) + { + + uint32 numRecieved = reciever_.waitBufferForUse(); + + if(msg.equivalentTo(message::CAP_CHANGED)) + { + auto newCap = varList.getObject( + message::eventName(message::CAP_CHANGED)); + this->internal().field().reserve(newCap); + + } + if(msg.equivalentTo(message::SIZE_CHANGED)) + { + auto newSize = varList.getObject( + message::eventName(message::SIZE_CHANGED)); + this->internal().field().resize(newSize); + } + + const auto& indices = varList.getObject( + message::eventName(message::ITEM_INSERT)); + + this->internal().field().insertSetElement(indices, reciever_.buffer().deviceView()); + + return true; + } + + return true; +} +template +void pFlow::MPI::processorBoundaryField::sendBackData() const +{ + reciever_.sendBackData(pFlowProcessors()); + dataRecieved_ = false; +} + +template +void pFlow::MPI::processorBoundaryField::recieveBackData() const +{ + sender_.recieveBackData(pFlowProcessors(), this->size()); +} + +template +void pFlow::MPI::processorBoundaryField::addBufferToInternalField()const +{ + using RPolicy = Kokkos::RangePolicy< + execution_space, + Kokkos::Schedule, + Kokkos::IndexType>; + + sender_.waitBufferForUse(); + + const auto& buffView = sender_.buffer().deviceViewAll(); + const auto& field = this->internal().deviceViewAll(); + + if constexpr( isDeviceAccessible ) + { + const auto& indices = this->indexList().deviceViewAll(); + Kokkos::parallel_for( + "dataSender::recieveBackData", + RPolicy(0,this->size()), + LAMBDA_HD(uint32 i) + { + field[indices[i]] += buffView[i]; + } + ); + Kokkos::fence(); + } + else + { + const auto& indices = this->boundary().indexListHost().deviceViewAll(); + Kokkos::parallel_for( + "dataSender::recieveBackData", + RPolicy(0,this->size()), + LAMBDA_HD(uint32 i) + { + field[indices[i]] += buffView[i]; + } + ); + Kokkos::fence(); + } } \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp index 0a6bad28..fd2c72e0 100644 --- a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp @@ -83,25 +83,25 @@ public: ProcVectorType& neighborProcField() override; const ProcVectorType& neighborProcField()const override; + + void fill(const T& val)override + { + 
reciever_.fill(val); + } - bool hearChanges - ( + bool hearChanges( real t, real dt, uint32 iter, const message& msg, const anyList& varList - ) override - { - BoundaryFieldType::hearChanges(t,dt,iter, msg,varList); - - if(msg.equivalentTo(message::BNDR_DELETE)) - { - // do nothing; - } - - return true; - } + ) override; + + void sendBackData()const; + + void recieveBackData()const; + + void addBufferToInternalField()const; }; diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp index 246959b1..76be7508 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -23,12 +23,9 @@ Licence: #include "mpiCommunication.hpp" #include "boundaryBaseKernels.hpp" #include "internalPoints.hpp" +#include "Time.hpp" +#include "anyList.hpp" -void -pFlow::MPI::boundaryProcessor::checkSize() const -{ - -} void pFlow::MPI::boundaryProcessor::checkDataRecieved() const @@ -69,8 +66,11 @@ pFlow::MPI::boundaryProcessor::boundaryProcessor( bool pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt) { + thisNumPoints_ = size(); + uint32 oldNeighborProcNumPoints = neighborProcNumPoints_; + auto req = MPI_REQUEST_NULL; MPI_Isend( &thisNumPoints_, @@ -92,13 +92,24 @@ pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt) ); MPI_Request_free(&req); + anyList varList; + message msg; + + varList.emplaceBack(msg.addAndName(message::BNDR_PROC_SIZE_CHANGED), neighborProcNumPoints_); + + if( !notify(iterNum, t, dt, msg, varList) ) + { + fatalErrorInFunction; + return false; + } + + return true; } pFlow::uint32 pFlow::MPI::boundaryProcessor::neighborProcSize() const { - checkSize(); return neighborProcNumPoints_; } @@ -117,7 +128,7 @@ pFlow::MPI::boundaryProcessor::neighborProcPoints() const } bool -pFlow::MPI::boundaryProcessor::updataBoundary(int step) +pFlow::MPI::boundaryProcessor::updataBoundaryData(int step) { if (step == 1) { @@ -132,8 +143,10 @@ pFlow::MPI::boundaryProcessor::updataBoundary(int step) return true; } -bool pFlow::MPI::boundaryProcessor::transferData(int step) +bool pFlow::MPI::boundaryProcessor::transferData(uint32 iter, int step) { + if(!boundaryListUpdate(iter))return false; + if(step==1) { uint32 s = size(); @@ -206,24 +219,88 @@ bool pFlow::MPI::boundaryProcessor::transferData(int step) } else if(step ==2 ) { + if( transferIndices_.empty() )return true; + pointFieldAccessType transferPoints( - transferIndices_.size(), - transferIndices_.deviceViewAll(), - internal().pointPositionDevice()); + transferIndices_.size(), + transferIndices_.deviceViewAll(), + internal().pointPositionDevice()); sender_.sendData(pFlowProcessors(), transferPoints); + message msg; + anyList varList; + varList.emplaceBack( + msg.addAndName(message::BNDR_PROCTRANSFER_SEND), + transferIndices_); + + if(!notify( + internal().time().currentIter(), + internal().time().currentTime(), + internal().time().dt(), + msg, + varList)) + { + fatalErrorInFunction; + return false; + } + return true; } else if(step == 3) { - + if(numToRecieve_ == 0u) return false; reciever_.recieveData(pFlowProcessors(), numToRecieve_); + + message msg; + anyList varList; + varList.emplaceBack( + msg.addAndName(message::BNDR_PROCTRANSFER_RECIEVE), + numToRecieve_); + + if(!notify( + internal().time().currentIter(), + internal().time().currentTime(), + 
internal().time().dt(), + msg, + varList)) + { + fatalErrorInFunction; + return false; + } + return true; } else if(step == 4) { + if(numToRecieve_ == 0u) return false; reciever_.waitBufferForUse(); - // + + // points should be inserted first + message msg(message::BNDR_PROCTRANSFER_WAITFILL); + anyList varList; + + internal().insertPointsOnly(reciever_.buffer(), msg, varList); + const auto& indices = varList.getObject(message::eventName(message::ITEM_INSERT)); + auto indView = deviceViewType1D(indices.deviceView().data(), indices.deviceView().size()); + uint32Vector_D newIndices("newIndices", indView); + + if(! appendNewIndices(newIndices)) + { + fatalErrorInFunction; + return false; + } + + if(!notify( + internal().time().currentIter(), + internal().time().currentTime(), + internal().time().dt(), + msg, + varList)) + { + fatalErrorInFunction; + return false; + } + return false; } diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp index a222cabe..8771869e 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp @@ -33,9 +33,11 @@ namespace pFlow::MPI : public boundaryBase { public: + using pointFieldAccessType = typename boundaryBase::pointFieldAccessType; private: + uint32 neighborProcNumPoints_ = 0; uint32 thisNumPoints_ = 0; @@ -54,8 +56,6 @@ namespace pFlow::MPI uint32Vector_D transferIndices_{"transferIndices"}; - void checkSize() const; - void checkDataRecieved() const; /// @brief Update processor boundary data for this processor @@ -66,9 +66,9 @@ namespace pFlow::MPI /// allow processor boundaries to exchange data in two steps. /// The first step is a buffered non-blocking send and the second /// step is non-blocking recieve to get data. - bool updataBoundary(int step) override; + bool updataBoundaryData(int step) override; - bool transferData(int step) override; + bool transferData(uint32 iter, int step) override; public: TypeInfo("boundary"); @@ -104,6 +104,18 @@ namespace pFlow::MPI /// @brief Return a const reference to point positions in the /// neighbor processor boundary. 
const realx3Vector_D &neighborProcPoints() const override; + + + uint32 numToTransfer()const override + { + return numToTransfer_; + } + + + uint32 numToRecieve()const override + { + return numToRecieve_; + } }; } // namespace pFlow::MPI diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp index 962146eb..547e09f9 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp @@ -59,15 +59,28 @@ public: return buffer_.size(); } + void sendBackData( + const localProcessors& processors)const + { + CheckMPI( + Isend( + buffer_.getSpan(), + fromProc_, + tag_, + processors.localCommunicator(), + &recvRequest_ + ), + true + ); + } + void recieveData( const localProcessors& processors, uint32 numToRecv ) { - waitBufferForUse(); - buffer_.clear(); - buffer_.resize(numToRecv); - + resize(numToRecv); + CheckMPI( Irecv( buffer_.getSpan(), @@ -80,16 +93,39 @@ public: ); } + inline auto& buffer() { return buffer_; } + inline const auto& buffer()const { return buffer_; } + inline + void fill(const T& val) + { + waitBufferForUse(); + buffer_.fill(val); + } + + inline + uint32 size()const + { + return buffer_.size(); + } + + inline + void resize(uint32 newSize) + { + waitBufferForUse(); + buffer_.clear(); + buffer_.resize(newSize); + } + }; } diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp index 6342009b..18b907c8 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp @@ -26,7 +26,7 @@ public: private: - BufferVectorType buffer_; + mutable BufferVectorType buffer_; int toProc_; @@ -103,6 +103,34 @@ public: } + bool recieveBackData( + const localProcessors& processors, + uint32 numToRecieve + )const + { + // make sure the buffer is ready to be used and free + // the previous request (if any). 
+ waitBufferForUse(); + + // clear the buffer to prevent data copy if capacity increases + buffer_.clear(); + buffer_.resize(numToRecieve); + + Status status; + CheckMPI( + Irecv( + buffer_.getSpan(), + toProc_, + tag_, + processors.localCommunicator(), + &sendRequest_ + ), + true + ); + + return true; + } + auto& buffer() { return buffer_; @@ -113,6 +141,18 @@ public: return buffer_; } + inline + void fill(const T& val) + { + waitBufferForUse(); + buffer_.fill(val); + } + + uint32 size()const + { + return buffer_.size(); + } + bool sendComplete() { int test; @@ -127,6 +167,14 @@ public: } } + inline + void resize(uint32 newSize) + { + waitBufferForUse(); + buffer_.clear(); + buffer_.resize(newSize); + } + }; } From d2003b35e6149aedecaf96a49cac2217f9616aab Mon Sep 17 00:00:00 2001 From: Hamidreza Norouzi Date: Wed, 22 May 2024 09:52:48 +0330 Subject: [PATCH 10/14] bug fix to work with CUDA run --- .../twoPartContactSearchKernels.cpp | 8 +-- .../domain/MPISimulationDomain.cpp | 9 ++- .../pointField/processorBoundaryField.cpp | 28 ++++++++-- .../boundaries/boundaryProcessor.cpp | 26 ++++----- .../boundaries/boundaryProcessorKernels.hpp | 56 +++++++++++++++++++ .../pointStructure/boundaries/dataSender.hpp | 1 - 6 files changed, 100 insertions(+), 28 deletions(-) create mode 100644 src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessorKernels.hpp diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp index 515e5af1..9faa80fc 100644 --- a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp @@ -87,14 +87,14 @@ pFlow::twoPartContactSearchKernels::broadSearchPP( continue; uint32 thisI = head(ind.x(), ind.y(), ind.z()); - while (thisI != -1) + while (thisI != static_cast(-1)) { auto d_n = sizeRatio * diams[thisI]; // first item is for this boundary and second itme, // for mirror if(sphereSphereCheckB(p_m, points[thisI], d_m, d_n)&& - ppPairs.insert(thisI,mrrI) == -1) + ppPairs.insert(thisI,mrrI) == static_cast(-1)) { getFullUpdate++; } @@ -161,14 +161,14 @@ pFlow::twoPartContactSearchKernels::broadSearchPP( } uint32 i1 = head(ind.x(), ind.y(), ind.z()); - while (i1 != -1) + while (i1 != static_cast(-1)) { auto d_n = sizeRatio * diams1[i1]; // first item is for this boundary and second itme, // for mirror if(sphereSphereCheckB(p_m, points1[i1], d_m, d_n)&& - ppPairs.insert(i1,i2) == -1) + ppPairs.insert(i1,i2) == static_cast(-1)) { getFullUpdate++; } diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp index 87b050eb..9a5ee76e 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -41,9 +41,12 @@ bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts() auto& mpiBoundaries = this->subDict("MPIBoundaries"); real neighborLength = boundaries.getVal("neighborLength"); - auto boundaryExtntionLengthRatio = - boundaries.getValOrSet("boundaryExtntionLengthRatio", 0.1); - auto updateIntercal = boundaries.getValOrSet("updateInterval", 1u); + auto boundaryExtntionLengthRatio = max( + boundaries.getValOrSet("boundaryExtntionLengthRatio", 0.1), + 0.0); + auto updateIntercal = max( + 
boundaries.getValOrSet("updateInterval", 1u), + 1u); auto neighbors = findPlaneNeighbors(); diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp index 820831b4..b7348a2a 100644 --- a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp @@ -136,12 +136,28 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( message::eventName(message::BNDR_PROCTRANSFER_SEND) ); - FieldAccessType transferData( - indices.size(), - indices.deviceViewAll(), - this->internal().deviceViewAll() - ); - sender_.sendData(pFlowProcessors(),transferData); + if constexpr( isDeviceAccessible()) + { + FieldAccessType transferData( + indices.size(), + indices.deviceViewAll(), + this->internal().deviceViewAll() + ); + + sender_.sendData(pFlowProcessors(),transferData); + } + else + { + FieldAccessType transferData( + indices.size(), + indices.hostViewAll(), + this->internal().deviceViewAll() + ); + + sender_.sendData(pFlowProcessors(),transferData); + } + + } else if(msg.equivalentTo(message::BNDR_PROCTRANSFER_RECIEVE)) { diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp index 76be7508..54bed6ad 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -19,6 +19,7 @@ Licence: -----------------------------------------------------------------------------*/ #include "boundaryProcessor.hpp" +#include "boundaryProcessorKernels.hpp" #include "dictionary.hpp" #include "mpiCommunication.hpp" #include "boundaryBaseKernels.hpp" @@ -69,8 +70,6 @@ pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt) thisNumPoints_ = size(); - uint32 oldNeighborProcNumPoints = neighborProcNumPoints_; - auto req = MPI_REQUEST_NULL; MPI_Isend( &thisNumPoints_, @@ -103,7 +102,6 @@ pFlow::MPI::boundaryProcessor::beforeIteration(uint32 iterNum, real t, real dt) return false; } - return true; } @@ -154,23 +152,21 @@ bool pFlow::MPI::boundaryProcessor::transferData(uint32 iter, int step) transferFlags.fill(0u); const auto& transferD = transferFlags.deviceViewAll(); - auto points = thisPoints(); + deviceScatteredFieldAccess points = thisPoints(); auto p = boundaryPlane().infPlane(); numToTransfer_ = 0; - Kokkos::parallel_reduce + + Kokkos::parallel_reduce ( "boundaryProcessor::afterIteration", deviceRPolicyStatic(0,s), - LAMBDA_HD(uint32 i, uint32& transferToUpdate) - { - if(p.pointInNegativeSide(points(i))) - { - transferD(i)=1; - transferToUpdate++; - } - }, + boundaryProcessorKernels::markNegative( + boundaryPlane().infPlane(), + transferFlags.deviceViewAll(), + thisPoints() + ), numToTransfer_ ); @@ -206,13 +202,15 @@ bool pFlow::MPI::boundaryProcessor::transferData(uint32 iter, int step) thisBoundaryIndex(), pFlowProcessors().localCommunicator(), &req), true ); - + //pOutput<<"sent "<< numToTransfer_<& f, + const deviceScatteredFieldAccess& p + ) + : + plane_(pl), + flags_(f), + points_(p) + {} + + infinitePlane plane_; + deviceViewType1D flags_; + deviceScatteredFieldAccess points_; + + INLINE_FUNCTION_HD + void operator()(uint32 i, uint32& transferToUpdate)const + { + if(plane_.pointInNegativeSide(points_(i))) + { + flags_(i)=1; + transferToUpdate++; + } + } + +}; + +} \ 
No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp index 18b907c8..a4c5d39b 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp @@ -116,7 +116,6 @@ public: buffer_.clear(); buffer_.resize(numToRecieve); - Status status; CheckMPI( Irecv( buffer_.getSpan(), From 5a25556b8a15bb3f79bfe0c29b656f8e2b66a4da Mon Sep 17 00:00:00 2001 From: HRN Date: Fri, 24 May 2024 00:10:15 +0330 Subject: [PATCH 11/14] changing blocking-recive to non-blocking (in boundaryProcessor) to improve MPI run --- .../processorBoundarySphereInteraction.cpp | 4 +- .../boundaries/boundaryProcessor.cpp | 86 +++++++++++-------- .../boundaries/boundaryProcessor.hpp | 6 +- 3 files changed, 59 insertions(+), 37 deletions(-) diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp index f0e2a9a1..d55ab2b3 100644 --- a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp @@ -106,7 +106,7 @@ bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInter return true; } - if(iter % 1000 == 0u) + if(iter % 100 == 0u) { pOutput<<"inter "<< inter_.totalTime()<::sphereSphereInter return true; } - if(iter % 1000 == 0u) + if(iter % 100 == 0u) { pOutput<<"recive "<< recv_.totalTime()< sender_; @@ -87,7 +91,7 @@ namespace pFlow::MPI boundaryProcessor, dictionary); - bool beforeIteration(uint32 iterNum, real t, real dt) override; + bool beforeIteration(uint32 step, uint32 iterNum, real t, real dt) override; bool iterate(uint32 iterNum, real t, real dt) override; From 93617a6ee5b7533f6f4f09aea89d0ac971f07ddc Mon Sep 17 00:00:00 2001 From: HRN Date: Wed, 7 Aug 2024 15:13:15 +0330 Subject: [PATCH 12/14] MPI developement zeroTime --- .../processorAB2BoundaryIntegration.cpp | 71 +++++ .../processorAB2BoundaryIntegration.hpp | 51 ++++ .../processorBoundaryContactSearch.cpp | 2 +- .../processorBoundarySIKernels.hpp | 3 +- .../processorBoundarySphereInteraction.cpp | 175 ++++++++--- .../processorBoundarySphereInteraction.hpp | 7 +- .../processorBoundarySphereParticles.cpp | 46 +++ .../processorBoundarySphereParticles.hpp | 38 +++ .../MPI/mpiCommunication.hpp | 23 +- .../MPIParallelization/MPI/mpiTypes.hpp | 2 + .../domain/MPISimulationDomain.cpp | 50 +--- .../domain/MPISimulationDomain.hpp | 2 - .../pointField/processorBoundaryField.cpp | 80 +++-- .../pointField/processorBoundaryField.hpp | 11 +- .../boundaries/boundaryProcessor.cpp | 273 +++++++++++------- .../boundaries/boundaryProcessor.hpp | 25 +- .../boundaries/dataReciever.hpp | 3 +- .../pointStructure/boundaries/dataSender.hpp | 27 +- 18 files changed, 647 insertions(+), 242 deletions(-) create mode 100644 src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp create mode 100644 src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp create mode 100644 src/Particles/SphereParticles/processorBoundarySphereParticles.cpp create mode 100644 src/Particles/SphereParticles/processorBoundarySphereParticles.hpp diff --git 
a/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp new file mode 100644 index 00000000..494f9938 --- /dev/null +++ b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp @@ -0,0 +1,71 @@ +#include "processorAB2BoundaryIntegration.hpp" +#include "AdamsBashforth2.hpp" +#include "AB2Kernels.hpp" +#include "boundaryConfigs.hpp" + +pFlow::processorAB2BoundaryIntegration::processorAB2BoundaryIntegration( + const boundaryBase &boundary, + const pointStructure &pStruct, + const word &method, + integration& intgrtn +) +: + boundaryIntegration(boundary, pStruct, method, intgrtn) +{} + +bool pFlow::processorAB2BoundaryIntegration::correct( + real dt, + const realx3PointField_D& y, + const realx3PointField_D& dy +) +{ + +#ifndef BoundaryModel1 + if(this->isBoundaryMaster()) + { + const uint32 thisIndex = thisBoundaryIndex(); + const auto& AB2 = static_cast(Integration()); + const auto& dy1View = AB2.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& dyView = dy.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& yView = y.BoundaryField(thisIndex).neighborProcField().deviceView(); + const rangeU32 aRange(0u, dy1View.size()); + return AB2Kernels::intAllActive( + "AB2Integration::correct."+this->boundaryName(), + dt, + aRange, + yView, + dyView, + dy1View + ); + } +#endif //BoundaryModel1 + + + return true; +} + +bool pFlow::processorAB2BoundaryIntegration::correctPStruct(real dt, const realx3PointField_D &vel) +{ + + #ifndef BoundaryModel1 + if(this->isBoundaryMaster()) + { + const uint32 thisIndex = thisBoundaryIndex(); + const auto& AB2 = static_cast(Integration()); + const auto& dy1View = AB2.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& velView = vel.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& xposView = boundary().neighborProcPoints().deviceView(); + const rangeU32 aRange(0u, dy1View.size()); + return AB2Kernels::intAllActive( + "AB2Integration::correctPStruct."+this->boundaryName(), + dt, + aRange, + xposView, + velView, + dy1View + ); + } +#endif //BoundaryModel1 + + return true; +} diff --git a/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp new file mode 100644 index 00000000..7ff6aacb --- /dev/null +++ b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp @@ -0,0 +1,51 @@ + + +#ifndef __processorAB2BoundaryIntegration_hpp__ +#define __processorAB2BoundaryIntegration_hpp__ + +#include "boundaryIntegration.hpp" + +namespace pFlow +{ + +class processorAB2BoundaryIntegration +: + public boundaryIntegration +{ +public: + + TypeInfo("boundaryIntegration"); + + processorAB2BoundaryIntegration( + const boundaryBase& boundary, + const pointStructure& pStruct, + const word& method, + integration& intgrtn + ); + + ~processorAB2BoundaryIntegration()override=default; + + + bool correct( + real dt, + const realx3PointField_D& y, + const realx3PointField_D& dy)override; + + + + bool correctPStruct(real dt, const realx3PointField_D& vel)override; + + + add_vCtor( + boundaryIntegration, + processorAB2BoundaryIntegration, + boundaryBase + ); + + + +}; + +} + +#endif \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp 
b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp index 8281c55c..323f23f7 100644 --- a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp +++ b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp @@ -99,7 +99,7 @@ bool pFlow::processorBoundaryContactSearch::broadSearch thisDiams, neighborProcPoints, neighborProcDiams, - name() + boundaryName() ); //pOutput<<"ppSize "<< ppPairs.size()< inline void sphereSphereInteraction ( + const word& kernalName, real dt, const ContactListType& cntctList, const ContactForceModel& forceModel, @@ -36,7 +37,7 @@ void sphereSphereInteraction uint32 lastItem = cntctList.loopCount(); Kokkos::parallel_for( - "pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction", + kernalName, deviceRPolicyDynamic(0,lastItem), LAMBDA_HD(uint32 n) { diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp index d55ab2b3..231260da 100644 --- a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp @@ -32,11 +32,6 @@ pFlow::MPI::processorBoundarySphereInteraction::processorBoundarySpher geomMotion ), masterInteraction_(boundary.isBoundaryMaster()) - , - inter_("inter"), - send_("send"), - recv_("recv"), - add_("add") { if(masterInteraction_) { @@ -46,6 +41,9 @@ pFlow::MPI::processorBoundarySphereInteraction::processorBoundarySpher } + +#ifdef BoundaryModel1 + template bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction ( @@ -74,8 +72,8 @@ bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInter if(step == 2 ) { iter++; - inter_.start(); pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + "ppBoundaryInteraction."+this->boundaryName(), dt, this->ppPairs(), cfModel, @@ -94,64 +92,165 @@ bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInter cfBndry.neighborProcField().deviceViewAll(), ctBndry.neighborProcField().deviceViewAll() ); - inter_.end(); + return true; } else if(step == 3 ) { - send_.start(); cfBndry.sendBackData(); ctBndry.sendBackData(); - send_.end(); + return true; } - if(iter % 100 == 0u) - { - pOutput<<"inter "<< inter_.totalTime()<sphParticles(); - uint32 thisIndex = this->boundary().thisBoundaryIndex(); - const auto& cfBndry = static_cast&>( - sphPar.contactForce().BoundaryField(thisIndex)); - const auto& ctBndry = static_cast&> ( - sphPar.contactTorque().BoundaryField(thisIndex)); - if(step==1) + if(step == 1 ) { - recv_.start(); + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + cfBndry.recieveBackData(); ctBndry.recieveBackData(); - recv_.end(); + return false; } - else if(step == 2) + else if(step == 11) { - iter++; - return true; - } - else if(step == 3) - { - add_.start(); + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = 
static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + cfBndry.addBufferToInternalField(); ctBndry.addBufferToInternalField(); - add_.end(); + return true; } - if(iter % 100 == 0u) - { - pOutput<<"recive "<< recv_.totalTime()< +bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction +( + real dt, + const ContactForceModel &cfModel, + uint32 step +) +{ + + // master processor calculates the contact force/torque and sends data back to the + // neighbor processor (slave processor). + // slave processor recieves the data and adds the data to the internalField + if(masterInteraction_) + { + if(step==1)return true; + + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + + const auto& cfBndry = static_cast&> ( + sphPar.contactForce().BoundaryField(thisIndex)); + + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + if(step == 2 ) + { + + pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + "ppBoundaryInteraction."+this->boundaryName(), + dt, + this->ppPairs(), + cfModel, + this->boundary().thisPoints(), + sphPar.diameter().deviceViewAll(), + sphPar.propertyId().deviceViewAll(), + sphPar.velocity().deviceViewAll(), + sphPar.rVelocity().deviceViewAll(), + sphPar.contactForce().deviceViewAll(), + sphPar.contactTorque().deviceViewAll(), + this->boundary().neighborProcPoints().deviceViewAll(), + sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.propertyId().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.velocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + cfBndry.neighborProcField().deviceViewAll(), + ctBndry.neighborProcField().deviceViewAll() + ); + + return true; + } + else if(step == 3 ) + { + cfBndry.sendBackData(); + ctBndry.sendBackData(); + return true; + } + else if(step == 11 ) + { + cfBndry.updateBoundaryFromSlave(); + ctBndry.updateBoundaryFromSlave(); + return true; + } + + return false; + } + else + { + + if(step == 1 ) + { + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + cfBndry.recieveBackData(); + ctBndry.recieveBackData(); + + return false; + } + else if(step == 11) + { + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + cfBndry.addBufferToInternalField(); + cfBndry.updateBoundaryToMaster(); + + ctBndry.addBufferToInternalField(); + ctBndry.updateBoundaryToMaster(); + + return true; + } + + return false; + } + + return false; +} + +#endif \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp index a07d434a..5d27bd76 100644 --- 
a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp @@ -22,6 +22,7 @@ Licence: #include "boundarySphereInteraction.hpp" #include "processorBoundaryField.hpp" +#include "boundaryProcessor.hpp" namespace pFlow::MPI { @@ -57,12 +58,6 @@ private: bool masterInteraction_; - Timer inter_; - Timer send_; - Timer recv_; - Timer add_; - uint32 iter=0; - public: TypeInfoTemplate22("boundarySphereInteraction", "processor",ContactForceModel, MotionModel); diff --git a/src/Particles/SphereParticles/processorBoundarySphereParticles.cpp b/src/Particles/SphereParticles/processorBoundarySphereParticles.cpp new file mode 100644 index 00000000..c07b356c --- /dev/null +++ b/src/Particles/SphereParticles/processorBoundarySphereParticles.cpp @@ -0,0 +1,46 @@ +#include "processorBoundarySphereParticles.hpp" +#include "sphereParticles.hpp" +#include "boundaryProcessor.hpp" + +pFlow::processorBoundarySphereParticles::processorBoundarySphereParticles( + const boundaryBase &boundary, + sphereParticles &prtcls +) +: + boundarySphereParticles(boundary, prtcls) +{ + +} + +bool pFlow::processorBoundarySphereParticles::acceleration(const timeInfo &ti, const realx3& g) +{ + + +#ifndef BoundaryModel1 + + + if(isBoundaryMaster()) + { + auto thisIndex = thisBoundaryIndex(); + auto mass = Particles().mass().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto I = Particles().I().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto cf = Particles().contactForce().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto ct = Particles().contactTorque().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto acc = Particles().accelertion().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto rAcc = Particles().rAcceleration().BoundaryField(thisIndex).neighborProcField().deviceView(); + + Kokkos::parallel_for( + "processorBoundary::acceleration."+this->boundaryName(), + deviceRPolicyStatic(0,mass.size()), + LAMBDA_HD(uint32 i){ + acc[i] = cf[i]/mass[i] + g; + rAcc[i] = ct[i]/I[i]; + }); + Kokkos::fence(); + } + +#endif + + + return true; +} diff --git a/src/Particles/SphereParticles/processorBoundarySphereParticles.hpp b/src/Particles/SphereParticles/processorBoundarySphereParticles.hpp new file mode 100644 index 00000000..cf4b1955 --- /dev/null +++ b/src/Particles/SphereParticles/processorBoundarySphereParticles.hpp @@ -0,0 +1,38 @@ +#ifndef __processorBoundarySphereParticles_hpp__ +#define __processorBoundarySphereParticles_hpp__ + +#include "boundarySphereParticles.hpp" + +namespace pFlow +{ + +class processorBoundarySphereParticles +: + public boundarySphereParticles +{ + +public: + + /// type info + TypeInfo("boundarySphereParticles"); + + processorBoundarySphereParticles( + const boundaryBase &boundary, + sphereParticles& prtcls + ); + + add_vCtor( + boundarySphereParticles, + processorBoundarySphereParticles, + boundaryBase + ); + + bool acceleration(const timeInfo& ti, const realx3& g)override; + + +}; + +} + + +#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp index 27d259eb..2ef1caed 100644 --- a/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp +++ b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp @@ -17,7 +17,6 @@ Licence: implied 
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -----------------------------------------------------------------------------*/ - #ifndef __mpiCommunication_H__ #define __mpiCommunication_H__ @@ -37,6 +36,8 @@ extern DataType realx4Type__; extern DataType int32x3Type__; +extern DataType uint32x3Type__; + template auto constexpr Type() { @@ -190,6 +191,20 @@ auto constexpr sFactor() return 1; } +template<> +inline +auto Type() +{ + return uint32x3Type__; +} + + +template<> +auto constexpr sFactor() +{ + return 1; +} + /*inline auto createByteSequence(int sizeOfElement) { @@ -211,6 +226,7 @@ auto TypeFree(DataType* type) return MPI_Type_free(type); } + template inline auto getCount(Status* status, int& count) { @@ -440,11 +456,6 @@ inline auto Wait(Request* request, Status* status) return MPI_Wait(request, status); } -inline auto typeFree(DataType& type) -{ - return MPI_Type_free(&type); -} - } diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp index 873dd7eb..05b45e93 100644 --- a/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp +++ b/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp @@ -55,6 +55,8 @@ namespace pFlow::MPI inline const auto ErrOp = MPI_ERR_OP; inline const auto SumOp = MPI_SUM; + inline const auto MaxOp = MPI_MAX; + inline const auto MinOp = MPI_MIN; inline const size_t MaxNoProcessors = 2048; diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp index 9a5ee76e..34890575 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -35,46 +35,25 @@ pFlow::MPI::MPISimulationDomain::MPISimulationDomain(systemControl& control) bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts() { + + if(!prepareBoundaryDicts()) + { + return false; + } + auto& boundaries = this->subDict("boundaries"); - this->addDict("MPIBoundaries", boundaries); - auto& mpiBoundaries = this->subDict("MPIBoundaries"); - - real neighborLength = boundaries.getVal("neighborLength"); - auto boundaryExtntionLengthRatio = max( - boundaries.getValOrSet("boundaryExtntionLengthRatio", 0.1), - 0.0); - auto updateIntercal = max( - boundaries.getValOrSet("updateInterval", 1u), - 1u); + + auto& thisBoundaries = this->subDict(thisBoundariesDictName()); + auto neighbors = findPlaneNeighbors(); for(uint32 i=0; i pFlow::MPI::MPISimulationDomain::findPlaneNeighbors() const return neighbors; } -const pFlow::dictionary & -pFlow::MPI::MPISimulationDomain::thisBoundaryDict() const -{ - return this->subDict("MPIBoundaries"); -} bool pFlow::MPI::MPISimulationDomain::initialUpdateDomains(span pointPos) { diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp index bab83611..8949409e 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp @@ -72,8 +72,6 @@ public: systemControl ); - const dictionary& thisBoundaryDict() const final; - /// @brief /// @param pointPos /// @return diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp index b7348a2a..d622a971 100644 --- a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp +++ 
b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp @@ -25,13 +25,17 @@ pFlow::MPI::processorBoundaryField::checkDataRecieved() const { if (!dataRecieved_) { - uint32 nRecv = reciever_.waitBufferForUse(); + uint32 nRecv = neighborProcField_.waitBufferForUse(); dataRecieved_ = true; if (nRecv != this->neighborProcSize()) { - fatalErrorInFunction; + fatalErrorInFunction<< + "number of recived data is "<< nRecv <<" and expected number is "<< + this->neighborProcSize()<< " in "<name() <name()<<" has recieved with size "<< nRecv<::updateBoundary( DataDirection direction ) { +#ifndef BoundaryModel1 + if(!this->boundary().performBoundarytUpdate()) + return true; +#endif + if (step == 1) { // Isend @@ -49,9 +58,11 @@ pFlow::MPI::processorBoundaryField::updateBoundary( ( this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) || (!this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster)) { - sender_.sendData(pFlowProcessors(), this->thisField()); + thisFieldInNeighbor_.sendData(pFlowProcessors(), this->thisField(), this->name()); dataRecieved_ = false; + //pOutput<<"request for boundary update "<< this->name()<<" direction "<< (int)direction<::updateBoundary( (!this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) || ( this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster)) { - reciever_.recieveData(pFlowProcessors(), this->neighborProcSize()); + neighborProcField_.recieveData(pFlowProcessors(), this->neighborProcSize(), this->name()); dataRecieved_ = false; + //pOutput<<"request for boundary update "<< this->name()<<" direction "<< (int)direction<::processorBoundaryField( InternalFieldType& internal ) : BoundaryFieldType(boundary, pStruct, internal), - sender_( - groupNames("sendBufferField", boundary.name()), + thisFieldInNeighbor_( + groupNames("sendBuffer", this->name()), boundary.neighborProcessorNo(), boundary.thisBoundaryIndex() ), - reciever_( - groupNames("neighborProcField", boundary.name()), + neighborProcField_( + groupNames("recieveBuffer", boundary.name()), boundary.neighborProcessorNo(), boundary.mirrorBoundaryIndex() ) @@ -102,7 +114,7 @@ typename pFlow::MPI::processorBoundaryField::ProcVectorType& pFlow::MPI::processorBoundaryField::neighborProcField() { checkDataRecieved(); - return reciever_.buffer(); + return neighborProcField_.buffer(); } template @@ -111,7 +123,7 @@ const typename pFlow::MPI::processorBoundaryField:: pFlow::MPI::processorBoundaryField::neighborProcField() const { checkDataRecieved(); - return reciever_.buffer(); + return neighborProcField_.buffer(); } template @@ -127,7 +139,7 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( if(msg.equivalentTo(message::BNDR_PROC_SIZE_CHANGED)) { auto newProcSize = varList.getObject("size"); - reciever_.resize(newProcSize); + neighborProcField_.resize(newProcSize); } if(msg.equivalentTo(message::BNDR_PROCTRANSFER_SEND)) @@ -144,7 +156,7 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( this->internal().deviceViewAll() ); - sender_.sendData(pFlowProcessors(),transferData); + thisFieldInNeighbor_.sendData(pFlowProcessors(),transferData); } else { @@ -154,7 +166,7 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( this->internal().deviceViewAll() ); - sender_.sendData(pFlowProcessors(),transferData); + thisFieldInNeighbor_.sendData(pFlowProcessors(),transferData); } @@ -164,12 +176,12 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( uint32 numRecieved = varList.getObject( 
message::eventName(message::BNDR_PROCTRANSFER_RECIEVE) ); - reciever_.recieveData(pFlowProcessors(), numRecieved); + neighborProcField_.recieveData(pFlowProcessors(), numRecieved); } else if(msg.equivalentTo(message::BNDR_PROCTRANSFER_WAITFILL)) { - uint32 numRecieved = reciever_.waitBufferForUse(); + uint32 numRecieved = neighborProcField_.waitBufferForUse(); if(msg.equivalentTo(message::CAP_CHANGED)) { @@ -188,7 +200,7 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( const auto& indices = varList.getObject( message::eventName(message::ITEM_INSERT)); - this->internal().field().insertSetElement(indices, reciever_.buffer().deviceView()); + this->internal().field().insertSetElement(indices, neighborProcField_.buffer().deviceView()); return true; } @@ -198,14 +210,14 @@ bool pFlow::MPI::processorBoundaryField::hearChanges( template void pFlow::MPI::processorBoundaryField::sendBackData() const { - reciever_.sendBackData(pFlowProcessors()); + neighborProcField_.sendBackData(pFlowProcessors()); dataRecieved_ = false; } template void pFlow::MPI::processorBoundaryField::recieveBackData() const { - sender_.recieveBackData(pFlowProcessors(), this->size()); + thisFieldInNeighbor_.recieveBackData(pFlowProcessors(), this->size()); } template @@ -216,16 +228,17 @@ void pFlow::MPI::processorBoundaryField::addBufferToInternalFiel Kokkos::Schedule, Kokkos::IndexType>; - sender_.waitBufferForUse(); + //pOutput<<"waiting for buffer to be recived in addBufferToInternalField "<name()<internal().deviceViewAll(); if constexpr( isDeviceAccessible ) { const auto& indices = this->indexList().deviceViewAll(); Kokkos::parallel_for( - "dataSender::recieveBackData", + "recieveBackData::"+this->name(), RPolicy(0,this->size()), LAMBDA_HD(uint32 i) { @@ -238,7 +251,7 @@ void pFlow::MPI::processorBoundaryField::addBufferToInternalFiel { const auto& indices = this->boundary().indexListHost().deviceViewAll(); Kokkos::parallel_for( - "dataSender::recieveBackData", + "recieveBackData::"+this->name(), RPolicy(0,this->size()), LAMBDA_HD(uint32 i) { @@ -247,4 +260,25 @@ void pFlow::MPI::processorBoundaryField::addBufferToInternalFiel ); Kokkos::fence(); } -} \ No newline at end of file +} + + +template +void pFlow::MPI::processorBoundaryField::updateBoundaryToMaster()const +{ + if (!this->isBoundaryMaster() ) + { + thisFieldInNeighbor_.sendData(pFlowProcessors(), this->thisField(), this->name()); + dataRecieved_ = false; + } +} + +template +void pFlow::MPI::processorBoundaryField::updateBoundaryFromSlave()const +{ + if( this->isBoundaryMaster() ) + { + neighborProcField_.recieveData(pFlowProcessors(), this->neighborProcSize(), this->name()); + dataRecieved_ = false; + } +} diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp index fd2c72e0..bd32d59d 100644 --- a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp @@ -23,6 +23,7 @@ Licence: #include "boundaryField.hpp" #include "dataSender.hpp" #include "dataReciever.hpp" +#include "boundaryProcessor.hpp" namespace pFlow::MPI { @@ -50,9 +51,9 @@ public: private: - dataSender sender_; + mutable dataSender thisFieldInNeighbor_; - dataReciever reciever_; + mutable dataReciever neighborProcField_; mutable bool dataRecieved_ = true; @@ -86,7 +87,7 @@ public: void fill(const T& val)override { - reciever_.fill(val); + neighborProcField_.fill(val); } bool hearChanges( @@ -103,6 
+104,10 @@ public: void addBufferToInternalField()const; + void updateBoundaryToMaster()const; + + void updateBoundaryFromSlave()const; + }; } diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp index cf34617d..17d65a48 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -33,11 +33,13 @@ pFlow::MPI::boundaryProcessor::checkDataRecieved() const { if (!dataRecieved_) { - uint32 nRecv = reciever_.waitBufferForUse(); + uint32 nRecv = neighborProcPoints_.waitBufferForUse(); dataRecieved_ = true; if (nRecv != neighborProcSize()) { - fatalErrorInFunction; + fatalErrorInFunction<<"In boundary "<name()<< + " ,number of recieved data is "<< nRecv<< + " and neighborProcSize is "< points = thisPoints(); - auto p = boundaryPlane().infPlane(); - - numToTransfer_ = 0; + callAgain = false; + return true; + } + if(step == 1) + { - Kokkos::parallel_reduce - ( - "boundaryProcessor::afterIteration", - deviceRPolicyStatic(0,s), - boundaryProcessorKernels::markNegative( - boundaryPlane().infPlane(), - transferFlags.deviceViewAll(), - thisPoints() - ), - numToTransfer_ - ); + uint32Vector_D transferFlags("transferFlags"+this->name()); + + numToTransfer_ = markInNegativeSide( + "transferData::markToTransfer"+this->name(), + transferFlags); uint32Vector_D keepIndices("keepIndices"); + if(numToTransfer_ != 0u) { pFlow::boundaryBaseKernels::createRemoveKeepIndices @@ -200,6 +223,7 @@ bool pFlow::MPI::boundaryProcessor::transferData(uint32 iter, int step) keepIndices, false ); + // delete transfer point from this processor if( !setRemoveKeepIndices(transferIndices_, keepIndices)) { @@ -212,60 +236,80 @@ bool pFlow::MPI::boundaryProcessor::transferData(uint32 iter, int step) { transferIndices_.clear(); } - - auto req = RequestNull; + CheckMPI( Isend( numToTransfer_, neighborProcessorNo(), thisBoundaryIndex(), pFlowProcessors().localCommunicator(), - &req), true ); - //pOutput<<"sent "<< numToTransfer_<(message::eventName(message::ITEM_INSERT)); + auto indView = deviceViewType1D(indices.deviceView().data(), indices.deviceView().size()); + uint32Vector_D newIndices("newIndices", indView); if(! 
appendNewIndices(newIndices)) { fatalErrorInFunction; + callAgain = false; return false; } - if(!notify( - internal().time().currentIter(), - internal().time().currentTime(), - internal().time().dt(), - msg, - varList)) + const auto ti = internal().time().TimeInfo(); + if(!notify(ti, msg, varList)) { fatalErrorInFunction; + callAgain = false; return false; } - return false; + callAgain = false; + return true; } - return false; + callAgain = false; + return true; } bool -pFlow::MPI::boundaryProcessor::iterate(uint32 iterNum, real t, real dt) +pFlow::MPI::boundaryProcessor::iterate(const timeInfo& ti) { return true; } bool -pFlow::MPI::boundaryProcessor::afterIteration(uint32 iterNum, real t, real dt) +pFlow::MPI::boundaryProcessor::afterIteration(const timeInfo& ti) { uint32 s = size(); diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp index 4f62c86c..c9f62819 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.hpp @@ -25,6 +25,7 @@ Licence: #include "mpiTypes.hpp" #include "dataSender.hpp" #include "dataReciever.hpp" +#include "boundaryConfigs.hpp" namespace pFlow::MPI { @@ -46,11 +47,9 @@ namespace pFlow::MPI Request numPointsRequest0_ = RequestNull; - realx3Vector_D neighborProcPoints_; + dataSender thisPointsInNeighbor_; - dataSender sender_; - - dataReciever reciever_; + dataReciever neighborProcPoints_; mutable bool dataRecieved_ = true; @@ -60,6 +59,10 @@ namespace pFlow::MPI uint32Vector_D transferIndices_{"transferIndices"}; + Request numTransferRequest_ = RequestNull; + + Request numRecieveRequest_ = RequestNull; + void checkDataRecieved() const; /// @brief Update processor boundary data for this processor @@ -72,7 +75,7 @@ namespace pFlow::MPI /// step is non-blocking recieve to get data. bool updataBoundaryData(int step) override; - bool transferData(uint32 iter, int step) override; + bool transferData(uint32 iter, int step, bool& callAgain) override; public: TypeInfo("boundary"); @@ -91,11 +94,17 @@ namespace pFlow::MPI boundaryProcessor, dictionary); - bool beforeIteration(uint32 step, uint32 iterNum, real t, real dt) override; + bool beforeIteration( + uint32 step, + const timeInfo& ti, + bool updateIter, + bool iterBeforeUpdate , + bool& callAgain + ) override; - bool iterate(uint32 iterNum, real t, real dt) override; + bool iterate(const timeInfo& ti) override; - bool afterIteration(uint32 iterNum, real t, real dt) override; + bool afterIteration(const timeInfo& ti) override; /// @brief Return number of points in the neighbor processor boundary. /// This is overriden from boundaryBase. 
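
Note (not part of the patch): the boundaryProcessor interface above changes transferData(uint32 iter, int step) to transferData(uint32 iter, int step, bool& callAgain), so a caller can keep polling a boundary with increasing step numbers while its non-blocking MPI requests complete. The sketch below is a hypothetical driver loop written only to illustrate that contract; the struct name, the container, and the reading of the bool return value as "no fatal error" are assumptions, not code taken from phasicFlow.

#include <vector>

// Hypothetical stand-in for the boundary interface; only the
// transferData(iter, step, callAgain) shape mirrors the patch.
struct boundaryLike
{
    virtual bool transferData(unsigned iter, int step, bool& callAgain) = 0;
    virtual ~boundaryLike() = default;
};

// Poll every boundary with increasing step numbers until none of them
// asks to be called again. Interpreting a false return as a hard failure
// is an assumption made for this sketch.
inline bool transferAllBoundaries(std::vector<boundaryLike*>& boundaries, unsigned iter)
{
    int  step       = 1;
    bool anyPending = true;
    while (anyPending)
    {
        anyPending = false;
        for (auto* b : boundaries)
        {
            bool callAgain = false;
            if (!b->transferData(iter, step, callAgain))
                return false;                  // a boundary reported an error
            anyPending = anyPending || callAgain;
        }
        ++step;                                // advance all boundaries to the next stage
    }
    return true;
}
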
diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp index 547e09f9..c547f112 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp @@ -76,7 +76,8 @@ public: void recieveData( const localProcessors& processors, - uint32 numToRecv + uint32 numToRecv, + const word& name = "dataReciver" ) { resize(numToRecv); diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp index a4c5d39b..bb4538f8 100644 --- a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp @@ -1,3 +1,23 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + #ifndef __dataSender_hpp__ #define __dataSender_hpp__ @@ -61,7 +81,8 @@ public: void sendData( const localProcessors& processors, - const scatteredFieldAccess& scatterField + const scatteredFieldAccess& scatterField, + const word& name = "dataSender::sendData" ) { using RPolicy = Kokkos::RangePolicy< @@ -79,10 +100,10 @@ public: buffer_.clear(); buffer_.resize(n); - const auto& buffView = buffer_.deviceViewAll(); + const auto& buffView = buffer_.deviceViewAll(); Kokkos::parallel_for( - "dataSender::sendData", + "packDataForSend::"+name, RPolicy(0,n), LAMBDA_HD(uint32 i) { From ba8f307c569e59e3e59e19422446e6c4400f36c7 Mon Sep 17 00:00:00 2001 From: HRN Date: Fri, 18 Oct 2024 23:10:42 +0330 Subject: [PATCH 13/14] Code recovery MPI part --- .../processorAB2BoundaryIntegration.cpp | 10 +++++----- .../domain/MPISimulationDomain.cpp | 14 +++----------- .../boundaries/boundaryProcessor.cpp | 5 +---- .../boundaries/boundaryProcessor.hpp | 1 + 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp index 494f9938..2fd9dbc1 100644 --- a/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp +++ b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp @@ -50,16 +50,16 @@ bool pFlow::processorAB2BoundaryIntegration::correctPStruct(real dt, const realx #ifndef BoundaryModel1 if(this->isBoundaryMaster()) { - const uint32 thisIndex = thisBoundaryIndex(); + const uint32 thisIndex = thisBoundaryIndex(); const auto& AB2 = static_cast(Integration()); - const auto& dy1View = AB2.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& dy1View = AB2.BoundaryField(thisIndex).neighborProcField().deviceView(); const auto& velView = vel.BoundaryField(thisIndex).neighborProcField().deviceView(); const auto& xposView = boundary().neighborProcPoints().deviceView(); - const rangeU32 aRange(0u, dy1View.size()); - return AB2Kernels::intAllActive( + const rangeU32 aRange(0u, dy1View.size()); + return AB2Kernels::intAllActive( "AB2Integration::correctPStruct."+this->boundaryName(), dt, - aRange, + aRange, xposView, velView, dy1View diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp index 34890575..37ecb052 100644 --- a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -36,16 +36,9 @@ pFlow::MPI::MPISimulationDomain::MPISimulationDomain(systemControl& control) bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts() { - if(!prepareBoundaryDicts()) - { - return false; - } - - auto& boundaries = this->subDict("boundaries"); + dictionary& boundaries = this->subDict("boundaries"); - - auto& thisBoundaries = this->subDict(thisBoundariesDictName()); - + dictionary& thisBoundaries = this->subDict(thisBoundariesDictName()); auto neighbors = findPlaneNeighbors(); @@ -133,8 +126,7 @@ bool pFlow::MPI::MPISimulationDomain::setThisDomain() fatalErrorInFunction<< "Failed to distributed domains"< Date: Fri, 16 May 2025 18:55:25 +0330 Subject: [PATCH 14/14] CMakeList modified for automatic compile of Zoltan --- cmake/bashrc | 2 +- cmake/zoltanInstallCheck.cmake | 44 +++++++++++++++++++++++++++++++ src/phasicFlow/CMakeLists.txt | 47 +++++++++++++++------------------- 3 files changed, 65 
insertions(+), 28 deletions(-) create mode 100644 cmake/zoltanInstallCheck.cmake diff --git a/cmake/bashrc b/cmake/bashrc index e905df58..50c62d8b 100644 --- a/cmake/bashrc +++ b/cmake/bashrc @@ -19,7 +19,7 @@ export pFlow_SRC_DIR="$pFlow_PROJECT_DIR/src" export Kokkos_DIR="$kokkosDir" -export Zoltan_DIR="$projectDir/Zoltan" +#export Zoltan_DIR="$projectDir/Zoltan" # Cleanup variables (done as final statement for a clean exit code) unset projectDir diff --git a/cmake/zoltanInstallCheck.cmake b/cmake/zoltanInstallCheck.cmake new file mode 100644 index 00000000..d0e6d8db --- /dev/null +++ b/cmake/zoltanInstallCheck.cmake @@ -0,0 +1,44 @@ +# Macro to check for Zoltan installation and build it if needed +# Usage: zoltan_find_or_build(ZOLTAN_DIR) +# Returns: ZOLTAN_INCLUDE_DIR, ZOLTAN_LIBRARY + +macro(zoltan_find_or_build ZOLTAN_DIR) + # Set the Zoltan directory + set(ZOLTAN_PREFIX "${ZOLTAN_DIR}" CACHE STRING "Zoltan install directory") + message(STATUS "Zoltan install directory is ${ZOLTAN_PREFIX}") + + # Check if the Zoltan library is already built + find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include") + message(STATUS "Zoltan include path: ${ZOLTAN_INCLUDE_DIR}") + + find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib") + message(STATUS "Zoltan lib path: ${ZOLTAN_LIBRARY}") + + # Check if Zoltan library exists, if not compile it using buildlib script + if(NOT ZOLTAN_LIBRARY) + message(STATUS "Zoltan library not found. Compiling from source using buildlib script...") + + # Execute the buildlib bash script + execute_process( + COMMAND bash ${ZOLTAN_PREFIX}/buildlib + WORKING_DIRECTORY ${ZOLTAN_PREFIX} + RESULT_VARIABLE ZOLTAN_BUILD_RESULT + OUTPUT_VARIABLE ZOLTAN_BUILD_OUTPUT + ERROR_VARIABLE ZOLTAN_BUILD_ERROR + ) + + if(NOT ZOLTAN_BUILD_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to build Zoltan library using buildlib script. 
Error: ${ZOLTAN_BUILD_ERROR}") + endif() + + # Try to find the library again after building + find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib" NO_DEFAULT_PATH) + find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include" NO_DEFAULT_PATH) + + if(NOT ZOLTAN_LIBRARY) + message(FATAL_ERROR "Failed to locate Zoltan library after building") + endif() + + message(STATUS "Successfully built Zoltan library at ${ZOLTAN_LIBRARY}") + endif() +endmacro() \ No newline at end of file diff --git a/src/phasicFlow/CMakeLists.txt b/src/phasicFlow/CMakeLists.txt index db66642e..5af3b7dd 100644 --- a/src/phasicFlow/CMakeLists.txt +++ b/src/phasicFlow/CMakeLists.txt @@ -1,4 +1,3 @@ - list(APPEND SourceFiles types/basicTypes/bTypesFunctions.cpp types/basicTypes/Logical.cpp @@ -119,35 +118,27 @@ set(link_libs) set(link_libs Kokkos::kokkos tbb) - +# for MPI parallelization if(pFlow_Build_MPI) - set(Zoltan_Install_DIR) - if(DEFINED ENV{Zoltan_DIR}) - set(Zoltan_Install_DIR $ENV{Zoltan_DIR}) - else() - set(Zoltan_Install_DIR $ENV{HOME}/PhasicFlow/Zoltan) - endif() - message(STATUS "Zoltan install directory is ${Zoltan_Install_DIR}") - - set(ZOLTAN_PREFIX "${Zoltan_Install_DIR}" CACHE STRING "Zoltan install directory") - - find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include") - - message(STATUS "Zoltan include path: ${ZOLTAN_INCLUDE_DIR}") - - find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib") - message(STATUS "Zoltan lib path: ${ZOLTAN_LIBRARY}") + # Include the Zoltan installation check macro + include(${CMAKE_SOURCE_DIR}/cmake/zoltanInstallCheck.cmake) + + # set the Zoltan Directory and check/build if needed + set(Zoltan_Install_DIR ${CMAKE_SOURCE_DIR}/thirdParty/Zoltan) + + # Call the macro to find or build Zoltan + zoltan_find_or_build(${Zoltan_Install_DIR}) list(APPEND SourceFiles - MPIParallelization/domain/partitioning/partitioning.cpp - MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp - MPIParallelization/domain/MPISimulationDomain.cpp - MPIParallelization/dataIOMPI/dataIOMPIs.cpp - MPIParallelization/MPI/procCommunication.cpp - MPIParallelization/MPI/scatteredMasterDistributeChar.cpp - MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp - MPIParallelization/pointField/processorBoundaryFields.cpp + MPIParallelization/domain/partitioning/partitioning.cpp + MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp + MPIParallelization/domain/MPISimulationDomain.cpp + MPIParallelization/dataIOMPI/dataIOMPIs.cpp + MPIParallelization/MPI/procCommunication.cpp + MPIParallelization/MPI/scatteredMasterDistributeChar.cpp + MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp + MPIParallelization/pointField/processorBoundaryFields.cpp ) list(APPEND link_libs MPI::MPI_CXX ${ZOLTAN_LIBRARY} -lm ) @@ -155,8 +146,10 @@ if(pFlow_Build_MPI) target_include_directories(phasicFlow PUBLIC ./globals ${ZOLTAN_INCLUDE_DIR}) else() - pFlow_add_library_install(phasicFlow SourceFiles link_libs) + +pFlow_add_library_install(phasicFlow SourceFiles link_libs) target_include_directories(phasicFlow PUBLIC ./globals) + endif()