diff --git a/cmake/bashrc b/cmake/bashrc index e905df58..50c62d8b 100644 --- a/cmake/bashrc +++ b/cmake/bashrc @@ -19,7 +19,7 @@ export pFlow_SRC_DIR="$pFlow_PROJECT_DIR/src" export Kokkos_DIR="$kokkosDir" -export Zoltan_DIR="$projectDir/Zoltan" +#export Zoltan_DIR="$projectDir/Zoltan" # Cleanup variables (done as final statement for a clean exit code) unset projectDir diff --git a/cmake/zoltanInstallCheck.cmake b/cmake/zoltanInstallCheck.cmake new file mode 100644 index 00000000..d0e6d8db --- /dev/null +++ b/cmake/zoltanInstallCheck.cmake @@ -0,0 +1,44 @@ +# Macro to check for Zoltan installation and build it if needed +# Usage: zoltan_find_or_build(ZOLTAN_DIR) +# Returns: ZOLTAN_INCLUDE_DIR, ZOLTAN_LIBRARY + +macro(zoltan_find_or_build ZOLTAN_DIR) + # Set the Zoltan directory + set(ZOLTAN_PREFIX "${ZOLTAN_DIR}" CACHE STRING "Zoltan install directory") + message(STATUS "Zoltan install directory is ${ZOLTAN_PREFIX}") + + # Check if the Zoltan library is already built + find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include") + message(STATUS "Zoltan include path: ${ZOLTAN_INCLUDE_DIR}") + + find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib") + message(STATUS "Zoltan lib path: ${ZOLTAN_LIBRARY}") + + # Check if Zoltan library exists, if not compile it using buildlib script + if(NOT ZOLTAN_LIBRARY) + message(STATUS "Zoltan library not found. Compiling from source using buildlib script...") + + # Execute the buildlib bash script + execute_process( + COMMAND bash ${ZOLTAN_PREFIX}/buildlib + WORKING_DIRECTORY ${ZOLTAN_PREFIX} + RESULT_VARIABLE ZOLTAN_BUILD_RESULT + OUTPUT_VARIABLE ZOLTAN_BUILD_OUTPUT + ERROR_VARIABLE ZOLTAN_BUILD_ERROR + ) + + if(NOT ZOLTAN_BUILD_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to build Zoltan library using buildlib script. 
Error: ${ZOLTAN_BUILD_ERROR}") + endif() + + # Try to find the library again after building + find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib" NO_DEFAULT_PATH) + find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include" NO_DEFAULT_PATH) + + if(NOT ZOLTAN_LIBRARY) + message(FATAL_ERROR "Failed to locate Zoltan library after building") + endif() + + message(STATUS "Successfully built Zoltan library at ${ZOLTAN_LIBRARY}") + endif() +endmacro() \ No newline at end of file diff --git a/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp new file mode 100644 index 00000000..2fd9dbc1 --- /dev/null +++ b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.cpp @@ -0,0 +1,71 @@ +#include "processorAB2BoundaryIntegration.hpp" +#include "AdamsBashforth2.hpp" +#include "AB2Kernels.hpp" +#include "boundaryConfigs.hpp" + +pFlow::processorAB2BoundaryIntegration::processorAB2BoundaryIntegration( + const boundaryBase &boundary, + const pointStructure &pStruct, + const word &method, + integration& intgrtn +) +: + boundaryIntegration(boundary, pStruct, method, intgrtn) +{} + +bool pFlow::processorAB2BoundaryIntegration::correct( + real dt, + const realx3PointField_D& y, + const realx3PointField_D& dy +) +{ + +#ifndef BoundaryModel1 + if(this->isBoundaryMaster()) + { + const uint32 thisIndex = thisBoundaryIndex(); + const auto& AB2 = static_cast(Integration()); + const auto& dy1View = AB2.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& dyView = dy.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& yView = y.BoundaryField(thisIndex).neighborProcField().deviceView(); + const rangeU32 aRange(0u, dy1View.size()); + return AB2Kernels::intAllActive( + "AB2Integration::correct."+this->boundaryName(), + dt, + aRange, + yView, + dyView, + dy1View + ); + } +#endif //BoundaryModel1 + + + return true; +} + +bool pFlow::processorAB2BoundaryIntegration::correctPStruct(real dt, const realx3PointField_D &vel) +{ + + #ifndef BoundaryModel1 + if(this->isBoundaryMaster()) + { + const uint32 thisIndex = thisBoundaryIndex(); + const auto& AB2 = static_cast(Integration()); + const auto& dy1View = AB2.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& velView = vel.BoundaryField(thisIndex).neighborProcField().deviceView(); + const auto& xposView = boundary().neighborProcPoints().deviceView(); + const rangeU32 aRange(0u, dy1View.size()); + return AB2Kernels::intAllActive( + "AB2Integration::correctPStruct."+this->boundaryName(), + dt, + aRange, + xposView, + velView, + dy1View + ); + } +#endif //BoundaryModel1 + + return true; +} diff --git a/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp new file mode 100644 index 00000000..7ff6aacb --- /dev/null +++ b/src/Integration/AdamsBashforth2/processorAB2BoundaryIntegration.hpp @@ -0,0 +1,51 @@ + + +#ifndef __processorAB2BoundaryIntegration_hpp__ +#define __processorAB2BoundaryIntegration_hpp__ + +#include "boundaryIntegration.hpp" + +namespace pFlow +{ + +class processorAB2BoundaryIntegration +: + public boundaryIntegration +{ +public: + + TypeInfo("boundaryIntegration"); + + processorAB2BoundaryIntegration( + const boundaryBase& boundary, + const pointStructure& pStruct, + const word& method, + integration& intgrtn + ); + + ~processorAB2BoundaryIntegration()override=default; + + + bool correct( + real dt, + 
const realx3PointField_D& y, + const realx3PointField_D& dy)override; + + + + bool correctPStruct(real dt, const realx3PointField_D& vel)override; + + + add_vCtor( + boundaryIntegration, + processorAB2BoundaryIntegration, + boundaryBase + ); + + + +}; + +} + +#endif \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp new file mode 100644 index 00000000..323f23f7 --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/processorBoundaryContactSearch/processorBoundaryContactSearch.cpp @@ -0,0 +1,111 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "processorBoundaryContactSearch.hpp" +#include "contactSearch.hpp" +#include "particles.hpp" +//#include "pointStructure.hpp" +//#include "geometry.hpp" + + +void pFlow::processorBoundaryContactSearch::setSearchBox() +{ + + auto l = boundary().neighborLength(); + auto n = boundary().boundaryPlane().normal(); + auto pp1 = boundary().boundaryPlane().parallelPlane(l); + auto pp2 = boundary().boundaryPlane().parallelPlane(-l); + + realx3 minP1 = min(min(min(pp1.p1(), pp1.p2()), pp1.p3()), pp1.p4()); + realx3 maxP1 = max(max(max(pp1.p1(), pp1.p2()), pp1.p3()), pp1.p4()); + + realx3 minP2 = min(min(min(pp2.p1(), pp2.p2()), pp2.p3()), pp2.p4()); + realx3 maxP2 = max(max(max(pp2.p1(), pp2.p2()), pp2.p3()), pp2.p4()); + + auto minP = min(minP1, minP2) - l*(realx3(1.0)-abs(n)); + auto maxP = max(maxP1, maxP2) + l*(realx3(1.0)-abs(n)); + + searchBox_={minP, maxP}; +} + +pFlow::processorBoundaryContactSearch::processorBoundaryContactSearch( + const dictionary &dict, + const boundaryBase &boundary, + const contactSearch &cSearch) +: + boundaryContactSearch(dict, boundary, cSearch), + diameter_(cSearch.Particles().boundingSphere()), + masterSearch_(this->isBoundaryMaster()), + sizeRatio_(dict.getVal("sizeRatio")) +{ + + if(masterSearch_) + { + setSearchBox(); + + real minD; + real maxD; + cSearch.Particles().boundingSphereMinMax(minD, maxD); + + ppContactSearch_ = makeUnique( + searchBox_, + maxD, + sizeRatio_); + } + else + { + searchBox_={{0,0,0},{0,0,0}}; + } +} + +bool pFlow::processorBoundaryContactSearch::broadSearch +( + uint32 iter, + real t, + real dt, + csPairContainerType &ppPairs, + csPairContainerType &pwPairs, + bool force +) +{ + if(masterSearch_) + { + const auto thisPoints = boundary().thisPoints(); + const auto& neighborProcPoints = boundary().neighborProcPoints(); + const auto& bDiams = diameter_.BoundaryField(thisBoundaryIndex()); + const 
auto thisDiams = bDiams.thisField(); + const auto& neighborProcDiams = bDiams.neighborProcField(); + + ppContactSearch_().broadSearchPP( + ppPairs, + thisPoints, + thisDiams, + neighborProcPoints, + neighborProcDiams, + boundaryName() + ); + //pOutput<<"ppSize "<< ppPairs.size()< ppContactSearch_ = nullptr; + + const realPointField_D& diameter_; + + bool masterSearch_; + + real sizeRatio_; + + void setSearchBox(); + +public: + + TypeInfo("boundaryContactSearch") + + processorBoundaryContactSearch( + const dictionary& dict, + const boundaryBase& boundary, + const contactSearch& cSearch + ); + + ~processorBoundaryContactSearch() override = default; + + add_vCtor( + boundaryContactSearch, + processorBoundaryContactSearch, + boundaryBase + ); + + bool broadSearch( + uint32 iter, + real t, + real dt, + csPairContainerType& ppPairs, + csPairContainerType& pwPairs, + bool force = false + ) override; +}; + +} + +#endif //__processorBoundaryContactSearch_hpp__ \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp new file mode 100644 index 00000000..a7e61f86 --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearch.cpp @@ -0,0 +1,163 @@ + +#include "twoPartContactSearch.hpp" +#include "twoPartContactSearchKernels.hpp" +#include "phasicFlowKokkos.hpp" +#include "streams.hpp" + +void pFlow::twoPartContactSearch::checkAllocateNext(uint32 n) +{ + if( nextCapacity_ < n) + { + nextCapacity_ = n; + reallocNoInit(next_, n); + } +} + +void pFlow::twoPartContactSearch::nullifyHead() +{ + fill(head_, static_cast(-1)); +} + +void pFlow::twoPartContactSearch::nullifyNext(uint32 n) +{ + fill(next_, 0u, n, static_cast(-1)); +} + +void pFlow::twoPartContactSearch::buildList( + const deviceScatteredFieldAccess &points) +{ + if(points.empty())return; + uint32 n = points.size(); + checkAllocateNext(n); + nullifyNext(n); + nullifyHead(); + + pFlow::twoPartContactSearchKernels::buildNextHead( + points, + searchCells_, + head_, + next_ + ); +} + +pFlow::twoPartContactSearch::twoPartContactSearch +( + const box &domain, + real cellSize, + real sizeRatio +) +: + searchCells_(domain, cellSize), + head_("periodic:head",searchCells_.nx(), searchCells_.ny(), searchCells_.nz()), + sizeRatio_(sizeRatio) +{ + +} + +bool pFlow::twoPartContactSearch::broadSearchPP +( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points1, + const deviceScatteredFieldAccess& diams1, + const deviceScatteredFieldAccess &points2, + const deviceScatteredFieldAccess& diams2, + const realx3& transferVec +) +{ + if(points1.empty())return true; + if(points2.empty()) return true; + + buildList(points1); + + uint32 nNotInserted = 1; + + // loop until the container size fits the numebr of contact pairs + while (nNotInserted > 0) + { + + nNotInserted = pFlow::twoPartContactSearchKernels::broadSearchPP + ( + ppPairs, + points1, + diams1, + points2, + diams2, + transferVec, + head_, + next_, + searchCells_, + sizeRatio_ + ); + + + if(nNotInserted) + { + // - resize the container + // note that getFull now shows the number of failed insertions. 
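+            // Grow the pair container by at least the number of failed insertions
+            // (never less than 100 slots) and repeat the broad search; the while
+            // loop exits only when every candidate pair has been inserted.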
+ uint32 len = max(nNotInserted,100u) ; + + auto oldCap = ppPairs.capacity(); + + ppPairs.increaseCapacityBy(len); + + INFORMATION<< "Particle-particle contact pair container capacity increased from "<< + oldCap << " to "< &points1, + const deviceScatteredFieldAccess &diams1, + const realx3Vector_D& points2, + const realVector_D& diams2, + const word& name +) +{ + buildList(points1); + + uint32 nNotInserted = 1; + + // loop until the container size fits the numebr of contact pairs + while (nNotInserted > 0) + { + + nNotInserted = pFlow::twoPartContactSearchKernels::broadSearchPP + ( + ppPairs, + points1, + diams1, + points2, + diams2, + head_, + next_, + searchCells_, + sizeRatio_ + ); + + + if(nNotInserted) + { + // - resize the container + // note that getFull now shows the number of failed insertions. + uint32 len = max(nNotInserted,100u) ; + + auto oldCap = ppPairs.capacity(); + + ppPairs.increaseCapacityBy(len); + + INFORMATION<< "Particle-particle contact pair container capacity increased from "<< + oldCap << " to "<; + + using NextType = deviceViewType1D; + +private: + + cells searchCells_; + + HeadType head_{ "periodic::head", 1, 1, 1 }; + + NextType next_{ "periodic::next", 1 }; + + real sizeRatio_ = 1.0; + + uint32 nextCapacity_ = 0; + + void checkAllocateNext(uint32 n); + + void nullifyHead(); + + void nullifyNext(uint32 n); + + void buildList( + const deviceScatteredFieldAccess &points); + +public: + twoPartContactSearch( + const box &domain, + real cellSize, + real sizeRatio = 1.0); + + /// @brief Perform a broad-search for spheres in two adjacent regions. + /// Region 1 is considered as the master (primary) region and region 2 as slave + /// @param ppPairs pairs container which holds i and j + /// @param points1 point positions in region 1 + /// @param diams1 diameter of spheres in region 1 + /// @param points2 point positions in region 2 + /// @param diams2 diameter of spheres in region 2 + /// @param transferVec a vector to transfer points from region 2 to region 1 + /// @return true if it is successful + bool broadSearchPP( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points1, + const deviceScatteredFieldAccess &diams1, + const deviceScatteredFieldAccess &points2, + const deviceScatteredFieldAccess &diams2, + const realx3 &transferVec); + + bool broadSearchPP( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points1, + const deviceScatteredFieldAccess &diams1, + const realx3Vector_D& points2, + const realVector_D& diams2, + const word& name); + + const auto& searchCells()const + { + return searchCells_; + } + + real sizeRatio()const + { + return sizeRatio_; + } +}; + +} + +#endif //__twoPartContactSearch_hpp__ \ No newline at end of file diff --git a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp new file mode 100644 index 00000000..9faa80fc --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.cpp @@ -0,0 +1,186 @@ +#include "twoPartContactSearchKernels.hpp" + +INLINE_FUNCTION_HD +bool +sphereSphereCheckB( + const pFlow::realx3& p1, + const pFlow::realx3 p2, + pFlow::real d1, + pFlow::real d2 +) +{ + return pFlow::length(p2 - p1) < 0.5 * (d2 + d1); +} + +void +pFlow::twoPartContactSearchKernels::buildNextHead( + const deviceScatteredFieldAccess& points, + const cells& searchCells, + deviceViewType3D& head, + deviceViewType1D& next +) 
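+// Build a cell linked-list over the scattered points: head(i,j,k) holds the index
+// of the last point placed in cell (i,j,k) and next[n] chains to the point that
+// was there before it; the value uint32(-1) marks the end of a chain.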
+{ + + uint32 n = points.size(); + + Kokkos::parallel_for( + "pFlow::ppwBndryContactSearch::buildList", + deviceRPolicyStatic(0, n), + LAMBDA_HD(uint32 i) { + int32x3 ind; + if (searchCells.pointIndexInDomain(points[i], ind)) + { + // discards points out of searchCell + uint32 old = + Kokkos::atomic_exchange(&head(ind.x(), ind.y(), ind.z()), i); + next[i] = old; + } + } + ); + Kokkos::fence(); +} + +pFlow::uint32 +pFlow::twoPartContactSearchKernels::broadSearchPP( + csPairContainerType& ppPairs, + const deviceScatteredFieldAccess& points, + const deviceScatteredFieldAccess& diams, + const deviceScatteredFieldAccess& mirrorPoints, + const deviceScatteredFieldAccess& mirrorDiams, + const realx3& transferVec, + const deviceViewType3D& head, + const deviceViewType1D& next, + const cells& searchCells, + const real sizeRatio +) +{ + if (points.empty()) + return 0; + if (mirrorPoints.empty()) + return 0; + + auto nMirror = mirrorPoints.size(); + + uint32 getFull = 0; + + Kokkos::parallel_reduce( + "pFlow::twoPartContactSearchKernels::broadSearchPP", + deviceRPolicyStatic(0, nMirror), + LAMBDA_HD(const uint32 mrrI, uint32& getFullUpdate) { + realx3 p_m = mirrorPoints(mrrI) + transferVec; + + int32x3 ind_m; + if (!searchCells.pointIndexInDomain(p_m, ind_m)) + return; + + real d_m = sizeRatio * mirrorDiams[mrrI]; + + for (int ii = -1; ii < 2; ii++) + { + for (int jj = -1; jj < 2; jj++) + { + for (int kk = -1; kk < 2; kk++) + { + auto ind = ind_m + int32x3{ ii, jj, kk }; + + if (!searchCells.inCellRange(ind)) + continue; + + uint32 thisI = head(ind.x(), ind.y(), ind.z()); + while (thisI != static_cast(-1)) + { + auto d_n = sizeRatio * diams[thisI]; + + // first item is for this boundary and second itme, + // for mirror + if(sphereSphereCheckB(p_m, points[thisI], d_m, d_n)&& + ppPairs.insert(thisI,mrrI) == static_cast(-1)) + { + getFullUpdate++; + } + + thisI = next(thisI); + } + } + } + } + }, + getFull + ); + + return getFull; +} + +pFlow::uint32 +pFlow::twoPartContactSearchKernels::broadSearchPP( + csPairContainerType& ppPairs, + const deviceScatteredFieldAccess& points1, + const deviceScatteredFieldAccess& diams1, + const realx3Vector_D& points2, + const realVector_D& diams2, + const deviceViewType3D& head, + const deviceViewType1D& next, + const cells& searchCells, + real sizeRatio +) +{ + if (points1.empty()) + return 0; + if (points2.empty()) + return 0; + + auto nP2 = points2.size(); + auto points2View = points2.deviceView(); + auto diams2View = diams2.deviceView(); + + uint32 getFull = 0; + + Kokkos::parallel_reduce( + "pFlow::twoPartContactSearchKernels::broadSearchPP", + deviceRPolicyStatic(0, nP2), + LAMBDA_HD(const uint32 i2, uint32& getFullUpdate) { + realx3 p_m = points2View(i2); + + int32x3 ind_m; + if (!searchCells.pointIndexInDomain(p_m, ind_m)) + return; + + real d_m = sizeRatio * diams2View[i2]; + + for (int ii = -1; ii < 2; ii++) + { + for (int jj = -1; jj < 2; jj++) + { + for (int kk = -1; kk < 2; kk++) + { + auto ind = ind_m + int32x3{ ii, jj, kk }; + + if (!searchCells.inCellRange(ind)) + { + continue; + } + + uint32 i1 = head(ind.x(), ind.y(), ind.z()); + while (i1 != static_cast(-1)) + { + auto d_n = sizeRatio * diams1[i1]; + + // first item is for this boundary and second itme, + // for mirror + if(sphereSphereCheckB(p_m, points1[i1], d_m, d_n)&& + ppPairs.insert(i1,i2) == static_cast(-1)) + { + getFullUpdate++; + } + + i1 = next(i1); + } + } + } + } + }, + getFull + ); + + return getFull; +} \ No newline at end of file diff --git 
a/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp new file mode 100644 index 00000000..42f7cda1 --- /dev/null +++ b/src/Interaction/contactSearch/boundaries/twoPartContactSearch/twoPartContactSearchKernels.hpp @@ -0,0 +1,49 @@ +#ifndef __twoPartContactSearchKernels_hpp__ +#define __twoPartContactSearchKernels_hpp__ + +#include "contactSearchGlobals.hpp" +#include "cells.hpp" +#include "contactSearchFunctions.hpp" +#include "scatteredFieldAccess.hpp" +#include "VectorSingles.hpp" + +namespace pFlow::twoPartContactSearchKernels +{ + +void buildNextHead( + const deviceScatteredFieldAccess &points, + const cells &searchCells, + deviceViewType3D &head, + deviceViewType1D &next ); + + +uint32 broadSearchPP +( + csPairContainerType &ppPairs, + const deviceScatteredFieldAccess &points, + const deviceScatteredFieldAccess &diams, + const deviceScatteredFieldAccess &mirrorPoints, + const deviceScatteredFieldAccess &mirrorDiams, + const realx3 &transferVec, + const deviceViewType3D &head, + const deviceViewType1D &next, + const cells &searchCells, + real sizeRatio +); + +uint32 +broadSearchPP( + csPairContainerType& ppPairs, + const deviceScatteredFieldAccess& points1, + const deviceScatteredFieldAccess& diams1, + const realx3Vector_D& points2, + const realVector_D& diams2, + const deviceViewType3D& head, + const deviceViewType1D& next, + const cells& searchCells, + real sizeRatio +); +} + + +#endif //__twoPartContactSearchKernels_hpp__ \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp new file mode 100644 index 00000000..ee809c4b --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySIKernels.hpp @@ -0,0 +1,132 @@ + +#ifndef __processorBoundarySIKernels_hpp__ +#define __processorBoundarySIKernels_hpp__ + +namespace pFlow::MPI::processorBoundarySIKernels +{ + +template +inline +void sphereSphereInteraction +( + const word& kernalName, + real dt, + const ContactListType& cntctList, + const ContactForceModel& forceModel, + const deviceScatteredFieldAccess& thisPoints, + const deviceViewType1D& thisDiam, + const deviceViewType1D& thisPropId, + const deviceViewType1D& thisVel, + const deviceViewType1D& thisRVel, + const deviceViewType1D& thisCForce, + const deviceViewType1D& thisCTorque, + const deviceViewType1D& neighborPoints, + const deviceViewType1D& neighborDiam, + const deviceViewType1D& neighborPropId, + const deviceViewType1D& neighborVel, + const deviceViewType1D& neighborRVel, + const deviceViewType1D& neighborCForce, + const deviceViewType1D& neighborCTorque +) +{ + + using ValueType = typename ContactListType::ValueType; + uint32 ss = cntctList.size(); + if(ss == 0u)return; + + uint32 lastItem = cntctList.loopCount(); + + Kokkos::parallel_for( + kernalName, + deviceRPolicyDynamic(0,lastItem), + LAMBDA_HD(uint32 n) + { + + if(!cntctList.isValid(n))return; + + auto [i,j] = cntctList.getPair(n); + uint32 ind_i = thisPoints.index(i); + uint32 ind_j = j; + + real Ri = 0.5*thisDiam[ind_i]; + real Rj = 0.5*neighborDiam[ind_j]; + realx3 xi = thisPoints.field()[ind_i]; + realx3 xj = neighborPoints[ind_j]; + + real dist = length(xj-xi); + real ovrlp = (Ri+Rj) - dist; + + if( ovrlp >0.0 ) + { 
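+                // Overlapping pair: build the contact unit normal and the relative
+                // velocity at the contact point, evaluate the normal/tangential
+                // contact-force and rolling-friction models, then atomically add the
+                // resulting force (and its reaction) and the torques to the local
+                // particle and the neighbor-processor particle.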
+ auto Nij = (xj-xi)/max(dist,smallValue); + auto wi = thisRVel[ind_i]; + auto wj = neighborRVel[ind_j]; + auto Vr = thisVel[ind_i] - neighborVel[ind_j] + cross((Ri*wi+Rj*wj), Nij); + + auto history = cntctList.getValue(n); + + int32 propId_i = thisPropId[ind_i]; + int32 propId_j = neighborPropId[ind_j]; + + realx3 FCn, FCt, Mri, Mrj, Mij, Mji; + + // calculates contact force + forceModel.contactForce( + dt, i, j, + propId_i, propId_j, + Ri, Rj, + ovrlp, + Vr, Nij, + history, + FCn, FCt); + + forceModel.rollingFriction( + dt, i, j, + propId_i, propId_j, + Ri, Rj, + wi, wj, + Nij, + FCn, + Mri, Mrj); + + auto M = cross(Nij,FCt); + Mij = Ri*M+Mri; + Mji = Rj*M+Mrj; + + auto FC = FCn + FCt; + + + Kokkos::atomic_add(&thisCForce[ind_i].x_,FC.x_); + Kokkos::atomic_add(&thisCForce[ind_i].y_,FC.y_); + Kokkos::atomic_add(&thisCForce[ind_i].z_,FC.z_); + + Kokkos::atomic_add(&neighborCForce[ind_j].x_,-FC.x_); + Kokkos::atomic_add(&neighborCForce[ind_j].y_,-FC.y_); + Kokkos::atomic_add(&neighborCForce[ind_j].z_,-FC.z_); + + Kokkos::atomic_add(&thisCTorque[ind_i].x_, Mij.x_); + Kokkos::atomic_add(&thisCTorque[ind_i].y_, Mij.y_); + Kokkos::atomic_add(&thisCTorque[ind_i].z_, Mij.z_); + + Kokkos::atomic_add(&neighborCTorque[ind_j].x_, Mji.x_); + Kokkos::atomic_add(&neighborCTorque[ind_j].y_, Mji.y_); + Kokkos::atomic_add(&neighborCTorque[ind_j].z_, Mji.z_); + + + cntctList.setValue(n,history); + + } + else + { + cntctList.setValue(n, ValueType()); + } + + }); + Kokkos::fence(); +} + + +} //pFlow::MPI::processorBoundarySIKernels + + +#endif //__processorBoundarySIKernels_hpp__ \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp new file mode 100644 index 00000000..231260da --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.cpp @@ -0,0 +1,256 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "processorBoundarySIKernels.hpp" + +template +pFlow::MPI::processorBoundarySphereInteraction::processorBoundarySphereInteraction( + const boundaryBase &boundary, + const sphereParticles &sphPrtcls, + const GeometryMotionModel &geomMotion) +: + boundarySphereInteraction( + boundary, + sphPrtcls, + geomMotion + ), + masterInteraction_(boundary.isBoundaryMaster()) +{ + if(masterInteraction_) + { + this->allocatePPPairs(); + this->allocatePWPairs(); + } + +} + + +#ifdef BoundaryModel1 + +template +bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction +( + real dt, + const ContactForceModel &cfModel, + uint32 step +) +{ + + // master processor calculates the contact force/torque and sends data back to the + // neighbor processor (slave processor). + // slave processor recieves the data and adds the data to the internalField + if(masterInteraction_) + { + if(step==1)return true; + + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + + const auto& cfBndry = static_cast&> ( + sphPar.contactForce().BoundaryField(thisIndex)); + + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + if(step == 2 ) + { + iter++; + pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + "ppBoundaryInteraction."+this->boundaryName(), + dt, + this->ppPairs(), + cfModel, + this->boundary().thisPoints(), + sphPar.diameter().deviceViewAll(), + sphPar.propertyId().deviceViewAll(), + sphPar.velocity().deviceViewAll(), + sphPar.rVelocity().deviceViewAll(), + sphPar.contactForce().deviceViewAll(), + sphPar.contactTorque().deviceViewAll(), + this->boundary().neighborProcPoints().deviceViewAll(), + sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.propertyId().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.velocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + cfBndry.neighborProcField().deviceViewAll(), + ctBndry.neighborProcField().deviceViewAll() + ); + + return true; + } + else if(step == 3 ) + { + cfBndry.sendBackData(); + ctBndry.sendBackData(); + + return true; + } + + + return false; + } + else + { + + if(step == 1 ) + { + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + cfBndry.recieveBackData(); + ctBndry.recieveBackData(); + + return false; + } + else if(step == 11) + { + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + cfBndry.addBufferToInternalField(); + ctBndry.addBufferToInternalField(); + + return true; + } + + + return false; + } + + return false; +} +#else + +template +bool pFlow::MPI::processorBoundarySphereInteraction::sphereSphereInteraction +( + real dt, + const ContactForceModel &cfModel, + uint32 step +) +{ + + // master processor calculates the contact force/torque and sends data back to the + // neighbor processor (slave processor). 
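+    // The exchange is staged by the step argument:
+    //   step 1 : the slave posts receives for the force/torque boundary buffers;
+    //   step 2 : the master runs the pair-interaction kernel on its own points and
+    //            the data received from the neighbor processor;
+    //   step 3 : the master sends the computed force/torque back to the slave;
+    //   step 11: the slave adds the received buffers to its internal field and the
+    //            boundary data is synchronized between the two sides.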
+ // slave processor recieves the data and adds the data to the internalField + if(masterInteraction_) + { + if(step==1)return true; + + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + + const auto& cfBndry = static_cast&> ( + sphPar.contactForce().BoundaryField(thisIndex)); + + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + if(step == 2 ) + { + + pFlow::MPI::processorBoundarySIKernels::sphereSphereInteraction( + "ppBoundaryInteraction."+this->boundaryName(), + dt, + this->ppPairs(), + cfModel, + this->boundary().thisPoints(), + sphPar.diameter().deviceViewAll(), + sphPar.propertyId().deviceViewAll(), + sphPar.velocity().deviceViewAll(), + sphPar.rVelocity().deviceViewAll(), + sphPar.contactForce().deviceViewAll(), + sphPar.contactTorque().deviceViewAll(), + this->boundary().neighborProcPoints().deviceViewAll(), + sphPar.diameter().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.propertyId().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.velocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + sphPar.rVelocity().BoundaryField(thisIndex).neighborProcField().deviceViewAll(), + cfBndry.neighborProcField().deviceViewAll(), + ctBndry.neighborProcField().deviceViewAll() + ); + + return true; + } + else if(step == 3 ) + { + cfBndry.sendBackData(); + ctBndry.sendBackData(); + return true; + } + else if(step == 11 ) + { + cfBndry.updateBoundaryFromSlave(); + ctBndry.updateBoundaryFromSlave(); + return true; + } + + return false; + } + else + { + + if(step == 1 ) + { + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + cfBndry.recieveBackData(); + ctBndry.recieveBackData(); + + return false; + } + else if(step == 11) + { + const auto & sphPar = this->sphParticles(); + uint32 thisIndex = this->boundary().thisBoundaryIndex(); + const auto& cfBndry = static_cast&>( + sphPar.contactForce().BoundaryField(thisIndex)); + const auto& ctBndry = static_cast&> ( + sphPar.contactTorque().BoundaryField(thisIndex)); + + cfBndry.addBufferToInternalField(); + cfBndry.updateBoundaryToMaster(); + + ctBndry.addBufferToInternalField(); + ctBndry.updateBoundaryToMaster(); + + return true; + } + + return false; + } + + return false; +} + +#endif \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp new file mode 100644 index 00000000..5d27bd76 --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteraction.hpp @@ -0,0 +1,93 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. 
It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __processorBoundarySphereInteraction_hpp__ +#define __processorBoundarySphereInteraction_hpp__ + +#include "boundarySphereInteraction.hpp" +#include "processorBoundaryField.hpp" +#include "boundaryProcessor.hpp" + +namespace pFlow::MPI +{ + +template +class processorBoundarySphereInteraction +: + public boundarySphereInteraction +{ +public: + + using PBSInteractionType = + processorBoundarySphereInteraction; + + using BSInteractionType = + boundarySphereInteraction; + + using GeometryMotionModel = typename BSInteractionType::GeometryMotionModel; + + using ContactForceModel = typename BSInteractionType::ContactForceModel; + + using MotionModel = typename geometryMotionModel::MotionModel; + + using ModelStorage = typename ContactForceModel::contactForceStorage; + + using IdType = typename BSInteractionType::IdType; + + using IndexType = typename BSInteractionType::IndexType; + + using ContactListType = typename BSInteractionType::ContactListType; + +private: + + bool masterInteraction_; + +public: + + TypeInfoTemplate22("boundarySphereInteraction", "processor",ContactForceModel, MotionModel); + + + processorBoundarySphereInteraction( + const boundaryBase& boundary, + const sphereParticles& sphPrtcls, + const GeometryMotionModel& geomMotion + ); + + add_vCtor + ( + BSInteractionType, + PBSInteractionType, + boundaryBase + ); + + ~processorBoundarySphereInteraction()override = default; + + bool sphereSphereInteraction( + real dt, + const ContactForceModel& cfModel, + uint32 step)override; + +}; + +} + +#include "processorBoundarySphereInteraction.cpp" + + +#endif //__processorBoundarySphereInteraction_hpp__ \ No newline at end of file diff --git a/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp new file mode 100644 index 00000000..25347d61 --- /dev/null +++ b/src/Interaction/sphereInteraction/boundaries/processorBoundarySphereInteraction/processorBoundarySphereInteractions.cpp @@ -0,0 +1,17 @@ + +#include "processorBoundarySphereInteraction.hpp" +#include "geometryMotions.hpp" +#include "contactForceModels.hpp" + + +template class pFlow::MPI::processorBoundarySphereInteraction +< + pFlow::cfModels::limitedNonLinearNormalRolling, + pFlow::rotationAxisMotionGeometry +>; + +template class pFlow::MPI::processorBoundarySphereInteraction +< + pFlow::cfModels::nonLimitedNonLinearNormalRolling, + pFlow::rotationAxisMotionGeometry +>; \ No newline at end of file diff --git a/src/Particles/SphereParticles/processorBoundarySphereParticles.cpp b/src/Particles/SphereParticles/processorBoundarySphereParticles.cpp new file mode 100644 index 00000000..c07b356c --- /dev/null +++ b/src/Particles/SphereParticles/processorBoundarySphereParticles.cpp @@ -0,0 +1,46 @@ +#include "processorBoundarySphereParticles.hpp" +#include "sphereParticles.hpp" +#include "boundaryProcessor.hpp" + 
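+// On the boundary-master side, the acceleration of particles received from the
+// neighbor processor is computed from the mass, moment of inertia, contact force
+// and contact torque stored in the neighbor-process buffers of the boundary fields.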
+pFlow::processorBoundarySphereParticles::processorBoundarySphereParticles( + const boundaryBase &boundary, + sphereParticles &prtcls +) +: + boundarySphereParticles(boundary, prtcls) +{ + +} + +bool pFlow::processorBoundarySphereParticles::acceleration(const timeInfo &ti, const realx3& g) +{ + + +#ifndef BoundaryModel1 + + + if(isBoundaryMaster()) + { + auto thisIndex = thisBoundaryIndex(); + auto mass = Particles().mass().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto I = Particles().I().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto cf = Particles().contactForce().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto ct = Particles().contactTorque().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto acc = Particles().accelertion().BoundaryField(thisIndex).neighborProcField().deviceView(); + auto rAcc = Particles().rAcceleration().BoundaryField(thisIndex).neighborProcField().deviceView(); + + Kokkos::parallel_for( + "processorBoundary::acceleration."+this->boundaryName(), + deviceRPolicyStatic(0,mass.size()), + LAMBDA_HD(uint32 i){ + acc[i] = cf[i]/mass[i] + g; + rAcc[i] = ct[i]/I[i]; + }); + Kokkos::fence(); + } + +#endif + + + return true; +} diff --git a/src/Particles/SphereParticles/processorBoundarySphereParticles.hpp b/src/Particles/SphereParticles/processorBoundarySphereParticles.hpp new file mode 100644 index 00000000..cf4b1955 --- /dev/null +++ b/src/Particles/SphereParticles/processorBoundarySphereParticles.hpp @@ -0,0 +1,38 @@ +#ifndef __processorBoundarySphereParticles_hpp__ +#define __processorBoundarySphereParticles_hpp__ + +#include "boundarySphereParticles.hpp" + +namespace pFlow +{ + +class processorBoundarySphereParticles +: + public boundarySphereParticles +{ + +public: + + /// type info + TypeInfo("boundarySphereParticles"); + + processorBoundarySphereParticles( + const boundaryBase &boundary, + sphereParticles& prtcls + ); + + add_vCtor( + boundarySphereParticles, + processorBoundarySphereParticles, + boundaryBase + ); + + bool acceleration(const timeInfo& ti, const realx3& g)override; + + +}; + +} + + +#endif \ No newline at end of file diff --git a/src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp b/src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp new file mode 100644 index 00000000..056d314c --- /dev/null +++ b/src/Particles/particles/MPIParticleIdHandler/MPIParticleIdHandler.cpp @@ -0,0 +1,70 @@ +#include "MPIParticleIdHandler.hpp" +#include "procCommunication.hpp" + +pFlow::MPI::MPIParticleIdHandler::MPIParticleIdHandler +( + pointStructure& pStruct +) +: + particleIdHandler(pStruct) +{ + initialIdCheck(); +} + +pFlow::Pair + pFlow::MPI::MPIParticleIdHandler::getIdRange(uint32 nNewParticles) +{ + uint32 startId; + if(maxId_==-1) + { + startId = 0; + } + else + { + startId = maxId_+1; + } + uint32 endId = startId+nNewParticles-1; + maxId_ = endId; + return {startId, endId}; +} + +bool pFlow::MPI::MPIParticleIdHandler::initialIdCheck() +{ + /// empty point structure / no particles in simulation + uint32 maxId = -1; + if( !pStruct().empty() ) + { + maxId = max( *this ); + } + + auto maxIdAll = procVector(pFlowProcessors()); + auto numAll = procVector(pFlowProcessors()); + auto comm = procCommunication(pFlowProcessors()); + + comm.collectAllToAll(maxId, maxIdAll); + comm.collectAllToAll(size(),numAll); + + uint32 n = 0; + for(uint32 i=0; i"); + + explicit MPIParticleIdHandler(pointStructure& pStruct); + + ~MPIParticleIdHandler() override = default; + + 
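+    // presumably registers this handler in particleIdHandler's virtual-constructor
+    // table so it can be selected at run time (pointStructure variant)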
add_vCtor( + particleIdHandler, + MPIParticleIdHandler, + pointStructure + ); + + Pair getIdRange(uint32 nNewParticles) override; + + uint32 maxId() const override + { + return maxId_; + } +}; + +} + +#endif //__MPIParticleIdHandler_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/CMakeLists.txt b/src/phasicFlow/CMakeLists.txt index db66642e..5af3b7dd 100644 --- a/src/phasicFlow/CMakeLists.txt +++ b/src/phasicFlow/CMakeLists.txt @@ -1,4 +1,3 @@ - list(APPEND SourceFiles types/basicTypes/bTypesFunctions.cpp types/basicTypes/Logical.cpp @@ -119,35 +118,27 @@ set(link_libs) set(link_libs Kokkos::kokkos tbb) - +# for MPI parallelization if(pFlow_Build_MPI) - set(Zoltan_Install_DIR) - if(DEFINED ENV{Zoltan_DIR}) - set(Zoltan_Install_DIR $ENV{Zoltan_DIR}) - else() - set(Zoltan_Install_DIR $ENV{HOME}/PhasicFlow/Zoltan) - endif() - message(STATUS "Zoltan install directory is ${Zoltan_Install_DIR}") - - set(ZOLTAN_PREFIX "${Zoltan_Install_DIR}" CACHE STRING "Zoltan install directory") - - find_path(ZOLTAN_INCLUDE_DIR zoltan.h PATHS "${ZOLTAN_PREFIX}/include") - - message(STATUS "Zoltan include path: ${ZOLTAN_INCLUDE_DIR}") - - find_library(ZOLTAN_LIBRARY zoltan PATHS "${ZOLTAN_PREFIX}/lib") - message(STATUS "Zoltan lib path: ${ZOLTAN_LIBRARY}") + # Include the Zoltan installation check macro + include(${CMAKE_SOURCE_DIR}/cmake/zoltanInstallCheck.cmake) + + # set the Zoltan Directory and check/build if needed + set(Zoltan_Install_DIR ${CMAKE_SOURCE_DIR}/thirdParty/Zoltan) + + # Call the macro to find or build Zoltan + zoltan_find_or_build(${Zoltan_Install_DIR}) list(APPEND SourceFiles - MPIParallelization/domain/partitioning/partitioning.cpp - MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp - MPIParallelization/domain/MPISimulationDomain.cpp - MPIParallelization/dataIOMPI/dataIOMPIs.cpp - MPIParallelization/MPI/procCommunication.cpp - MPIParallelization/MPI/scatteredMasterDistributeChar.cpp - MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp - MPIParallelization/pointField/processorBoundaryFields.cpp + MPIParallelization/domain/partitioning/partitioning.cpp + MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp + MPIParallelization/domain/MPISimulationDomain.cpp + MPIParallelization/dataIOMPI/dataIOMPIs.cpp + MPIParallelization/MPI/procCommunication.cpp + MPIParallelization/MPI/scatteredMasterDistributeChar.cpp + MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp + MPIParallelization/pointField/processorBoundaryFields.cpp ) list(APPEND link_libs MPI::MPI_CXX ${ZOLTAN_LIBRARY} -lm ) @@ -155,8 +146,10 @@ if(pFlow_Build_MPI) target_include_directories(phasicFlow PUBLIC ./globals ${ZOLTAN_INCLUDE_DIR}) else() - pFlow_add_library_install(phasicFlow SourceFiles link_libs) + +pFlow_add_library_install(phasicFlow SourceFiles link_libs) target_include_directories(phasicFlow PUBLIC ./globals) + endif() diff --git a/src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp b/src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp new file mode 100644 index 00000000..dc87ec01 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/gatherMaster.hpp @@ -0,0 +1,106 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com 
+------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __gatherMaster_hpp__ +#define __gatherMaster_hpp__ + +#include + +#include "procCommunication.hpp" +#include "stdVectorHelper.hpp" + +namespace pFlow::MPI +{ + +template +class gatherMaster +: + public procCommunication +{ +protected: + + std::vector buffer_; + +public: + + gatherMaster(const localProcessors& procs) + : + procCommunication(procs) + {} + + span getData() + { + if(this->localMaster()) + return span( buffer_.data(), buffer_.size()); + else + return span(nullptr, 0); + } + + std::vector moveData() + { + return std::move(buffer_); + } + + bool gatherData(span data) + { + int thisN = data.size(); + + bool succss; + + procVector numElems(this->processors(), true); + procVector displ(this->processors(), true); + + if( !this->collectAllToMaster(thisN, numElems) ) + { + fatalErrorInFunction<< + "error in collecting number of elements from processors"<(0)); + + buffer_.resize(totalN); + + std::exclusive_scan( + numElems.begin(), + numElems.end(), + displ.begin(), + 0); + + auto bufferSpan = span(this->buffer_.data(),this->buffer_.size() ); + + return CheckMPI( + Gatherv( + data, + bufferSpan, + numElems.getSpan(), + displ.getSpan(), + this->localMasterNo(), + this->localCommunicator()), + false); + + } + + +}; +} + +#endif \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp new file mode 100644 index 00000000..2ef1caed --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/mpiCommunication.hpp @@ -0,0 +1,463 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __mpiCommunication_H__ +#define __mpiCommunication_H__ + + +#include "mpiTypes.hpp" +#include "types.hpp" +#include "span.hpp" + + + +namespace pFlow::MPI +{ + +extern DataType realx3Type__; + +extern DataType realx4Type__; + +extern DataType int32x3Type__; + +extern DataType uint32x3Type__; + +template +auto constexpr Type() +{ + return MPI_BYTE; +} + +template +auto constexpr sFactor() +{ + return sizeof(T); +} + +template +auto constexpr Type() +{ + return MPI_CHAR; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_SHORT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_UNSIGNED_SHORT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template +auto constexpr Type() +{ + return MPI_INT; +} +template +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_UNSIGNED; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_LONG; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_UNSIGNED_LONG; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + + +template<> +auto constexpr Type() +{ + return MPI_FLOAT; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +auto constexpr Type() +{ + return MPI_DOUBLE; +} +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return realx3Type__; +} + +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return realx4Type__; +} + +template<> +auto constexpr sFactor() +{ + return 1; +} + + +template<> +inline +auto Type() +{ + return int32x3Type__; +} + + +template<> +auto constexpr sFactor() +{ + return 1; +} + +template<> +inline +auto Type() +{ + return uint32x3Type__; +} + + +template<> +auto constexpr sFactor() +{ + return 1; +} + +/*inline +auto createByteSequence(int sizeOfElement) +{ + DataType newType; + MPI_Type_contiguous(sizeOfElement, MPI_CHAR, &newType); + MPI_Type_commit(&newType); + return newType; +}*/ + +inline +auto TypeCommit(DataType* type) +{ + return MPI_Type_commit(type); +} + +inline +auto TypeFree(DataType* type) +{ + return MPI_Type_free(type); + +} + +template +inline auto getCount(Status* status, int& count) +{ + int lCount; + auto res = MPI_Get_count(status, Type(), &lCount); + count = lCount/sFactor(); + return res; +} + +template +inline int convertIndex(const int& ind) +{ + return ind*sFactor(); +} + +template +inline auto send(span data, int dest, int tag, Comm comm) +{ + return MPI_Send( + data.data(), + sFactor()*data().size(), + Type(), + dest, + tag, + comm); +} + +template +inline auto send(const T& data, int dest, int tag, Comm comm) +{ + return MPI_Send( + &data, + sFactor(), + Type(), + dest, + tag, + comm); +} + +template +inline auto Isend(span data, int dest, int tag, Comm comm, Request* req) +{ + return MPI_Isend( + data.data(), + sFactor()*data.size(), + Type(), + dest, + tag, + comm, + req); +} + +template +inline auto Isend(const T& data, int dest, int tag, Comm comm, Request* req) +{ + return MPI_Isend( + &data, + sFactor(), + Type(), + dest, + tag, + comm, + req); +} + +template +inline auto recv(span data, int source, int tag, Comm comm, Status *status) +{ + return MPI_Recv( + data.data(), + sFactor()*data.size(), + Type(), + 
source, + tag, + comm, + status); +} + +template +inline auto recv(T& data, int source, int tag, Comm comm, Status *status) +{ + return MPI_Recv( + &data, + sFactor(), + Type(), + source, + tag, + comm, + status); +} + +template +inline auto Irecv(T& data, int source, int tag, Comm comm, Request* req) +{ + return MPI_Irecv( + &data, + sFactor(), + Type(), + source, + tag, + comm, + req); +} + +template +inline auto Irecv(span data, int source, int tag, Comm comm, Request* req) +{ + return MPI_Irecv( + data.data(), + sFactor()*data.size(), + Type(), + source, + tag, + comm, + req); +} + +template +inline auto scan(T sData, T& rData, Comm comm, Operation op = SumOp) +{ + return MPI_Scan(&sData, &rData, sFactor()*1, Type(), op , comm ); +} + +// gathering one scalar data to root processor +template +inline auto gather(T sendData, span& recvData, int root, Comm comm) +{ + return MPI_Gather( + &sendData, + sFactor()*1, + Type(), + recvData.data(), + sFactor()*1, + Type(), + root, + comm); +} + +template +inline auto allGather(T sendData, span& recvData, Comm comm) +{ + return MPI_Allgather( + &sendData, + sFactor()*1, + Type(), + recvData.data(), + sFactor()*1, + Type(), + comm); +} + +template +inline auto scatter(span sendData, T& recvData, int root, Comm comm) +{ + return MPI_Scatter( + sendData.data(), + sFactor()*1, + Type(), + &recvData, + sFactor()*1, + Type(), + root, + comm); +} + +template +inline auto Bcast(T& sendData, int root, Comm comm) +{ + return MPI_Bcast( + &sendData, sFactor()*1, Type(), root, comm); + +} + + +template +bool typeCreateIndexedBlock( + span index, + DataType &newType) +{ + auto res = MPI_Type_create_indexed_block( + index.size(), + sFactor(), + index.data(), + Type(), + &newType); + + if(res == Success) + { + TypeCommit(&newType); + } + else + { + return false; + } + + return true; +} + + +template +inline auto Gatherv +( + span sendData, + span& recvData, + span recvCounts, + span displs, + int root, + Comm comm) +{ + + return MPI_Gatherv( + sendData.data(), + sendData.size()*sFactor(), + Type(), + recvData.data(), + recvCounts.data(), + displs.data(), + Type(), + root, + comm + ); + +} + +inline auto Wait(Request* request, Status* status) +{ + return MPI_Wait(request, status); +} + + +} + + +#endif //__mpiCommunication_H__ diff --git a/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp b/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp new file mode 100644 index 00000000..05b45e93 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/mpiTypes.hpp @@ -0,0 +1,71 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#ifndef __mpiTypes_H__ +#define __mpiTypes_H__ + + + +#include + +namespace pFlow::MPI +{ + // types + using Comm = MPI_Comm; + using Group = MPI_Group; + using Status = MPI_Status; + using Offset = MPI_Offset; + using Request = MPI_Request; + using Operation = MPI_Op; + using Information = MPI_Info; + using DataType = MPI_Datatype; + + inline Comm CommWorld = MPI_COMM_WORLD; + + // all nulls + + inline auto ProcNull = MPI_PROC_NULL; + inline auto InfoNull = MPI_INFO_NULL; + inline auto RequestNull = MPI_REQUEST_NULL; + inline auto StatusIgnore = MPI_STATUS_IGNORE; + inline auto StatusesIgnore = MPI_STATUSES_IGNORE; + inline auto FileNull = MPI_FILE_NULL; + inline Comm CommNull = MPI_COMM_NULL; + inline auto TypeNull = MPI_DATATYPE_NULL; + + // errors + inline const auto Success = MPI_SUCCESS; + inline const auto ErrOp = MPI_ERR_OP; + + inline const auto SumOp = MPI_SUM; + inline const auto MaxOp = MPI_MAX; + inline const auto MinOp = MPI_MIN; + + inline const size_t MaxNoProcessors = 2048; + +} + + + + + + + +#endif //__mpiTypes_H__ diff --git a/src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp b/src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp new file mode 100644 index 00000000..81869453 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/procCommunication.cpp @@ -0,0 +1,30 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#include "procCommunication.hpp" + + +pFlow::MPI::procCommunication::procCommunication +( + const localProcessors& proc +) +: + processors_(proc) +{} diff --git a/src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp b/src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp new file mode 100644 index 00000000..80c0f513 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/procCommunication.hpp @@ -0,0 +1,178 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. 
+ + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __procCommunication_hpp__ +#define __procCommunication_hpp__ + + +#include "procVector.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + + +class procCommunication +{ +protected: + + const localProcessors& processors_; + +public: + + procCommunication(const localProcessors& proc); + + ~procCommunication()=default; + + /// @brief Tell if this processor is master processor in the local + /// communicator + /// @return true if this processor is master + + inline + const auto& processors()const + { + return processors_; + } + + inline + bool localMaster()const + { + return processors_.localMaster();; + } + + inline + auto localSize()const + { + return processors_.localSize(); + } + + inline + auto localRank()const + { + return processors_.localRank(); + } + + inline + auto localCommunicator()const + { + return processors_.localCommunicator(); + } + + /// @brief return the master number in the local communicator + auto localMasterNo()const + { + return processors_.localMasterNo(); + } + + /// Send a single val to all processors including itself (local communicator) + template + std::pair distributeMasterToAll(const T& val) + { + + T retVal = val; + auto res = CheckMPI( + Bcast(retVal, localMasterNo(),localCommunicator() ), + false); + + return {retVal, res}; + } + + /// @brief Send a single value to all processor including master (in local communicator) + /// @param val value to be sent + /// @param recvVal recieved value + /// @return true if successful and false if fail + template + bool distributeMasterToAll(const T& val, T& recvVal) + { + recvVal = val; + return CheckMPI( + Bcast(recvVal, localMasterNo(), localCommunicator()), + false); + } + + /// @brief values in the vector (size is equal to number of + // processors in local communicator) to each processor + template + std::pair distributeMasterToAll(const procVector& vals) + { + T val; + auto vec = vals.getSpan(); + auto res = CheckMPI( + scatter(vec, val, localMasterNo(), localCommunicator()), + false); + + return {val, res}; + } + + /// @brief Each processor in the local communicator calls this funtion with a value + /// and the values are distributed among all processors + template + std::pair, bool> collectAllToAll(const T& val) + { + procVector allVec(processors_); + auto vec = allVec.getSpan(); + auto res = CheckMPI( + allGather(val, vec, localCommunicator()), + false); + return {allVec, res}; + } + + /// @brief Each processor in the local communicator calls this funtion with a value + /// and the values are distributed among all processors + template + bool collectAllToAll(const T& val, procVector& allVec) + { + auto vec = allVec.getSpan(); + return CheckMPI( + allGather(val, vec, localCommunicator()), + false); + } + + /// @brief Each processor in the local communicator calls this function with a value + /// and all values are collected in the master processor + template + std::pair,bool> collectAllToMaster(const T& val) + { + // only on master processor + procVector masterVec(processors_, true); + + auto masterSpan = masterVec.getSpan(); + auto res = CheckMPI( + gather(val,masterSpan, localMasterNo(), localCommunicator()), + false); + + return {masterVec, res}; + 
+ } + + template + bool collectAllToMaster(const T& val, procVector& masterVec) + { + // only on master processor + auto [vec, res] = collectAllToMaster(val); + masterVec = vec; + return res; + } + +}; //procCommunication + +} // pFlow::MPI + +#endif //__procCommunication_hpp__ diff --git a/src/phasicFlow/MPIParallelization/MPI/procVector.hpp b/src/phasicFlow/MPIParallelization/MPI/procVector.hpp new file mode 100644 index 00000000..f9a80037 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/procVector.hpp @@ -0,0 +1,199 @@ +#ifndef __procVector_hpp__ +#define __procVector_hpp__ + +// from PhasicFlow + +#include "localProcessors.hpp" +#include "span.hpp" +#include "streams.hpp" +#include "IOPattern.hpp" + +#include "mpiTypes.hpp" + +namespace pFlow::MPI +{ + +template +class procVector +: + public std::vector +{ +public: + + using ProcVectorType = procVector; + + using VectorType = std::vector; + +protected: + + int rank_ = 0; + + bool isMaster_ = false; + + using VectorType::reserve; + + using VectorType::resize; + + using VectorType::assign; + + using VectorType::clear; + + using VectorType::erase; + +public: + + procVector( + const localProcessors& procs, + bool onlyMaster = false) + : + rank_(procs.localRank()), + isMaster_(procs.localMaster()) + { + + if( onlyMaster && !isMaster_ ) return; + this->reserve(procs.localSize()); + this->resize(procs.localSize()); + } + + procVector( + const T& val, + const localProcessors& procs, + bool onlyMaster = false) + : + procVector(procs, onlyMaster) + { + std::fill(this->begin(), this->end(), val); + } + + procVector(const T& val, const procVector& src) + { + this->reserve(src.size()); + this->resize(src.size()); + std::fill(this->begin(), this->end(), val); + } + + procVector(const localProcessors& procs, const VectorType& src) + : + procVector(procs) + { + if(src.size()!= this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in construction"<assign(src.begin(), src.end()); + } + + procVector(const procVector&) = default; + + procVector(procVector&&) = default; + + procVector& operator=(const procVector&) = default; + + procVector& operator=(procVector&&) = default; + + procVector& operator=(const VectorType& src) + { + if(src.size() != this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in copy assignment"<(*this).operator=(src); + return *this; + } + + procVector& operator=(VectorType&& src) + { + if(src.size() != this->size()) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in move assignment" + <(*this).operator=(std::move(src)); + return *this; + } + + procVector(const localProcessors& procs, VectorType&& src) + : + VectorType(std::move(src)) + { + if(this->size()!= static_cast(procs.localSize())) + { + fatalErrorInFunction<< + "Size of std::vector and procVector does not match in move"<(this->data(), this->size()); + } + + inline + auto getSpan()const + { + return span(const_cast(this->data()), this->size()); + } + + bool write( + iOstream& os, + const IOPattern& iop ) const + { + return writeStdVector(os, *this, iop); + } + +}; + +template +inline iOstream& operator << (iOstream& os, const procVector& ovec ) +{ + if( !ovec.write(os, IOPattern::AllProcessorsDifferent) ) + { + ioErrorInFile(os.name(), os.lineNumber()); + fatalExit; + } + return os; +} + +} + + +#endif diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp 
new file mode 100644 index 00000000..a771dc54 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.cpp @@ -0,0 +1,158 @@ + + +template +pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute +( + const localProcessors& procs +) +: + procCommunication(procs), + indexedMap_(TypeNull, procs, true) +{ + +} + +template +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + std::vector index; + + freeIndexedMap(); + + for(auto proc = 0; proc< maps.size(); proc++) + { + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i( makeSpan(index), dt)) + { + fatalErrorInFunction; + return false; + } + else + { + indexedMap_[proc] = dt; + } + } + } + return true; +} + + +template +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + freeIndexedMap(); + + + for(auto proc = 0; proc< maps.size(); proc++) + { + DataType dt; + if( !typeCreateIndexedBlock(maps[proc], dt) ) + { + fatalErrorInFunction; + return false; + } + else + { + indexedMap_[proc] = dt; + } + } + } + return true; +} + +template +void pFlow::MPI::scatteredMasterDistribute::freeIndexedMap() +{ + for(auto i=0; i +bool pFlow::MPI::scatteredMasterDistribute::distribute +( + span& sendBuff, + span& recvb +) +{ + procVector requests(processors(), true); + procVector statuses(processors(), true); + + if(this->localMaster()) + { + bool res = true; + for(int32 i = indexedMap_.size()-1; i>=0; i--) + { + res = res&&CheckMPI( + MPI_Issend( + sendBuff.data(), + 1, + indexedMap_[i], + i, + 0, + localCommunicator(), + &requests[i]), + false); + } + + if(!res)return false; + } + + Status stat; + bool sucss = CheckMPI( + MPI_Recv( + recvb.data(), + recvb.size()*sFactor(), + Type(), + 0, + 0, + localCommunicator(), + &stat), + false); + + if(this->localMaster()) + { + CheckMPI( + MPI_Waitall(requests.size(), requests.data(), statuses.data()), + false + ); + } + + return sucss; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp new file mode 100644 index 00000000..146ce56c --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistribute.hpp @@ -0,0 +1,67 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#ifndef __scatteredMasterDistribute_hpp__ +#define __scatteredMasterDistribute_hpp__ + +#include "mpiCommunication.hpp" +#include "procCommunication.hpp" +#include "procVector.hpp" +#include "stdVectorHelper.hpp" +#include "streams.hpp" + +namespace pFlow::MPI +{ + +template +class scatteredMasterDistribute : public procCommunication +{ +protected: + + procVector indexedMap_; + + void freeIndexedMap(); + +public: + + scatteredMasterDistribute(const localProcessors& procs); + + ~scatteredMasterDistribute() + { + freeIndexedMap(); + } + + scatteredMasterDistribute(const scatteredMasterDistribute&) = delete; + + scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = + delete; + + bool setDataMaps(procVector>& maps); + + bool setDataMaps(procVector>& maps); + + bool distribute(span& sendBuff, span& recvb); +}; + +} // pFlow::MPI + +#include "scatteredMasterDistribute.cpp" + +#endif //__scatteredMasterDistribute_hpp__ diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp new file mode 100644 index 00000000..7579e8d5 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.cpp @@ -0,0 +1,166 @@ + +#include "scatteredMasterDistributeChar.hpp" + +pFlow::MPI::scatteredMasterDistribute::scatteredMasterDistribute +( + size_t sizeOfElement, + const localProcessors& procs +) +: + procCommunication(procs), + indexedMap_(TypeNull, procs, true), + sizeOfElement_(sizeOfElement) +{} + + +bool pFlow::MPI::scatteredMasterDistribute::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + freeIndexedMap(); + + std::vector index; + + for(auto proc = 0; proc< maps.size(); proc++) + { + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i::setDataMaps +( + procVector>& maps +) +{ + if(this->localMaster()) + { + if(maps.size() != this->localSize() ) + { + fatalErrorInFunction<<"size mismatch"; + return false; + } + + std::vector index; + freeIndexedMap(); + + for(auto proc = 0; proc< maps.size(); proc++) + { + + auto m = maps[proc]; + index.resize(m.size()); + for(auto i=0; i::freeIndexedMap() +{ + for(auto i=0; i::distribute +( + span& sendBuff, + span& recvb +) +{ + procVector requests(processors(), true); + procVector statuses(processors(), true); + + + if(this->localMaster()) + { + bool res = true; + for(int32 i = indexedMap_.size()-1; i>=0; i--) + { + res = res&&CheckMPI( + MPI_Issend( + sendBuff.data(), + 1, + indexedMap_[i], + i, + 0, + localCommunicator(), + &requests[i]), + false); + } + + if(!res)return false; + } + + Status stat; + bool sucss = CheckMPI( + MPI_Recv( + recvb.data(), + recvb.size(), + MPI_CHAR, + 0, + 0, + localCommunicator(), + &stat), + true); + + if(this->localMaster()) + { + CheckMPI( + MPI_Waitall(requests.size(), requests.data(), statuses.data()), + false + ); + } + + return sucss; +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp new file mode 100644 index 00000000..0ea1a770 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/MPI/scatteredMasterDistributeChar.hpp @@ -0,0 +1,66 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of 
+ O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __scatteredMasterDistributeChar_hpp__ +#define __scatteredMasterDistributeChar_hpp__ + +#include "scatteredMasterDistribute.hpp" + +namespace pFlow::MPI +{ + +template<> +class scatteredMasterDistribute : public procCommunication +{ +protected: + + procVector indexedMap_; + + size_t sizeOfElement_; + + void freeIndexedMap(); + +public: + + scatteredMasterDistribute( + size_t sizeOfElement, + const localProcessors& procs + ); + + ~scatteredMasterDistribute() + { + freeIndexedMap(); + } + + scatteredMasterDistribute(const scatteredMasterDistribute&) = delete; + + scatteredMasterDistribute& operator=(const scatteredMasterDistribute&) = + delete; + + bool setDataMaps(procVector>& maps); + + bool setDataMaps(procVector>& maps); + + bool distribute(span& sendBuff, span& recvb); +}; + +} // pFlow::MPI + +#endif //__scatteredMasterDistributeChar_hpp__ diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp new file mode 100644 index 00000000..eb5e074c --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.cpp @@ -0,0 +1,52 @@ + +template +bool pFlow::MPI::dataIOMPI::gatherData(span data ) +{ + + if(this->ioPattern_.isAllProcessorsDifferent()) + { + this->bufferSpan_ = data; + return true; + } + + if( this->ioPattern_.isMasterProcessorDistribute()) + { + + auto gatherT = pFlow::MPI::gatherMaster(pFlowProcessors()); + + if(!gatherT.gatherData(data)) + { + fatalErrorInFunction<<"Error in gathering data to master"<buffer_ = gatherT.moveData(); + this->bufferSpan_ = span(this->buffer_.data(),this->buffer_.size() ); + + return true; + + } + + if( this->ioPattern_.isMasterProcessorOnly() || this->ioPattern_.isAllProcessorSimilar() ) + { + if( this->ioPattern_.isMaster() ) + { + this->bufferSpan_ = data; + return true; + } + else + { + this->bufferSpan_ = span(nullptr, 0); + return true; + } + } + + return false; + +} + +template +pFlow::MPI::dataIOMPI::dataIOMPI(const IOPattern& iop) +: + dataIO(iop) +{} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp new file mode 100644 index 00000000..1bfeb94d --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPI.hpp @@ -0,0 +1,58 @@ +#ifndef __datIOMPI_hpp__ +#define __datIOMPI_hpp__ + +#include "dataIO.hpp" +#include "pFlowProcessors.hpp" +#include "gatherMaster.hpp" + + +namespace pFlow::MPI +{ + +template +class dataIOMPI +: + public dataIO +{ +public: + + using DataIOType = dataIO; + + using DataIOMPIType = dataIOMPI; + +protected: + + bool gatherData(span data ) override; + +public: + + 
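+	/// type information; the registered name is built from the base name
+	/// "dataIO", the data type T and the "MPI" tag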
TypeInfoTemplate111("dataIO",T,"MPI"); + + explicit dataIOMPI(const IOPattern& iop); + + dataIOMPI(const dataIOMPI&) = default; + + dataIOMPI(dataIOMPI&&) = default; + + + dataIOMPI& operator=(const dataIOMPI&) = default; + + dataIOMPI& operator=(dataIOMPI&&) = default; + + ~dataIOMPI() = default; + + add_vCtor + ( + DataIOType, + DataIOMPIType, + IOPattern + ); + +}; //dataIOMPI + + +} //namespace pFlow::MPI + +#include "dataIOMPI.cpp" + +#endif //__datIOMPI_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp new file mode 100644 index 00000000..73d307f2 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/dataIOMPI/dataIOMPIs.cpp @@ -0,0 +1,27 @@ + +#include "types.hpp" +#include "dataIOMPI.hpp" + + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; + +template class pFlow::MPI::dataIOMPI; \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp new file mode 100644 index 00000000..37ecb052 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.cpp @@ -0,0 +1,431 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "MPISimulationDomain.hpp" +#include "systemControl.hpp" +#include "rcb1DPartitioning.hpp" +#include "scatteredMasterDistribute.hpp" +#include "scatteredMasterDistributeChar.hpp" + +pFlow::MPI::MPISimulationDomain::MPISimulationDomain(systemControl& control) +: + simulationDomain(control), + communication_(pFlowProcessors()), + subDomainsAll_(pFlowProcessors()), + numPointsAll_(pFlowProcessors()), + domainPartitioning_( makeUnique(subDict("decomposition"), globalBox())) +{} + +bool pFlow::MPI::MPISimulationDomain::createBoundaryDicts() +{ + + dictionary& boundaries = this->subDict("boundaries"); + + dictionary& thisBoundaries = this->subDict(thisBoundariesDictName()); + + auto neighbors = findPlaneNeighbors(); + + for(uint32 i=0; i("type") == "periodic") + { + fatalErrorInFunction<< + "periodic is not implemented "<localBox()); + uint32 thisNumPoints = initialNumberInThis(); + + if(!communication_.collectAllToAll(thisNumPoints, numPointsAll_)) + { + fatalErrorInFunction<< + "Failed to distribute number of points."< pFlow::MPI::MPISimulationDomain::findPlaneNeighbors() const +{ + + std::vector neighbors(sizeOfBoundaries(), -2); + domain gDomain(globalBox()); + + // left + if( thisDomain_.left().parallelTouch( gDomain.left() ) ) + { + neighbors[0] = -1; + } + + for(int i=0; i pointPos) +{ + pFlagTypeHost flags(pointPos.size(), 0 , pointPos.size()); + initialNumPoints_ = pointPos.size(); + if( !domainPartitioning_->partition(pointPos, flags) ) + { + fatalErrorInFunction<< + "Point partitioning failed."<numberImportThisProc(); + uint32 numExport = domainPartitioning_->numberExportThisProc(); + return max(initialNumPoints_+ numImport - numExport, 0u); +} + +bool pFlow::MPI::MPISimulationDomain::initialTransferBlockData +( + span src, + span dst, + size_t sizeOfElement +)const +{ + MPI::scatteredMasterDistribute dataDist(sizeOfElement, pFlowProcessors()); + + auto lists = domainPartitioning_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + auto lists = domainPartitioning_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + + auto lists = domainPartitioning_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + + auto lists = domainPartitioning_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"< src, + span dst +)const +{ + MPI::scatteredMasterDistribute + dataDist(pFlowProcessors()); + + auto lists = domainPartitioning_->allExportLists(); + + if(!dataDist.setDataMaps( lists )) + { + fatalErrorInFunction; + return false; + } + + if(!dataDist.distribute(src, dst)) + { + fatalErrorInFunction<< + "Error in distribute"<numberImportThisProc(); +} + 
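+// forwarded from the partitioner: the number of points that must leave
+// this processor after the initial domain decomposition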
+pFlow::uint32 pFlow::MPI::MPISimulationDomain::numberToBeExported() const +{ + return domainPartitioning_->numberExportThisProc(); +} + +bool +pFlow::MPI::MPISimulationDomain::domainActive() const +{ + return thisDomainActive_; +} + +const pFlow::domain& +pFlow::MPI::MPISimulationDomain::thisDomain() const +{ + return thisDomain_; +} diff --git a/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp new file mode 100644 index 00000000..8949409e --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/MPISimulationDomain.hpp @@ -0,0 +1,116 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __MPISimulationDomain_hpp__ +#define __MPISimulationDomain_hpp__ + +#include "partitioning.hpp" +#include "procCommunication.hpp" +#include "procVector.hpp" +#include "simulationDomain.hpp" + +namespace pFlow::MPI +{ + +class MPISimulationDomain : public simulationDomain +{ +private: + + /// a processor communcator for simulation domain + procCommunication communication_; + + /// sub-domain (thisDomain_ for all processors) + procVector subDomainsAll_; + + /// number of points in all processors + procVector numPointsAll_; + + /// partitioning object + uniquePtr domainPartitioning_ = nullptr; + + /// the acutal limits of the simulation domain in this processor + domain thisDomain_; + + uint32 initialNumPoints_ = 0; + + bool thisDomainActive_ = false; + + bool createBoundaryDicts() final; + + bool setThisDomain() final; + + std::vector findPlaneNeighbors() const; + +public: + + TypeInfo("simulationDomain"); + + explicit MPISimulationDomain(systemControl& control); + + ~MPISimulationDomain() final = default; + + add_vCtor + ( + simulationDomain, + MPISimulationDomain, + systemControl + ); + + /// @brief + /// @param pointPos + /// @return + bool initialUpdateDomains(span pointPos) final; + + /// @brief + /// @return + uint32 initialNumberInThis() const final; + + bool initialTransferBlockData( + span src, + span dst, + size_t sizeOfElement + ) const final; + + bool initialTransferBlockData(span src, span dst) + const final; + + bool initialTransferBlockData(span src, span dst) + const final; + + bool initialTransferBlockData(span src, span dst) + const final; + + bool initialTransferBlockData(span src, span dst) + const final; + + uint32 numberToBeImported() const final; + + uint32 numberToBeExported() const final; + + /// @brief Is this domain active? 
+ /// Active mean, there is particle in it and + /// boundaries and other entities of simulation domains are valid + bool domainActive() const final; + + const domain& thisDomain()const final; +}; + +} // namespace pFlow::MPI + +#endif // \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp b/src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp new file mode 100644 index 00000000..0ae5cf82 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/partitioning/partitioning.cpp @@ -0,0 +1,113 @@ + + +#include "partitioning.hpp" +#include "error.hpp" +#include "streams.hpp" + +void pFlow::partitioning::freeZoltan() +{ + if(validPointers_) + { + Zoltan::LB_Free_Part(&importGlobalGids_, &importLocalGids_, + &importProcs_, &importToPart_); + + Zoltan::LB_Free_Part(&exportGlobalGids_, &exportLocalGids_, + &exportProcs_, &exportToPart_); + validPointers_ = false; + } + + zoltan_.release(); +} + + +pFlow::partitioning::partitioning +( + const dictionary& dict, + const box& globalBox +) +: + globalBox_(globalBox) +{ + if(!zoltanInitialized__) + { + auto rc = Zoltan_Initialize + ( + processors::argc(), + processors::argv(), + &version_ + ); + + if (rc != ZOLTAN_OK) + { + fatalErrorInFunction<<"Cannot initialize zoltan"<(pFlowProcessors().localCommunicator()); + + zoltan_->Set_Param("DEBUG_LEVEL", "0"); + zoltan_->Set_Param("LB_METHOD", "RCB"); + zoltan_->Set_Param("NUM_GID_ENTRIES", "1"); + zoltan_->Set_Param("NUM_LID_ENTRIES", "1"); + zoltan_->Set_Param("OBJ_WEIGHT_DIM", "0"); + zoltan_->Set_Param("RETURN_LISTS", "ALL"); + +} + +bool pFlow::partitioning::partition(span points, pFlagTypeHost flags) +{ + pointCollection pointCollctn{points, flags}; + + return partition(pointCollctn); +} +int GetObjectSize +( + void *data, + int num_gid_entries, + int num_lid_entries, + ZOLTAN_ID_PTR global_id, + ZOLTAN_ID_PTR local_id, + int *ierr +) +{ + *ierr = ZOLTAN_OK; + pFlow::uint32 s = *(static_cast(data)); + return static_cast(s); +} + +void PackObject +( + void *data, + int num_gid_entries, + int num_lid_entries, + ZOLTAN_ID_PTR global_id, + ZOLTAN_ID_PTR local_id, + int dest, + int size, + char *buf, + int *ierr +) +{ + +} + +bool pFlow::partitioning::migrateData(span src, span dst, uint32 elementSize) +{ + dataCollection data{src, dst, elementSize}; + + zoltan_->Set_Obj_Size_Fn(GetObjectSize, &elementSize); + return false; +} + +pFlow::partitioning::~partitioning() +{ + freeZoltan(); +} + +void pFlow::partitioning::printBox()const +{ + pOutput<< "localBox:" << localBox_< points_; + pFlagTypeHost pFlag_; + + uint32 numActivePoints()const + { + return pFlag_.numActive(); + } +}; + +struct dataCollection +{ + span srcData_; + span dstData_; + uint32 elementSize_; +}; + +class partitioning +{ +protected: + + float version_ = 0.0; + + std::unique_ptr zoltan_ = nullptr; + + bool validPointers_ = false; + + box globalBox_; + + box localBox_; + + int32 changes_, numImport_, numExport_; + + id_t *importGlobalGids_, *importLocalGids_, *exportGlobalGids_, *exportLocalGids_; + + int32 *importProcs_, *importToPart_, *exportProcs_, *exportToPart_; + + uint32 numBeforePartition_ = 0 ; + + static inline bool zoltanInitialized__ = false; + + void freeZoltan(); + + virtual + bool partition(pointCollection& points) = 0; + +public: + + partitioning( + const dictionary& dict, + const box& globalBox); + + virtual + ~partitioning(); + + create_vCtor( + partitioning, + dictionary, + ( + const dictionary& dict, + const box& globalBox + ), + (dict, 
globalBox)); + + bool partition( + span points, + pFlagTypeHost flags); + + + bool migrateData(span src, span dst, uint32 elementSize); + + inline + auto localBox()const + { + return localBox_; + } + + inline + const auto& globalBox()const + { + return globalBox_; + } + + inline + bool partitionsChanged()const + { + return changes_ == 1; + } + + + uint32 numberImportThisProc()const + { + return numImport_; + } + + uint32 numberExportThisProc()const + { + return numExport_; + } + + virtual + span exportList(int procNo)const = 0; + + virtual + pFlow::MPI::procVector> allExportLists()const=0; + + void printBox()const; + + +}; + + +} + + +#endif //__partitioning_hpp__ + + + +/*static + int getNumberOfPoints(void *data, int32 *ierr); + + static + void getPointList( + void *data, + int32 sizeGID, + int32 sizeLID, + id_t* globalID, + id_t* localID, + int32 wgt_dim, + float *obj_wgts, + int32 *ierr);*/ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp new file mode 100644 index 00000000..ba147512 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.cpp @@ -0,0 +1,330 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "zoltan_cpp.h" + + +#include "error.hpp" +#include "processors.hpp" +#include "rcb1DPartitioning.hpp" + +bool pFlow::rcb1DPartitioning::partition(pointCollection &points) +{ + + zoltan_->Set_Param("RCB_OUTPUT_LEVEL", "0"); + zoltan_->Set_Param("RCB_RECTILINEAR_BLOCKS", "1"); + zoltan_->Set_Param("KEEP_CUTS", "1"); + zoltan_->Set_Param("REDUCE_DIMENSIONS", "1"); + zoltan_->Set_Param("RCB_RECOMPUTE_BOX", "1"); + zoltan_->Set_Param("AVERAGE_CUTS", "0"); + zoltan_->Set_Param("MIGRATE_ONLY_PROC_CHANGES", "0"); + + zoltan_->Set_Num_Obj_Fn(rcb1DPartitioning::getNumberOfPoints, &points); + zoltan_->Set_Obj_List_Fn(rcb1DPartitioning::getPointList, &points); + zoltan_->Set_Num_Geom_Fn(rcb1DPartitioning::getNumGeometry, &points); + switch (direction_) + { + case Direction::X: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_x, &points); + break; + case Direction::Y: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_y, &points); + break; + case Direction::Z: + zoltan_->Set_Geom_Multi_Fn(rcb1DPartitioning::getGeometryList_z, &points); + break; + } + + int numGidEntries_, numLidEntries_; + int rc = zoltan_->LB_Partition(changes_, numGidEntries_, numLidEntries_, + numImport_, importGlobalGids_, importLocalGids_, importProcs_, importToPart_, + numExport_, exportGlobalGids_, exportLocalGids_, exportProcs_, exportToPart_); + + + if (rc != ZOLTAN_OK) + { + fatalErrorInFunction<< "Zoltan faild to perform partitioning."< thisProc(points.numActivePoints(),-1); + + for(auto i =0; iRCB_Box + ( + processors::globalRank(), + nDim, + x0, y0, z0, + x1, y1, z1 + ); + + localBox_ = globalBox_; + + if(equal(x0, x1)) + { + x0 = x0 - 0.00001; + x1 = x1 + 0.00001; + } + + switch (direction_) + { + case Direction::X : + localBox_.minPoint().x_ = x0; + localBox_.maxPoint().x_ = x1; + break; + + case Direction::Y : + localBox_.minPoint().y_ = x0; + localBox_.maxPoint().y_ = x1; + break; + + case Direction::Z : + localBox_.minPoint().z_ = x0; + localBox_.maxPoint().z_ = x1; + break; + } + + + localBox_.minPoint() = max(localBox_.minPoint(), globalBox_.minPoint()); + localBox_.maxPoint() = min(localBox_.maxPoint(), globalBox_.maxPoint()); + + + return true; +} + +pFlow::rcb1DPartitioning::rcb1DPartitioning +( + const dictionary &dict, + const box &globalBox +) +: + partitioning(dict, globalBox), + exportIds_(pFlowProcessors()) +{ + + word directionName = dict.getVal("direction"); + + if(toUpper(directionName)== "X") + { + direction_ = Direction::X; + dirVector_ ={1.0, 0.0, 0.0}; + } + else if( toUpper(directionName) == "Y") + { + direction_ = Direction::Y; + dirVector_ ={0.0, 1.0, 0.0}; + } + else if( toUpper(directionName) == "Z") + { + direction_ = Direction::Z; + dirVector_ ={0.0, 0.0, 1.0}; + } + else + { + fatalErrorInFunction<< "wrong direction in dictionary "<< + dict.globalName()<<". 
Directions should be one of x, y, or z."<(data); + + *ierr = ZOLTAN_OK; + + return obj->numActivePoints(); +} + +void pFlow::rcb1DPartitioning::getPointList +( + void *data, + int sizeGID, + int sizeLID, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int wgt_dim, + float *obj_wgts, + int *ierr +) +{ + auto* obj = static_cast(data); + *ierr = ZOLTAN_OK; + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + globalID[n] = i; + localID[n] = n; + n++; + } + } + +} + +void pFlow::rcb1DPartitioning::getGeometryList_x +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].x_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + +void pFlow::rcb1DPartitioning::getGeometryList_y +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].y_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + +void pFlow::rcb1DPartitioning::getGeometryList_z +( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr +) +{ + + auto* obj = static_cast(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + auto activeRange = obj->pFlag_.activeRange(); + uint32 n = 0; + for (auto i=activeRange.start(); ipFlag_.isActive(i) ) + { + geom_vec[n] = obj->points_[i].z_; + n++; + } + } + + *ierr = ZOLTAN_OK; + + return; +} + diff --git a/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp new file mode 100644 index 00000000..b58532e3 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/domain/partitioning/rcb1DPartitioning.hpp @@ -0,0 +1,240 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ +#ifndef __rcb1DPartitioning_hpp__ +#define __rcb1DPartitioning_hpp__ + +#include "partitioning.hpp" +#include "procVector.hpp" + +namespace pFlow +{ + + +class rcb1DPartitioning +: +public partitioning +{ +public: + + enum Direction + { + X = 0, + Y = 1, + Z = 2 + }; + +protected: + + /// Direction of partitioning + Direction direction_ = Direction::X; + + realx3 dirVector_ = {1.0, 0.0, 0.0}; + + word directionName_ = "x"; + + MPI::procVector> exportIds_; + + bool partition(pointCollection& points) override; + +public: + + + rcb1DPartitioning( + const dictionary& dict, + const box& globalBox); + + + ~rcb1DPartitioning() override=default; + + span exportList(int procNo)const override + { + return span( + const_cast(exportIds_[procNo].data()), + exportIds_[procNo].size()); + } + + + pFlow::MPI::procVector> allExportLists()const override + { + pFlow::MPI::procVector> allList(pFlowProcessors()); + + for(int i=0; i(data); + + if ( (sizeGID != 1) || (sizeLID != 1) || (num_dim != 1)) + { + *ierr = ZOLTAN_FATAL; + return; + } + + *ierr = ZOLTAN_OK; + + for (int i=0; i < num_obj ; i++) + { + geom_vec[i] = obj->pointList()[i].y_; + } + + return; + } + + + static + int getNumGeometry(void *data, int *ierr) + { + *ierr = ZOLTAN_OK; + return 1; + } + +}; + + +class RCB_x_partitioning +: +public partitioning +{ +public: + + + RCB_x_partitioning(int argc, char *argv[], pointCollection& collection, const box& gBox) + : + partitioning(argc, argv, collection, gBox) + {} + + virtual + ~RCB_x_partitioning()=default; + + + bool partition() override; + + + static + void getGeometryList( + void *data, + int sizeGID, + int sizeLID, + int num_obj, + ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, + int num_dim, + double *geom_vec, + int *ierr); + + static + int getNumGeometry(void *data, int *ierr); + + +};*/ + +} // pFlow +#endif //__rcb1DPartitioning_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp new file mode 100644 index 00000000..d622a971 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.cpp @@ -0,0 +1,284 @@ +#include "processorBoundaryField.hpp" +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +template +void +pFlow::MPI::processorBoundaryField::checkDataRecieved() const +{ + if (!dataRecieved_) + { + uint32 nRecv = neighborProcField_.waitBufferForUse(); + dataRecieved_ = true; + if (nRecv != this->neighborProcSize()) + { + fatalErrorInFunction<< + "number of recived data is "<< nRecv <<" and expected number is "<< + this->neighborProcSize()<< " in "<name() <name()<<" has recieved with size "<< nRecv< +bool +pFlow::MPI::processorBoundaryField::updateBoundary( + int step, + DataDirection direction +) +{ +#ifndef BoundaryModel1 + if(!this->boundary().performBoundarytUpdate()) + return true; +#endif + + if (step == 1) + { + // Isend + if (direction == DataDirection::TwoWay || + ( this->isBoundaryMaster() && direction == DataDirection::MasterToSlave) || + (!this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster)) + { + thisFieldInNeighbor_.sendData(pFlowProcessors(), this->thisField(), this->name()); + dataRecieved_ = false; + //pOutput<<"request for boundary update "<< this->name()<<" direction "<< (int)direction<isBoundaryMaster() && direction == DataDirection::MasterToSlave) || + ( this->isBoundaryMaster() && direction == DataDirection::SlaveToMaster)) + { + neighborProcField_.recieveData(pFlowProcessors(), this->neighborProcSize(), this->name()); + dataRecieved_ = false; + //pOutput<<"request for boundary update "<< this->name()<<" direction "<< (int)direction< +pFlow::MPI::processorBoundaryField::processorBoundaryField( + const boundaryBase& boundary, + const pointStructure& pStruct, + InternalFieldType& internal +) + : BoundaryFieldType(boundary, pStruct, internal), + thisFieldInNeighbor_( + groupNames("sendBuffer", this->name()), + boundary.neighborProcessorNo(), + boundary.thisBoundaryIndex() + ), + neighborProcField_( + groupNames("recieveBuffer", boundary.name()), + boundary.neighborProcessorNo(), + boundary.mirrorBoundaryIndex() + ) +{ + this->addEvent(message::BNDR_PROCTRANSFER_SEND). + addEvent(message::BNDR_PROCTRANSFER_RECIEVE). + addEvent(message::BNDR_PROCTRANSFER_WAITFILL). 
+ addEvent(message::BNDR_PROC_SIZE_CHANGED); +} + +template +typename pFlow::MPI::processorBoundaryField::ProcVectorType& +pFlow::MPI::processorBoundaryField::neighborProcField() +{ + checkDataRecieved(); + return neighborProcField_.buffer(); +} + +template +const typename pFlow::MPI::processorBoundaryField:: + ProcVectorType& + pFlow::MPI::processorBoundaryField::neighborProcField() const +{ + checkDataRecieved(); + return neighborProcField_.buffer(); +} + +template +bool pFlow::MPI::processorBoundaryField::hearChanges( + real t, + real dt, + uint32 iter, + const message& msg, + const anyList& varList +) +{ + BoundaryFieldType::hearChanges(t,dt,iter, msg,varList); + if(msg.equivalentTo(message::BNDR_PROC_SIZE_CHANGED)) + { + auto newProcSize = varList.getObject("size"); + neighborProcField_.resize(newProcSize); + } + + if(msg.equivalentTo(message::BNDR_PROCTRANSFER_SEND)) + { + const auto& indices = varList.getObject( + message::eventName(message::BNDR_PROCTRANSFER_SEND) + ); + + if constexpr( isDeviceAccessible()) + { + FieldAccessType transferData( + indices.size(), + indices.deviceViewAll(), + this->internal().deviceViewAll() + ); + + thisFieldInNeighbor_.sendData(pFlowProcessors(),transferData); + } + else + { + FieldAccessType transferData( + indices.size(), + indices.hostViewAll(), + this->internal().deviceViewAll() + ); + + thisFieldInNeighbor_.sendData(pFlowProcessors(),transferData); + } + + + } + else if(msg.equivalentTo(message::BNDR_PROCTRANSFER_RECIEVE)) + { + uint32 numRecieved = varList.getObject( + message::eventName(message::BNDR_PROCTRANSFER_RECIEVE) + ); + neighborProcField_.recieveData(pFlowProcessors(), numRecieved); + } + else if(msg.equivalentTo(message::BNDR_PROCTRANSFER_WAITFILL)) + { + + uint32 numRecieved = neighborProcField_.waitBufferForUse(); + + if(msg.equivalentTo(message::CAP_CHANGED)) + { + auto newCap = varList.getObject( + message::eventName(message::CAP_CHANGED)); + this->internal().field().reserve(newCap); + + } + if(msg.equivalentTo(message::SIZE_CHANGED)) + { + auto newSize = varList.getObject( + message::eventName(message::SIZE_CHANGED)); + this->internal().field().resize(newSize); + } + + const auto& indices = varList.getObject( + message::eventName(message::ITEM_INSERT)); + + this->internal().field().insertSetElement(indices, neighborProcField_.buffer().deviceView()); + + return true; + } + + return true; +} +template +void pFlow::MPI::processorBoundaryField::sendBackData() const +{ + neighborProcField_.sendBackData(pFlowProcessors()); + dataRecieved_ = false; +} + +template +void pFlow::MPI::processorBoundaryField::recieveBackData() const +{ + thisFieldInNeighbor_.recieveBackData(pFlowProcessors(), this->size()); +} + +template +void pFlow::MPI::processorBoundaryField::addBufferToInternalField()const +{ + using RPolicy = Kokkos::RangePolicy< + execution_space, + Kokkos::Schedule, + Kokkos::IndexType>; + + //pOutput<<"waiting for buffer to be recived in addBufferToInternalField "<name()<internal().deviceViewAll(); + + if constexpr( isDeviceAccessible ) + { + const auto& indices = this->indexList().deviceViewAll(); + Kokkos::parallel_for( + "recieveBackData::"+this->name(), + RPolicy(0,this->size()), + LAMBDA_HD(uint32 i) + { + field[indices[i]] += buffView[i]; + } + ); + Kokkos::fence(); + } + else + { + const auto& indices = this->boundary().indexListHost().deviceViewAll(); + Kokkos::parallel_for( + "recieveBackData::"+this->name(), + RPolicy(0,this->size()), + LAMBDA_HD(uint32 i) + { + field[indices[i]] += buffView[i]; + } + ); + 
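+		// wait for the scatter-add kernel to finish before the receive
+		// buffer can be reused or resized by a later communication step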
Kokkos::fence(); + } +} + + +template +void pFlow::MPI::processorBoundaryField::updateBoundaryToMaster()const +{ + if (!this->isBoundaryMaster() ) + { + thisFieldInNeighbor_.sendData(pFlowProcessors(), this->thisField(), this->name()); + dataRecieved_ = false; + } +} + +template +void pFlow::MPI::processorBoundaryField::updateBoundaryFromSlave()const +{ + if( this->isBoundaryMaster() ) + { + neighborProcField_.recieveData(pFlowProcessors(), this->neighborProcSize(), this->name()); + dataRecieved_ = false; + } +} diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp new file mode 100644 index 00000000..bd32d59d --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryField.hpp @@ -0,0 +1,117 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ +#ifndef __processorBoundaryField_hpp__ +#define __processorBoundaryField_hpp__ + +#include "boundaryField.hpp" +#include "dataSender.hpp" +#include "dataReciever.hpp" +#include "boundaryProcessor.hpp" + +namespace pFlow::MPI +{ + +template< class T, class MemorySpace = void> +class processorBoundaryField +: + public boundaryField +{ +public: + + using processorBoundaryFieldType = processorBoundaryField; + + using BoundaryFieldType = boundaryField; + + using InternalFieldType = typename BoundaryFieldType::InternalFieldType; + + using memory_space = typename BoundaryFieldType::memory_space; + + using execution_space = typename BoundaryFieldType::execution_space; + + using FieldAccessType = typename BoundaryFieldType::FieldAccessType; + + using ProcVectorType = typename BoundaryFieldType::ProcVectorType; + +private: + + mutable dataSender thisFieldInNeighbor_; + + mutable dataReciever neighborProcField_; + + mutable bool dataRecieved_ = true; + + void checkDataRecieved()const; + + bool updateBoundary(int step, DataDirection direction); + + +public: + + TypeInfoTemplate211("boundaryField","processor", T, memory_space::name()); + + processorBoundaryField( + const boundaryBase& boundary, + const pointStructure& pStruct, + InternalFieldType& internal); + + + ~processorBoundaryField()override = default; + + add_vCtor + ( + BoundaryFieldType, + processorBoundaryFieldType, + boundaryBase + ); + + ProcVectorType& neighborProcField() override; + + const ProcVectorType& neighborProcField()const override; + + void fill(const T& val)override + { + neighborProcField_.fill(val); + } + + bool hearChanges( + real t, + real dt, + uint32 iter, + const message& msg, + const anyList& varList + ) override; + + void sendBackData()const; + + void 
recieveBackData()const; + + void addBufferToInternalField()const; + + void updateBoundaryToMaster()const; + + void updateBoundaryFromSlave()const; + +}; + +} + +#include "processorBoundaryField.cpp" + +#endif //__processorBoundaryField_hpp__ diff --git a/src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp new file mode 100644 index 00000000..f07f20d9 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointField/processorBoundaryFields.cpp @@ -0,0 +1,24 @@ + +//#include "Field.hpp" + +#include "processorBoundaryField.hpp" + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + +template class pFlow::MPI::processorBoundaryField; +template class pFlow::MPI::processorBoundaryField; + + diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp new file mode 100644 index 00000000..f781a00c --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessor.cpp @@ -0,0 +1,432 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "boundaryProcessor.hpp" +#include "boundaryProcessorKernels.hpp" +#include "dictionary.hpp" +#include "mpiCommunication.hpp" +#include "boundaryBaseKernels.hpp" +#include "internalPoints.hpp" +#include "Time.hpp" +#include "anyList.hpp" + + +void +pFlow::MPI::boundaryProcessor::checkDataRecieved() const +{ + if (!dataRecieved_) + { + uint32 nRecv = neighborProcPoints_.waitBufferForUse(); + dataRecieved_ = true; + if (nRecv != neighborProcSize()) + { + fatalErrorInFunction<<"In boundary "<name()<< + " ,number of recieved data is "<< nRecv<< + " and neighborProcSize is "<name()); + + numToTransfer_ = markInNegativeSide( + "transferData::markToTransfer"+this->name(), + transferFlags); + + uint32Vector_D keepIndices("keepIndices"); + + if(numToTransfer_ != 0u) + { + pFlow::boundaryBaseKernels::createRemoveKeepIndices + ( + indexList(), + numToTransfer_, + transferFlags, + transferIndices_, + keepIndices, + false + ); + + // delete transfer point from this processor + if( !setRemoveKeepIndices(transferIndices_, keepIndices)) + { + fatalErrorInFunction<< + "error in setting transfer and keep points in boundary "<< name()<(message::eventName(message::ITEM_INSERT)); + + auto indView = deviceViewType1D(indices.deviceView().data(), indices.deviceView().size()); + + uint32Vector_D newIndices("newIndices", indView); + + if(! appendNewIndices(newIndices)) + { + fatalErrorInFunction; + callAgain = false; + return false; + } + + const auto ti = internal().time().TimeInfo(); + if(!notify(ti, msg, varList)) + { + fatalErrorInFunction; + callAgain = false; + return false; + } + + callAgain = false; + return true; + } + return true; +} + +bool +pFlow::MPI::boundaryProcessor::iterate(const timeInfo& ti) +{ + return true; +} + +bool +pFlow::MPI::boundaryProcessor::afterIteration(const timeInfo& ti) +{ + + uint32 s = size(); + pOutput<<"size of boundary is "<< s < thisPointsInNeighbor_; + + dataReciever neighborProcPoints_; + + mutable bool dataRecieved_ = true; + + uint32 numToTransfer_ = 0; + + uint32 numToRecieve_ = 0; + + uint32Vector_D transferIndices_{"transferIndices"}; + + Request numTransferRequest_ = RequestNull; + + Request numRecieveRequest_ = RequestNull; + + void checkDataRecieved() const; + + /// @brief Update processor boundary data for this processor + /// @param step It is either 1 or 2 in the input to indicate + /// the update step + /// @return true if successful + /// @details This method is called by boundaryList two times to + /// allow processor boundaries to exchange data in two steps. + /// The first step is a buffered non-blocking send and the second + /// step is non-blocking recieve to get data. + bool updataBoundaryData(int step) override; + + bool transferData(uint32 iter, int step, bool& callAgain) override; + + public: + TypeInfo("boundary"); + + boundaryProcessor( + const dictionary &dict, + const plane &bplane, + internalPoints &internal, + boundaryList &bndrs, + uint32 thisIndex); + + ~boundaryProcessor() override = default; + + add_vCtor( + boundaryBase, + boundaryProcessor, + dictionary); + + bool beforeIteration( + uint32 step, + const timeInfo& ti, + bool updateIter, + bool iterBeforeUpdate , + bool& callAgain + ) override; + + bool iterate(const timeInfo& ti) override; + + bool afterIteration(const timeInfo& ti) override; + + /// @brief Return number of points in the neighbor processor boundary. + /// This is overriden from boundaryBase. 
+ uint32 neighborProcSize() const override; + + /// @brief Return a reference to point positions in the neighbor + /// processor boundary. + realx3Vector_D &neighborProcPoints() override; + + /// @brief Return a const reference to point positions in the + /// neighbor processor boundary. + const realx3Vector_D &neighborProcPoints() const override; + + + uint32 numToTransfer()const override + { + return numToTransfer_; + } + + + uint32 numToRecieve()const override + { + return numToRecieve_; + } + }; + +} // namespace pFlow::MPI + +#endif //__boundaryProcessor_hpp__ \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessorKernels.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessorKernels.hpp new file mode 100644 index 00000000..28e6aa4e --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/boundaryProcessorKernels.hpp @@ -0,0 +1,56 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#include "phasicFlowKokkos.hpp" +#include "infinitePlane.hpp" +#include "scatteredFieldAccess.hpp" + +namespace pFlow::boundaryProcessorKernels +{ + +struct markNegative +{ + markNegative(const infinitePlane& pl, + const deviceViewType1D& f, + const deviceScatteredFieldAccess& p + ) + : + plane_(pl), + flags_(f), + points_(p) + {} + + infinitePlane plane_; + deviceViewType1D flags_; + deviceScatteredFieldAccess points_; + + INLINE_FUNCTION_HD + void operator()(uint32 i, uint32& transferToUpdate)const + { + if(plane_.pointInNegativeSide(points_(i))) + { + flags_(i)=1; + transferToUpdate++; + } + } + +}; + +} \ No newline at end of file diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp new file mode 100644 index 00000000..c547f112 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataReciever.hpp @@ -0,0 +1,135 @@ + +#ifndef __dataReciever_hpp__ +#define __dataReciever_hpp__ + + +#include "span.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + +template +class dataReciever +{ +public: + + using BufferVectorType = VectorSingle; + + using BufferVectorTypeHost = VectorSingle; + + using memory_space = typename BufferVectorType::memory_space; + + using execution_space = typename BufferVectorType::execution_space; + +private: + + BufferVectorType buffer_; + + int fromProc_; + + int tag_; + + mutable Request recvRequest_ = RequestNull; + +public: + + dataReciever(const word& name, int from, int tag) + : + buffer_(name), + fromProc_(from), + tag_(tag) + {} + + ~dataReciever()=default; + + uint32 waitBufferForUse()const + { + if(recvRequest_ != RequestNull) + { + Status status; + MPI_Wait(&recvRequest_, &status); + int count; + CheckMPI(getCount(&status, count), true); + + return static_cast(count); + } + else + return buffer_.size(); + } + + void sendBackData( + const localProcessors& processors)const + { + CheckMPI( + Isend( + buffer_.getSpan(), + fromProc_, + tag_, + processors.localCommunicator(), + &recvRequest_ + ), + true + ); + } + + void recieveData( + const localProcessors& processors, + uint32 numToRecv, + const word& name = "dataReciver" + ) + { + resize(numToRecv); + + CheckMPI( + Irecv( + buffer_.getSpan(), + fromProc_, + tag_, + processors.localCommunicator(), + &recvRequest_ + ), + true + ); + } + + inline + auto& buffer() + { + return buffer_; + } + + inline + const auto& buffer()const + { + return buffer_; + } + + inline + void fill(const T& val) + { + waitBufferForUse(); + buffer_.fill(val); + } + + inline + uint32 size()const + { + return buffer_.size(); + } + + inline + void resize(uint32 newSize) + { + waitBufferForUse(); + buffer_.clear(); + buffer_.resize(newSize); + } + +}; + +} + + +#endif //__dataReciever_hpp__ diff --git a/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp new file mode 100644 index 00000000..bb4538f8 --- /dev/null +++ b/src/phasicFlow/MPIParallelization/pointStructure/boundaries/dataSender.hpp @@ -0,0 +1,202 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): 
www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __dataSender_hpp__ +#define __dataSender_hpp__ + +#include "VectorSingles.hpp" +#include "localProcessors.hpp" +#include "mpiCommunication.hpp" + +namespace pFlow::MPI +{ + +template +class dataSender +{ +public: + + using BufferVectorType = VectorSingle; + + using BufferVectorTypeHost = VectorSingle; + + using memory_space = typename BufferVectorType::memory_space; + + using execution_space = typename BufferVectorType::execution_space; + + // This is device vector + + +private: + + mutable BufferVectorType buffer_; + + int toProc_; + + int tag_; + + mutable Request sendRequest_ = RequestNull; + +public: + + dataSender(const word& name, int toProc, int tag) + : + toProc_(toProc), + tag_(tag) + {} + + ~dataSender() + { + if(sendRequest_ != RequestNull) + { + MPI_Request_free(&sendRequest_); + } + } + + bool waitBufferForUse()const + { + if(sendRequest_ != RequestNull) + { + MPI_Wait(&sendRequest_, StatusesIgnore); + } + return true; + } + + void sendData( + const localProcessors& processors, + const scatteredFieldAccess& scatterField, + const word& name = "dataSender::sendData" + ) + { + using RPolicy = Kokkos::RangePolicy< + execution_space, + Kokkos::Schedule, + Kokkos::IndexType>; + + uint32 n = scatterField.size(); + + // make sure the buffer is ready to be used and free + // the previous request (if any). + waitBufferForUse(); + + // clear the buffer to prevent data copy if capacity increases + buffer_.clear(); + buffer_.resize(n); + + const auto& buffView = buffer_.deviceViewAll(); + + Kokkos::parallel_for( + "packDataForSend::"+name, + RPolicy(0,n), + LAMBDA_HD(uint32 i) + { + buffView[i] = scatterField[i]; + } + ); + Kokkos::fence(); + + CheckMPI( + Isend(buffer_.getSpan(), + toProc_, + tag_, + processors.localCommunicator(), + &sendRequest_ + ), + true + ); + + } + + bool recieveBackData( + const localProcessors& processors, + uint32 numToRecieve + )const + { + // make sure the buffer is ready to be used and free + // the previous request (if any). 
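+        // Note that recieveBackData reuses sendRequest_ for the incoming
+        // message, so the Isend previously posted by sendData must have
+        // completed before the Irecv below is posted.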
+ waitBufferForUse(); + + // clear the buffer to prevent data copy if capacity increases + buffer_.clear(); + buffer_.resize(numToRecieve); + + CheckMPI( + Irecv( + buffer_.getSpan(), + toProc_, + tag_, + processors.localCommunicator(), + &sendRequest_ + ), + true + ); + + return true; + } + + auto& buffer() + { + return buffer_; + } + + const auto& buffer()const + { + return buffer_; + } + + inline + void fill(const T& val) + { + waitBufferForUse(); + buffer_.fill(val); + } + + uint32 size()const + { + return buffer_.size(); + } + + bool sendComplete() + { + int test; + if(sendRequest_ != RequestNull) + { + MPI_Test(&sendRequest_, &test, StatusIgnore); + return test; + } + else + { + return true; + } + } + + inline + void resize(uint32 newSize) + { + waitBufferForUse(); + buffer_.clear(); + buffer_.resize(newSize); + } + +}; + +} + +#endif //__dataSender_hpp__ \ No newline at end of file