diff --git a/src/phasicFlow/Kokkos/KokkosTypes.H b/src/phasicFlow/Kokkos/KokkosTypes.H new file mode 100644 index 00000000..63ff7d40 --- /dev/null +++ b/src/phasicFlow/Kokkos/KokkosTypes.H @@ -0,0 +1,135 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#ifndef __KokkosTypes_H__ +#define __KokkosTypes_H__ + + +#include +#include +#include + + +namespace pFlow +{ + +class DeviceSide{}; +class HostSide{}; + +template +struct selectSide{}; + +using HostSpace = Kokkos::HostSpace; +using Serial = Kokkos::Serial; + +#ifdef _OPENMP +using OpenMP = Kokkos::OpenMP; +#endif + +using DefaultHostExecutionSpace = Kokkos::DefaultHostExecutionSpace; +using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; + + + +template + using kPair = Kokkos::pair; + +using range = kPair; + +using range64 = kPair; + +template + using ViewTypeScalar = Kokkos::View; + +template + using ViewType1D = Kokkos::View; + +template + using ViewType3D = Kokkos::View; + +template + using unorderedMap = Kokkos::UnorderedMap; + +template + using unorderedSet = Kokkos::UnorderedMap; + +template + using deviceHashMap= Kokkos::UnorderedMap; + +template + using hostHashMap= Kokkos::UnorderedMap; + +template + using deviceHashSet= Kokkos::UnorderedMap; + +template + using hostHashSet = Kokkos::UnorderedMap; + +// a 1D array (vector) with default device (memory space and execution space) +template + using deviceViewTypeScalar = Kokkos::View; + +template + using deviceViewType1D = Kokkos::View; + +template + using deviceViewType2D = Kokkos::View; + + +// a 1D array (vector with host memory space) +template + using hostViewTypeScalar = Kokkos::View; + +template + using hostViewType1D = Kokkos::View; + +template + using hostViewType2D = Kokkos::View; + + +#ifdef __CUDACC__ +using Cuda = Kokkos::Cuda; +template + using cudaViewTypeScalar = Kokkos::View; + +template + using cudaViewType1D = Kokkos::View; + +template + using cudaViewType2D = Kokkos::View; +#endif + + +template +using deviceAtomicViewType1D = + Kokkos::View< + T*, + Kokkos::MemoryTraits::value?0:Kokkos::Atomic>>; + +template +using deviceAtomicViewType3D = + Kokkos::View< + T***, + 
Kokkos::MemoryTraits::value?0:Kokkos::Atomic>>; + + +} // pFlow + +#endif //__KokkosTypes_H__ diff --git a/src/phasicFlow/Kokkos/KokkosUtilities.H b/src/phasicFlow/Kokkos/KokkosUtilities.H new file mode 100644 index 00000000..972565f5 --- /dev/null +++ b/src/phasicFlow/Kokkos/KokkosUtilities.H @@ -0,0 +1,92 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __KokkosUtilities_H__ +#define __KokkosUtilities_H__ + + +#include "KokkosTypes.H" + + +namespace pFlow +{ + +template +INLINE_FUNCTION_H +bool constexpr isHostAccessible() +{ + return Kokkos::SpaceAccessibility::accessible; +} + +template +INLINE_FUNCTION_H +bool constexpr areAccessible() +{ + return Kokkos::SpaceAccessibility::accessible; +} + +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void realloc( ViewType1D& view, int32 len) +{ + Kokkos::realloc(view, len); +} + +template < + typename Type, + typename... 
Properties> +INLINE_FUNCTION_H +void reallocNoInit(ViewType1D& view, int32 len) +{ + using ViewType = ViewType1D; + word vl = view.label(); + view = ViewType(); // Deallocate first + view = ViewType( + Kokkos::view_alloc( + Kokkos::WithoutInitializing, + vl), + len); +} + +template < + typename Type, + typename... Properties> +INLINE_FUNCTION_H +void reallocFill( ViewType1D& view, int32 len, Type val) +{ + reallocNoInit(view, len); + Kokkos::deep_copy(view, val); +} + + +template +INLINE_FUNCTION_H +void swapViews(ViewType& v1, ViewType &v2) +{ + auto tmp = v1; + v1 = v2; + v2 = tmp; +} + +} // pFlow + +#endif //__KokkosUtilities_H__ diff --git a/src/phasicFlow/Kokkos/ViewAlgorithms.H b/src/phasicFlow/Kokkos/ViewAlgorithms.H new file mode 100644 index 00000000..4d944a87 --- /dev/null +++ b/src/phasicFlow/Kokkos/ViewAlgorithms.H @@ -0,0 +1,593 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ +-----------------------------------------------------------------------------*/ + +#ifndef __ViewAlgorithms_H__ +#define __ViewAlgorithms_H__ + + +#include "numericConstants.H" +#include "KokkosUtilities.H" +#include "kokkosAlgorithms.H" +#include "stdAlgorithms.H" +#include "cudaAlgorithms.H" + + +namespace pFlow +{ + +inline const size_t maxSizeToSerial__ = 64; + +template +INLINE_FUNCTION_H +int32 count( + const ViewType1D& view, + int32 start, + int32 end, + const T& val) +{ + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + numElems, + val); + } + } + + return pFlow::algorithms::KOKKOS::count( + view.data()+start, + numElems, + val); +} + +template +INLINE_FUNCTION_H +void fill +( + ViewType1D& view, + range span, + T val +) +{ + using ExecutionSpace = typename ViewType1D::execution_space; + + + if constexpr( isHostAccessible()) + { + int32 numElems = span.second-span.first; + if( numElems( + view.data()+span.first, + numElems, + val); + return; + } + } + + auto subV = Kokkos::subview(view, span); + Kokkos::deep_copy(subV, val); +} + +template +void fill +( + ViewType1D& view, + int32 start, + int32 end, + T val +) +{ + fill(view, range(start,end),val); +} + +template< + typename Type, + typename... properties> +void fillSequence( + ViewType1D& view, + int32 start, + int32 end, + const Type startVal + ) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + numElems, + startVal); + return ; + } + } + + pFlow::algorithms::KOKKOS::fillSequence( + view.data()+start, + numElems, + startVal); + + return ; +} + + +template< + typename Type, + typename... properties, + typename indexType, + typename... 
indexProperties> +bool fillSelected( + ViewType1D view, + const ViewType1D indices, + const int32 numElems, + const Type val, + typename std::enable_if_t< + areAccessible< + typename ViewType1D::execution_space, + typename ViewType1D::memory_space>(), + bool> = true ) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data(), + indices.data(), + numElems, + val); + return true; + } + } + + pFlow::algorithms::KOKKOS::fillSelected( + view.data(), + indices.data(), + numElems, + val); + + return true; +} + +template< + typename Type, + typename... properties, + typename indexType, + typename... indexProperties> + //typename valType> //, + //typename... valProperties> +bool fillSelected( + ViewType1D view, + const ViewType1D indices, + const ViewType1D vals, + const int32 numElems , + typename std::enable_if_t< + areAccessible< + typename ViewType1D::execution_space, + typename ViewType1D::memory_space>(), + bool> = true ) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + + + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data(), + indices.data(), + vals.data(), + numElems + ); + return true; + } + } + + + pFlow::algorithms::KOKKOS::fillSelected( + view.data(), + indices.data(), + vals.data(), + numElems + ); + + return true; +} + + +template +INLINE_FUNCTION_H +T min( + const ViewType1D& view, + int32 start, + int32 end) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + numElems); + } + } + + return + pFlow::algorithms::KOKKOS::min( + view.data()+start, + numElems); +} + +template +INLINE_FUNCTION_H +T max( + const ViewType1D& view, + int32 start, + int32 end) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + 
view.data()+start, + numElems); + } + } + + return + pFlow::algorithms::KOKKOS::max( + view.data()+start, + numElems); +} + +template < + typename dType, + typename... dProperties, + typename sType, + typename... sProperties> +INLINE_FUNCTION_H +void copy( + const ViewType1D& dst, + const ViewType1D& src + ) +{ + Kokkos::deep_copy(dst,src); +} + +template < + typename dType, + typename... dProperties, + typename sType, + typename... sProperties> +INLINE_FUNCTION_H +void copy( + const ViewType1D& dst, + int32 dStart, + const ViewType1D& src, + int32 sStart, + int32 sEnd + ) +{ + + range sSpan(sStart,sEnd); + range dSpan(dStart,dStart+(sEnd-sStart)); + + auto srcSub = Kokkos::subview(src, sSpan); + auto dstSub = Kokkos::subview(dst, dSpan); + + Kokkos::deep_copy(dstSub,srcSub); +} + +template < + typename dType, + typename sType, + typename... sProperties> +INLINE_FUNCTION_H +void getNth( + dType& dst, + const ViewType1D& src, + const int32 n + ) +{ + range span(n,n+1); + auto subV = Kokkos::subview(src, span); + hostViewType1D dstView("getNth",1); + Kokkos::deep_copy(dstView,subV); + dst = *dstView.data(); +} + + +template +INLINE_FUNCTION_H +void sort( + ViewType1D& view, + int32 start, + int32 end) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + numElems); + return; + } + else + { + pFlow::algorithms::STD::sort( + view.data()+start, + numElems); + return; + } + } + +#ifdef __CUDACC__ + + pFlow::algorithms::CUDA::sort( + view.data()+start, + numElems); +#else + static_assert("sort on device is not defined!"); + +#endif + + return; +} + +template +INLINE_FUNCTION_H +void sort( + ViewType1D& view, + int32 start, + int32 end, + CompareFunc compare) +{ + + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + numElems, + 
compare); + return; + } + else + { + pFlow::algorithms::STD::sort( + view.data()+start, + numElems, + compare); + return; + } + } + +#ifdef __CUDACC__ + + pFlow::algorithms::CUDA::sort( + view.data()+start, + numElems, + compare); +#else + static_assert("sort on device is not defined!"); + +#endif + + return; +} + +template< + typename Type, + typename... properties, + typename permType, + typename... permProperties> +void permuteSort( + const ViewType1D& view, + int32 start, + int32 end, + ViewType1D& permuteView, + int32 permStart, + typename std::enable_if_t< + areAccessible< + typename ViewType1D::execution_space, + typename ViewType1D::memory_space>(), + bool> = true ) +{ + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + permuteView.data()+permStart, + numElems ); + return; + } + else + { + pFlow::algorithms::STD::permuteSort( + view.data()+start, + permuteView.data()+permStart, + numElems); + return; + } + } + +#ifdef __CUDACC__ + + pFlow::algorithms::CUDA::permuteSort( + view.data()+start, + permuteView.data()+permStart, + numElems); +#else + static_assert("sort on device is not defined!"); + +#endif + +} + +template< + typename Type, + typename... properties> +INLINE_FUNCTION_HD +int32 binarySearch( + const ViewType1D& view, + int32 start, + int32 end, + const Type& val) +{ + + if(end<=start)return -1; + + if(auto res = + pFlow::algorithms::binarySearch(view.data()+start,end-start,val); res>=0) { + return res+start; + } + else{ + return res; + } +} + +template< + typename Type, + typename... properties, + typename dType, + typename... 
dProperties> +void exclusiveScan( + const ViewType1D& view, + int32 start, + int32 end, + ViewType1D& dView, + int32 dStart, + typename std::enable_if_t< + areAccessible< + typename ViewType1D::execution_space, + typename ViewType1D::memory_space>(), + bool> = true ) +{ + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + dView.data()+dStart, + numElems); + return; + } + } + + pFlow::algorithms::KOKKOS::exclusiveScan( + view.data()+start, + dView.data()+dStart, + numElems); +} + + +template< + typename Type, + typename... properties, + typename dType, + typename... dProperties> +void inclusiveScan( + const ViewType1D& view, + int32 start, + int32 end, + ViewType1D& dView, + int32 dStart, + typename std::enable_if_t< + areAccessible< + typename ViewType1D::execution_space, + typename ViewType1D::memory_space>(), + bool> = true ) +{ + using ExecutionSpace = typename ViewType1D::execution_space; + + int32 numElems = end-start; + if constexpr( isHostAccessible()) + { + if(numElems( + view.data()+start, + dView.data()+dStart, + numElems); + return; + } + } + + pFlow::algorithms::KOKKOS::inclusiveScan( + view.data()+start, + dView.data()+dStart, + numElems); +} + +} // pFlow + + +#endif // Viewalgorithms diff --git a/src/phasicFlow/Kokkos/baseAlgorithms.H b/src/phasicFlow/Kokkos/baseAlgorithms.H new file mode 100644 index 00000000..2446509c --- /dev/null +++ b/src/phasicFlow/Kokkos/baseAlgorithms.H @@ -0,0 +1,245 @@ +/*------------------------------- phasicFlow --------------------------------- + O C enter of + O O E ngineering and + O O M ultiscale modeling of + OOOOOOO F luid flow +------------------------------------------------------------------------------ + Copyright (C): www.cemf.ir + email: hamid.r.norouzi AT gmail.com +------------------------------------------------------------------------------ +Licence: + This file is part of phasicFlow 
code. It is a free software for simulating + granular and multiphase flows. You can redistribute it and/or modify it under + the terms of GNU General Public License v3 or any other later versions. + + phasicFlow is distributed to help others in their research in the field of + granular and multiphase flows, but WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +-----------------------------------------------------------------------------*/ + +#ifndef __baseAlgorithms_H__ +#define __baseAlgorithms_H__ + + +#include "numericConstants.H" + +inline const size_t sizeToSerial__ = 64; + +namespace pFlow +{ + +// counts the number of elements that match val +// the execution space is selected based on the View::execution_space +/*template +INLINE_FUNCTION_H +size_t count( + const ViewType1D& view, + size_t start, + size_t end, + const T& val + ) +{ + + auto RP = Kokkos::RangePolicy< + Kokkos::IndexType, + typename ViewType1D::execution_space >(start, end); + + size_t totalNum=0; + Kokkos::parallel_reduce( + "baseAlgorithms-count", + RP, + LAMBDA_HD(label i, size_t & valueToUpdate){ + if( equal(view[i], val) ) valueToUpdate += 1; + }, totalNum ); + + return totalNum; +}*/ + + +template +INLINE_FUNCTION_H +T min( const ViewType1D& view, size_t start, size_t end ) +{ + + T minValue = largestPositive(); + + auto RP = Kokkos::RangePolicy< + Kokkos::IndexType, + typename ViewType1D::execution_space >(start, end); + + Kokkos::parallel_reduce("baseAlgorithms-min", + RP, + LAMBDA_HD(label i, T& valueToUpdate){ + valueToUpdate = min(view[i],valueToUpdate); + }, + Kokkos :: Min < T >( minValue ) + ); + return minValue; +} + +template +INLINE_FUNCTION_H +T max( const ViewType1D& view, size_t start, size_t end ) +{ + + T maxValue = largestNegative(); + + auto RP = Kokkos::RangePolicy< + Kokkos::IndexType, + typename ViewType1D::execution_space >(start, end); + + Kokkos::parallel_reduce("baseAlgorithms-max", + RP, + 
LAMBDA_HD(label i, T& valueToUpdate){ + valueToUpdate = max(view[i],valueToUpdate); + }, + Kokkos::Max( maxValue ) + ); + return maxValue; +} + +template +INLINE_FUNCTION_H +T min_serial(const ViewType1D& view, size_t start, size_t end) +{ + T minValue = largestPositive(); + for(label i=start; i +INLINE_FUNCTION_H +T max_serial(const ViewType1D& view, size_t start, size_t end) +{ + T maxValue = largestNegative(); + for(label i=start; i +void apply_to_each(const ViewType1D& view, size_t start, size_t end, UnaryFunction func) +{ + auto RP = Kokkos::RangePolicy< + Kokkos::IndexType, + typename ViewType1D::execution_space >(start, end); + + Kokkos::parallel_for("baseAlgorithms-for_each", + RP, + LAMBDA_HD(label i){ + view[i] = func(i); + } + ); +} + + +template +void insertSetElementH +( + ViewType1D& view, + hostViewType1D