44 #ifndef KOKKOS_EXECPOLICY_HPP 45 #define KOKKOS_EXECPOLICY_HPP 47 #include <Kokkos_Core_fwd.hpp> 48 #include <impl/Kokkos_Traits.hpp> 49 #include <impl/Kokkos_StaticAssert.hpp> 50 #include <impl/Kokkos_Error.hpp> 51 #include <impl/Kokkos_Tags.hpp> 52 #include <impl/Kokkos_AnalyzePolicy.hpp> 53 #include <Kokkos_Concepts.hpp> 62 ChunkSize(
// Stores value_ unchanged in the `value` member; no range or power-of-two check happens here.
int value_):value(value_) {}
// Class template header of the range policy (the line carrying the class name is
// not visible in this excerpt; the constructors further down name it RangePolicy).
// The property pack is resolved through Impl::PolicyTraits, which is also the base class.
86 template<
class ... Properties>
88 :
public Impl::PolicyTraits<Properties ... >
91 typedef Impl::PolicyTraits<Properties ... > traits;
// Execution space instance handed back by space().
93 typename traits::execution_space m_space ;
// Lower bound of the index range, handed back by begin().
94 typename traits::index_type m_begin ;
// Upper bound of the index range, handed back by end().
95 typename traits::index_type m_end ;
// Chunk size. set_auto_chunk_size() aborts if a positive value is not a power of two.
96 typename traits::index_type m_granularity ;
// Assigned as m_granularity - 1 wherever the visible code assigns the granularity.
97 typename traits::index_type m_granularity_mask ;
// Both aliases name traits::index_type.
102 typedef typename traits::index_type member_type ;
103 typedef typename traits::index_type index_type;
105 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space & space()
const {
return m_space ; }
106 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
107 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
112 void operator()(
const int&)
const {}
114 RangePolicy(
const RangePolicy&) =
default;
115 RangePolicy(RangePolicy&&) =
default;
117 inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
// Constructor tail: an execution space plus explicit work_begin / work_end bounds.
// (The line carrying the constructor name and the work_space parameter is not
// visible in this excerpt.)
122 ,
const member_type work_begin
123 ,
const member_type work_end
125 : m_space( work_space )
// A request with work_begin >= work_end collapses to begin = end = 0.
126 , m_begin( work_begin < work_end ? work_begin : 0 )
127 , m_end( work_begin < work_end ? work_end : 0 )
129 , m_granularity_mask(0)
// Derive a chunk size from the range length and the execution space's concurrency.
131 set_auto_chunk_size();
// Constructor tail taking only the bounds: work_begin and work_end are passed on
// (the start of that initializer is not visible in this excerpt), then the chunk
// size is derived automatically.
137 ,
const member_type work_end
140 , work_begin , work_end )
142 set_auto_chunk_size();
// Variadic constructor tail: execution space, explicit bounds, and an Args pack.
// How the extra arguments are consumed is not visible in this excerpt.
146 template<
class ... Args>
149 ,
const member_type work_begin
150 ,
const member_type work_end
153 : m_space( work_space )
// A request with work_begin >= work_end collapses to begin = end = 0.
154 , m_begin( work_begin < work_end ? work_begin : 0 )
155 , m_end( work_begin < work_end ? work_end : 0 )
157 , m_granularity_mask(0)
// Derive a chunk size from the range length and the execution space's concurrency.
159 set_auto_chunk_size();
// Variadic constructor tail taking only the bounds plus an Args pack: work_begin
// and work_end are passed on (the start of that initializer is not visible in this
// excerpt), then the chunk size is derived automatically.
164 template<
class ... Args>
167 ,
const member_type work_end
171 , work_begin , work_end )
173 set_auto_chunk_size();
// Catch-all overload of set(): the static_assert only holds for an empty pack, so
// instantiating this overload with any argument is a compile-time error carrying
// the message below.
181 template<
class ... Args>
182 inline void set(Args ...) {
183 static_assert( 0 ==
sizeof...(Args),
"Kokkos::RangePolicy: unhandled constructor arguments encountered.");
// set() overload that consumes a leading ChunkSize argument. What happens to the
// remaining arguments is not visible in this excerpt.
186 template<
class ... Args>
187 inline void set(
const ChunkSize& chunksize, Args ... args) {
// Take the requested chunk size as the granularity.
188 m_granularity = chunksize.value;
// granularity - 1 is only a usable bit mask when the granularity is a power of two.
189 m_granularity_mask = m_granularity - 1;
// Body fragment of the chunk-size accessor: reports the stored granularity.
195 return m_granularity;
// Body fragment of the chunk-size setter: writes the requested size and the matching
// mask into the policy object `p`.
// NOTE(review): how `p` is created and returned is not visible in this excerpt.
201 p.m_granularity = chunk_size_;
202 p.m_granularity_mask = p.m_granularity - 1;
// Chooses m_granularity / m_granularity_mask from the range length and the
// execution space's concurrency. The bodies of the two while loops and several
// closing braces are not visible in this excerpt.
208 inline void set_auto_chunk_size() {
// Concurrency reported by the execution space, forced to at least 1.
210 typename traits::index_type concurrency = traits::execution_space::concurrency();
211 if( concurrency==0 ) concurrency=1;
// A granularity that is already positive must be a power of two, otherwise abort.
213 if(m_granularity > 0) {
214 if(!Impl::is_integral_power_of_two( m_granularity ))
215 Kokkos::abort(
"RangePolicy blocking granularity must be power of two" );
// Candidate chunk size, starting at 1.
218 member_type new_chunk_size = 1;
// First pass: loops while new_chunk_size * 100 * concurrency is below the range length.
219 while(new_chunk_size*100*concurrency < m_end-m_begin)
// Second pass for candidates below 128: factor 40 instead of 100, and the loop
// also stops once the candidate reaches 128.
221 if(new_chunk_size < 128) {
223 while( (new_chunk_size*40*concurrency < m_end-m_begin ) && (new_chunk_size<128) )
// Commit the result and derive the mask from it.
226 m_granularity = new_chunk_size;
227 m_granularity_mask = m_granularity - 1;
// Sub-range helper (called WorkRange in this file's documentation strings): it
// reuses the policy's work tag and index type.
236 typedef typename RangePolicy::work_tag work_tag ;
237 typedef typename RangePolicy::member_type member_type ;
// Bounds of the sub-range computed by the constructor.
239 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
240 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
// Constructor tail: computes the slice owned by partition part_rank out of
// part_size partitions. (The line carrying the constructor name and the `range`
// parameter is not visible in this excerpt.)
246 KOKKOS_INLINE_FUNCTION
248 ,
const int part_rank
249 ,
const int part_size
251 : m_begin(0), m_end(0)
// Work per partition: the range length divided by part_size, rounded up, then
// rounded up again to a multiple of the granularity using the mask (this relies
// on the mask being a power of two minus one).
256 const member_type work_part =
257 ( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
258 + range.m_granularity_mask ) & ~member_type(range.m_granularity_mask);
260 m_begin = range.begin() + work_part * part_rank ;
261 m_end = m_begin + work_part ;
// Clamp both ends so the slice never extends past the end of the full range.
263 if ( range.end() < m_begin ) m_begin = range.end() ;
264 if ( range.end() < m_end ) m_end = range.end() ;
269 member_type m_begin ;
// Team policy interface selected by ExecSpace. Only declarations (no bodies) appear
// in this excerpt, and some lines of the class are not visible.
285 template<
class ExecSpace,
class ... Properties>
286 class TeamPolicyInternal:
public Impl::PolicyTraits<Properties ... > {
288 typedef Impl::PolicyTraits<Properties ... > traits;
292 typedef typename traits::index_type index_type;
// Team-size queries parameterized on the functor type.
// NOTE(review): the exact meaning of the results is not shown in this excerpt.
305 template<
class FunctorType >
306 static int team_size_max(
const FunctorType & );
318 template<
class FunctorType >
319 static int team_size_recommended(
const FunctorType & );
// Overload taking an additional int; its meaning is not documented in this excerpt.
321 template<
class FunctorType >
322 static int team_size_recommended(
const FunctorType & ,
const int&);
// Constructors taking an execution space instance, a league size, and either an
// explicit team size or the Kokkos::AUTO_t tag; the vector length defaults to 1.
325 TeamPolicyInternal(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
327 TeamPolicyInternal(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
// Same two forms without an execution space argument.
330 TeamPolicyInternal(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
332 TeamPolicyInternal(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
// Number of teams in the league.
343 KOKKOS_INLINE_FUNCTION
int league_size()
const ;
// Number of threads in each team.
350 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
// Chunk size of this policy, expressed in the policy's index type.
352 inline typename traits::index_type chunk_size()
const ;
// Two set_chunk_size forms follow the #ifdef: a const one returning a policy by
// value and a non-const one returning a reference. The preprocessor lines that
// separate them are not visible in this excerpt.
354 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 355 inline TeamPolicyInternal set_chunk_size(
int chunk_size)
const ;
357 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
// Team-member interface declarations (the enclosing type's header line is not
// visible in this excerpt).

// Handle to the currently executing team shared scratch memory.
366 KOKKOS_INLINE_FUNCTION
367 typename traits::execution_space::scratch_memory_space
team_shmem()
const ;
// Rank of this thread within this team.
376 KOKKOS_INLINE_FUNCTION
int team_rank()
const ;
// Number of threads in this team.
379 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
// Intra-team reduction. Returns join of all values of the team members.
385 template<
class JoinOp >
386 KOKKOS_INLINE_FUNCTION
387 typename JoinOp::value_type
team_reduce(
const typename JoinOp::value_type
388 ,
const JoinOp & )
const ;
// Intra-team exclusive prefix sum with team_rank() ordering.
395 template<
typename Type >
396 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type & value )
const ;
// Overload taking an additional global_accum pointer.
// NOTE(review): the role of global_accum is not documented in this excerpt.
407 template<
typename Type >
408 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type & value , Type *
const global_accum )
const ;
// Wrapper type constructed from an int. ScratchRequest and the set_scratch_size
// overloads use it to tell a per-team amount apart from a per-thread amount.
// (Its data member and closing brace are not visible in this excerpt.)
413 struct PerTeamValue {
415 PerTeamValue(
int arg);
// Per-thread counterpart of the wrapper above.
418 struct PerThreadValue {
420 PerThreadValue(
int arg);
// Helpers that look at the first argument of a pack and branch on whether its type
// is integral (std::is_integral). The function bodies are not visible in this excerpt.
423 template<
class iType,
class ... Args>
424 struct ExtractVectorLength {
// Integral first argument: takes it as `val` and returns iType.
425 static inline iType value(
typename std::enable_if<std::is_integral<iType>::value,iType>::type val, Args...) {
// Non-integral first argument: the argument is unnamed and the result type is int.
428 static inline typename std::enable_if<!std::is_integral<iType>::value,
int>::type value(
typename std::enable_if<!std::is_integral<iType>::value,iType>::type, Args...) {
// Free-function form, integral first argument: returns iType.
433 template<
class iType,
class ... Args>
434 inline typename std::enable_if<std::is_integral<iType>::value,iType>::type extract_vector_length(iType val, Args...) {
// Free-function form, non-integral first argument: returns int.
438 template<
class iType,
class ... Args>
439 inline typename std::enable_if<!std::is_integral<iType>::value,
int>::type extract_vector_length(iType, Args...) {
445 Impl::PerTeamValue PerTeam(
const int& arg);
446 Impl::PerThreadValue PerThread(
const int& arg);
// Bundles a scratch level with per-team and/or per-thread amounts. The data
// members and parts of each constructor body are not visible in this excerpt.
448 struct ScratchRequest {
// Level plus a per-team amount.
455 ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value) {
457 per_team = team_value.value;
// Level plus a per-thread amount.
462 ScratchRequest(
const int& level_,
const Impl::PerThreadValue& thread_value) {
465 per_thread = thread_value.value;;
// Level plus both amounts, per-team first.
469 ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
const Impl::PerThreadValue& thread_value) {
471 per_team = team_value.value;
472 per_thread = thread_value.value;;
// Same as the previous constructor with the two amounts given in the opposite order.
476 ScratchRequest(
const int& level_,
const Impl::PerThreadValue& thread_value,
const Impl::PerTeamValue& team_value) {
478 per_team = team_value.value;
479 per_thread = thread_value.value;;
// Class template header of the team policy (the line carrying the class name is not
// visible in this excerpt). The backend is Impl::TeamPolicyInternal, selected by
// the execution space that Impl::PolicyTraits derives from the property pack.
509 template<
class ... Properties>
511 Impl::TeamPolicyInternal<
512 typename Impl::PolicyTraits<Properties ... >::execution_space,
// Alias for that backend type; the constructors and setters delegate to it.
514 typedef Impl::TeamPolicyInternal<
515 typename Impl::PolicyTraits<Properties ... >::execution_space,
516 Properties ...> internal_policy;
518 typedef Impl::PolicyTraits<Properties ... > traits;
526 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 )
527 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {first_arg =
false;}
529 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 )
530 : internal_policy(typename traits::execution_space(),league_size_request,
Kokkos::AUTO(), vector_length_request) {first_arg =
false;}
533 TeamPolicy(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 )
534 : internal_policy(league_size_request,team_size_request, vector_length_request) {first_arg =
false;}
536 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 )
537 : internal_policy(league_size_request,
Kokkos::AUTO(), vector_length_request) {first_arg =
false;}
// Variadic constructors that follow an #ifdef KOKKOS_ENABLE_DEPRECATED_CODE and take
// an explicit vector length. The trailing parameter line of each signature and the
// constructor bodies are not visible in this excerpt.
//
// Where an execution space parameter is present it is unnamed, and a
// default-constructed execution space is what gets passed to internal_policy.
539 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 541 template<
class ... Args>
542 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request,
544 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {
// Same, with the team size left to Kokkos::AUTO.
549 template<
class ... Args>
550 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request ,
552 : internal_policy(typename traits::execution_space(),league_size_request,
Kokkos::AUTO(), vector_length_request) {
// Forms without an execution space argument.
558 template<
class ... Args>
559 TeamPolicy(
int league_size_request ,
int team_size_request ,
int vector_length_request ,
561 : internal_policy(league_size_request,team_size_request, vector_length_request) {
566 template<
class ... Args>
567 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request ,
569 : internal_policy(league_size_request,
Kokkos::AUTO(), vector_length_request) {
// Variadic constructors without an explicit vector-length parameter: the vector
// length handed to internal_policy is whatever
// Kokkos::Impl::extract_vector_length returns for the argument pack. The trailing
// parameter line of each signature and the constructor bodies are not visible in
// this excerpt.
//
// Where an execution space parameter is present it is unnamed, and a
// default-constructed execution space is what gets passed to internal_policy.
575 template<
class ... Args>
576 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
int team_size_request ,
578 : internal_policy(typename traits::execution_space(),league_size_request,team_size_request,
579 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
// Same, with the team size left to Kokkos::AUTO.
584 template<
class ... Args>
585 TeamPolicy(
const typename traits::execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
587 : internal_policy(typename traits::execution_space(),league_size_request,
Kokkos::AUTO(),
588 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
// Forms without an execution space argument.
594 template<
class ... Args>
595 TeamPolicy(
int league_size_request ,
int team_size_request ,
597 : internal_policy(league_size_request,team_size_request,
598 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
603 template<
class ... Args>
604 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
606 : internal_policy(league_size_request,
Kokkos::AUTO(),
607 Kokkos::Impl::extract_vector_length<Args...>(args...)) {
615 TeamPolicy(
const internal_policy& p):internal_policy(p) {first_arg =
false;}
// set() overloads that follow #ifdef KOKKOS_ENABLE_DEPRECATED_CODE; each one handles
// the leading argument of a pack. Parts of the bodies (including what happens to
// the remaining arguments) are not visible in this excerpt.
617 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 622 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE 623 template<
class ... Args>
// Catch-all: the static_assert only holds for an empty pack, so instantiating this
// overload with any argument is a compile-time error.
624 inline void set(Args ...) {
625 static_assert( 0 ==
sizeof...(Args),
"Kokkos::TeamPolicy: unhandled constructor arguments encountered.");
// Leading integral argument: the visible part of the body raises a runtime exception.
// NOTE(review): whether the throw is conditional is not visible in this excerpt.
628 template<
class iType,
class ... Args>
629 inline typename std::enable_if<std::is_integral<iType>::value>::type
set(iType, Args ... args) {
635 Kokkos::Impl::throw_runtime_exception(
"Kokkos::TeamPolicy: integer argument to constructor in illegal place.");
// Leading ChunkSize: forwards its value to the backend's internal_set_chunk_size.
639 template<
class ... Args>
640 inline void set(
const ChunkSize& chunksize, Args ... args) {
642 internal_policy::internal_set_chunk_size(chunksize.value);
// Leading ScratchRequest: forwards level, per-team and per-thread amounts to the
// backend's internal_set_scratch_size, re-wrapping the two amounts.
646 template<
class ... Args>
647 inline void set(
const ScratchRequest& scr_request, Args ... args) {
649 internal_policy::internal_set_scratch_size(scr_request.level,Impl::PerTeamValue(scr_request.per_team),
650 Impl::PerThreadValue(scr_request.per_thread));
// Const setters: each returns a new TeamPolicy built from whatever the
// corresponding internal_policy setter returns. (The closing brace of each
// function is not visible in this excerpt.)
654 inline TeamPolicy set_chunk_size(
int chunk)
const {
655 return TeamPolicy(internal_policy::set_chunk_size(chunk));
// Scratch size at `level`, per-team amount only.
658 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team)
const {
659 return TeamPolicy(internal_policy::set_scratch_size(level,per_team));
// Scratch size at `level`, per-thread amount only.
661 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread)
const {
662 return TeamPolicy(internal_policy::set_scratch_size(level,per_thread));
// Scratch size at `level`, both amounts, per-team first.
664 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team,
const Impl::PerThreadValue& per_thread)
const {
665 return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
// Accepts the two amounts in the opposite order and forwards them as
// (level, per_team, per_thread).
667 inline TeamPolicy set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread,
const Impl::PerTeamValue& per_team)
const {
668 return TeamPolicy(internal_policy::set_scratch_size(level, per_team, per_thread));
// Non-const chunk-size setter returning TeamPolicy&. The static_assert requires the
// backend setter to return internal_policy&, and that reference is then cast to
// TeamPolicy&.
// NOTE(review): the cast is only valid if the backend returns a reference to this
// same object — confirm against the backend implementation.
672 inline TeamPolicy& set_chunk_size(
int chunk) {
673 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)), internal_policy&>::value,
"internal set_chunk_size should return a reference");
674 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
677 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team) {
678 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(level,per_team)), internal_policy&>::value,
"internal set_chunk_size should return a reference");
679 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level,per_team));
// Non-const scratch-size setters: each casts the result of the corresponding
// internal_policy setter to TeamPolicy&. (The closing brace of each function is
// not visible in this excerpt.)
// NOTE(review): the cast is only valid if the backend returns a reference to this
// same object — confirm against the backend implementation.

// Per-thread amount only.
681 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread) {
682 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level,per_thread));
// Both amounts, per-team first.
684 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerTeamValue& per_team,
const Impl::PerThreadValue& per_thread) {
685 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level, per_team, per_thread));
// Accepts the two amounts in the opposite order and forwards them as
// (level, per_team, per_thread).
687 inline TeamPolicy& set_scratch_size(
const int& level,
const Impl::PerThreadValue& per_thread,
const Impl::PerTeamValue& per_team) {
688 return static_cast<TeamPolicy&
>(internal_policy::set_scratch_size(level, per_team, per_thread));
// Splits an index range across the threads of a team: each thread gets a
// contiguous slice based on its team_rank() and the team_size(). Some lines of the
// struct (including the declarations of `start` and `end`) are not visible in this
// excerpt.
696 template<
typename iType,
class TeamMemberType>
697 struct TeamThreadRangeBoundariesStruct {
// First index of the slice for arg_rank: the range length is divided by arg_size,
// rounded up, and multiplied by the rank.
700 KOKKOS_INLINE_FUNCTION
static 701 iType ibegin(
const iType & arg_begin
702 ,
const iType & arg_end
703 ,
const iType & arg_rank
704 ,
const iType & arg_size
707 return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
// End of the slice for arg_rank: same chunk size applied to rank + 1, clamped so it
// never exceeds arg_end.
710 KOKKOS_INLINE_FUNCTION
static 711 iType iend(
const iType & arg_begin
712 ,
const iType & arg_end
713 ,
const iType & arg_rank
714 ,
const iType & arg_size
717 const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );
718 return end_ < arg_end ? end_ : arg_end ;
723 typedef iType index_type;
726 enum {increment = 1};
// Held by reference, not copied: the team member must outlive this struct.
727 const TeamMemberType& thread;
// Range [0, arg_end) split across the team.
729 KOKKOS_INLINE_FUNCTION
730 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
731 ,
const iType& arg_end
733 : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
734 , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
735 , thread( arg_thread )
// Range [arg_begin, arg_end) split across the team.
738 KOKKOS_INLINE_FUNCTION
739 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
740 ,
const iType& arg_begin
741 ,
const iType& arg_end
743 : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
744 , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
745 , thread( arg_thread )
// Bounds for a vector-level loop: constant `start` and `end` indices with a fixed
// increment of 1. The part of each constructor that initializes `end` is not
// visible in this excerpt.
749 template<
typename iType,
class TeamMemberType>
750 struct ThreadVectorRangeBoundariesStruct {
751 typedef iType index_type;
752 const index_type start;
753 const index_type end;
754 enum {increment = 1};
// Count form with an (unnamed, unused in the visible code) team member: starts at 0.
756 KOKKOS_INLINE_FUNCTION
757 constexpr ThreadVectorRangeBoundariesStruct (
const TeamMemberType,
const index_type& count ) noexcept
758 : start( static_cast<index_type>(0) )
// Count form without a team member: starts at 0.
761 KOKKOS_INLINE_FUNCTION
762 constexpr ThreadVectorRangeBoundariesStruct (
const index_type& count ) noexcept
763 : start( static_cast<index_type>(0) )
// Explicit-bounds form with an (unnamed) team member: starts at arg_begin.
766 KOKKOS_INLINE_FUNCTION
767 constexpr ThreadVectorRangeBoundariesStruct (
const TeamMemberType,
const index_type& arg_begin,
const index_type& arg_end ) noexcept
768 : start( static_cast<index_type>(arg_begin) )
// Explicit-bounds form without a team member: starts at arg_begin.
771 KOKKOS_INLINE_FUNCTION
772 constexpr ThreadVectorRangeBoundariesStruct (
const index_type& arg_begin,
const index_type& arg_end ) noexcept
773 : start( static_cast<index_type>(arg_begin) )
// Thin wrapper around a team member handle. The member is held by const
// reference, not copied, so the referenced team member must outlive this struct.
777 template<
class TeamMemberType>
778 struct ThreadSingleStruct {
779 const TeamMemberType& team_member;
// Binds the reference member to the caller's team member.
780 KOKKOS_INLINE_FUNCTION
781 ThreadSingleStruct(
const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
// Thin wrapper around a team member handle. The member is held by const
// reference, not copied, so the referenced team member must outlive this struct.
784 template<
class TeamMemberType>
785 struct VectorSingleStruct {
786 const TeamMemberType& team_member;
// Binds the reference member to the caller's team member.
787 KOKKOS_INLINE_FUNCTION
788 VectorSingleStruct(
const TeamMemberType& team_member_ ) : team_member( team_member_ ) {}
// Declaration whose result type is Impl::TeamThreadRangeBoundariesStruct for a
// single index type. (The line carrying the function name and parameters is not
// visible in this excerpt.)
799 template<
typename iType,
class TeamMemberType>
800 KOKKOS_INLINE_FUNCTION
801 Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
// Explicit-bounds overload: begin and end may have different index types; the
// result uses their std::common_type as its index type.
810 template<
typename iType1,
typename iType2,
class TeamMemberType>
811 KOKKOS_INLINE_FUNCTION
812 Impl::TeamThreadRangeBoundariesStruct<typename std::common_type<iType1, iType2>::type, TeamMemberType>
813 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
const iType2& end );
// Declaration whose result type is Impl::ThreadVectorRangeBoundariesStruct. (The
// line carrying the function name and parameters is not visible in this excerpt.)
821 template<
typename iType,
class TeamMemberType>
822 KOKKOS_INLINE_FUNCTION
823 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
// Explicit-bounds overload: both bounds share the index type iType.
826 template<
typename iType,
class TeamMemberType>
827 KOKKOS_INLINE_FUNCTION
828 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
829 ThreadVectorRange(
const TeamMemberType&,
const iType& arg_begin,
const iType& arg_end );
// Follows #if defined(KOKKOS_ENABLE_PROFILING). Produces a name for a parallel
// construct: the caller's label when it is non-empty, otherwise a name derived from
// typeid. HasTag defaults to true exactly when TagType is not void. Some lines of
// each specialization are not visible in this excerpt.
831 #if defined(KOKKOS_ENABLE_PROFILING) 834 template<
typename FunctorType,
typename TagType,
835 bool HasTag = !std::is_same<TagType, void>::value >
836 struct ParallelConstructName;
// Tagged case: the derived name is "<functor typeid name>/<tag typeid name>".
838 template<
typename FunctorType,
typename TagType>
839 struct ParallelConstructName<FunctorType, TagType, true> {
840 ParallelConstructName(std::string
const& label):label_ref(label) {
842 default_name = std::string(
typeid(FunctorType).name()) +
"/" +
843 typeid(TagType).name();
// Returns the derived name only when the caller's label is empty.
846 std::string
const&
get() {
847 return (label_ref.empty()) ? default_name : label_ref;
// Reference to the caller's label, not a copy: the label must outlive this object.
849 std::string
const& label_ref;
850 std::string default_name;
// Untagged case: the derived name is the functor's typeid name alone.
853 template<
typename FunctorType,
typename TagType>
854 struct ParallelConstructName<FunctorType, TagType, false> {
855 ParallelConstructName(std::string
const& label):label_ref(label) {
857 default_name = std::string(
typeid(FunctorType).name());
// Returns the derived name only when the caller's label is empty.
860 std::string
const&
get() {
861 return (label_ref.empty()) ? default_name : label_ref;
// Reference to the caller's label, not a copy: the label must outlive this object.
863 std::string
const& label_ref;
864 std::string default_name;
member_type chunk_size() const
return chunk_size
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy execution_policy
Tag this class as an execution policy.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args ... args)
Total range.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
TeamPolicy(const typename traits::execution_space &, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
RangePolicy set_chunk_size(int chunk_size_) const
set chunk_size to a discrete value
Execution policy for work over a range of an integral type.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
RangePolicy(const member_type work_begin, const member_type work_end, Args ... args)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.