45 #ifndef KOKKOS_EXECPOLICY_HPP 46 #define KOKKOS_EXECPOLICY_HPP 48 #include <Kokkos_Core_fwd.hpp> 49 #include <impl/Kokkos_Traits.hpp> 50 #include <impl/Kokkos_Error.hpp> 51 #include <impl/Kokkos_Tags.hpp> 52 #include <impl/Kokkos_AnalyzePolicy.hpp> 53 #include <Kokkos_Concepts.hpp> 60 struct ParallelForTag {};
/// Empty tag type naming the parallel-scan pattern.
/// PatternImplSpecializationFromTag maps it to ParallelScan<Args...> later in
/// this file.
61 struct ParallelScanTag {};
/// Empty tag type naming the parallel-reduce pattern.
/// PatternImplSpecializationFromTag maps it to ParallelReduce<Args...> later in
/// this file.
62 struct ParallelReduceTag {};
66 ChunkSize(
int value_) : value(value_) {}
90 template <
class... Properties>
91 class RangePolicy :
public Impl::PolicyTraits<Properties...> {
93 using traits = Impl::PolicyTraits<Properties...>;
96 typename traits::execution_space m_space;
97 typename traits::index_type m_begin;
98 typename traits::index_type m_end;
99 typename traits::index_type m_granularity;
100 typename traits::index_type m_granularity_mask;
102 template <
class... OtherProperties>
108 using member_type =
typename traits::index_type;
109 using index_type =
typename traits::index_type;
111 KOKKOS_INLINE_FUNCTION
const typename traits::execution_space& space()
const {
/// Returns m_begin, the stored lower bound of the range. The constructors
/// visible in this file store 0 here when work_begin is not less than
/// work_end.
114 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
/// Returns m_end, the stored upper bound of the range. The constructors
/// visible in this file store 0 here when work_begin is not less than
/// work_end, so begin() == end() for such a range.
115 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
/// Call operator taking an int by const reference. The body is empty, so
/// invoking it has no effect.
122 void operator()(
const int&)
const {}
124 template <
class... OtherProperties>
125 RangePolicy(
const RangePolicy<OtherProperties...>& p)
130 m_granularity(p.m_granularity),
131 m_granularity_mask(p.m_granularity_mask) {}
138 m_granularity_mask(0) {}
141 inline RangePolicy(
const typename traits::execution_space& work_space,
142 const member_type work_begin,
const member_type work_end)
143 : m_space(work_space),
144 m_begin(work_begin < work_end ? work_begin : 0),
145 m_end(work_begin < work_end ? work_end : 0),
147 m_granularity_mask(0) {
148 set_auto_chunk_size();
/// Total range over a default-constructed execution space instance.
/// Delegates to the (work_space, work_begin, work_end) constructor.
/// NOTE(review): the delegated-to constructor already calls
/// set_auto_chunk_size() in its own body, so the call below looks redundant.
/// Confirm before removing.
152 inline RangePolicy(
const member_type work_begin,
const member_type work_end)
153 :
RangePolicy(typename traits::execution_space(), work_begin, work_end) {
154 set_auto_chunk_size();
158 template <
class... Args>
159 inline RangePolicy(
const typename traits::execution_space& work_space,
160 const member_type work_begin,
const member_type work_end,
162 : m_space(work_space),
163 m_begin(work_begin < work_end ? work_begin : 0),
164 m_end(work_begin < work_end ? work_end : 0),
166 m_granularity_mask(0) {
167 set_auto_chunk_size();
172 template <
class... Args>
173 inline RangePolicy(
const member_type work_begin,
const member_type work_end,
175 :
RangePolicy(typename traits::execution_space(), work_begin, work_end) {
176 set_auto_chunk_size();
184 template <
class... Args>
185 inline void set(Args...) {
187 0 ==
sizeof...(Args),
188 "Kokkos::RangePolicy: unhandled constructor arguments encountered.");
191 template <
class... Args>
192 inline void set(
const ChunkSize& chunksize, Args... args) {
193 m_granularity = chunksize.value;
194 m_granularity_mask = m_granularity - 1;
/// Returns the chunk size, i.e. m_granularity. set(const ChunkSize&) stores
/// chunksize.value in that member and set_auto_chunk_size() may overwrite it.
200 inline member_type
chunk_size()
const {
return m_granularity; }
205 p.m_granularity = chunk_size_;
206 p.m_granularity_mask = p.m_granularity - 1;
212 inline void set_auto_chunk_size() {
213 int64_t concurrency =
214 static_cast<int64_t
>(traits::execution_space::concurrency());
215 if (concurrency == 0) concurrency = 1;
217 if (m_granularity > 0) {
218 if (!Impl::is_integral_power_of_two(m_granularity))
219 Kokkos::abort(
"RangePolicy blocking granularity must be power of two");
222 int64_t new_chunk_size = 1;
223 while (new_chunk_size * 100 * concurrency <
224 static_cast<int64_t>(m_end - m_begin))
226 if (new_chunk_size < 128) {
228 while ((new_chunk_size * 40 * concurrency <
229 static_cast<int64_t>(m_end - m_begin)) &&
230 (new_chunk_size < 128))
233 m_granularity = new_chunk_size;
234 m_granularity_mask = m_granularity - 1;
243 using work_tag =
typename RangePolicy<Properties...>::work_tag;
244 using member_type =
typename RangePolicy<Properties...>::member_type;
/// Returns m_begin, the first index of this partition's subrange. The
/// WorkRange constructor clamps it to range.end().
246 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin; }
/// Returns m_end, the end index of this partition's subrange. The WorkRange
/// constructor clamps it to range.end().
247 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end; }
253 KOKKOS_INLINE_FUNCTION
256 : m_begin(0), m_end(0) {
259 const member_type work_part =
260 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
261 range.m_granularity_mask) &
262 ~member_type(range.m_granularity_mask);
264 m_begin = range.begin() + work_part * part_rank;
265 m_end = m_begin + work_part;
267 if (range.end() < m_begin) m_begin = range.end();
268 if (range.end() < m_end) m_end = range.end();
289 template <
class ExecSpace,
class... Properties>
290 class TeamPolicyInternal :
public Impl::PolicyTraits<Properties...> {
292 using traits = Impl::PolicyTraits<Properties...>;
295 using index_type =
typename traits::index_type;
308 template <
class FunctorType>
309 static int team_size_max(
const FunctorType&);
321 template <
class FunctorType>
322 static int team_size_recommended(
const FunctorType&);
324 template <
class FunctorType>
325 static int team_size_recommended(
const FunctorType&,
const int&);
327 template <
class FunctorType>
328 int team_size_recommended(
const FunctorType& functor,
329 const int vector_length);
333 TeamPolicyInternal(
const typename traits::execution_space&,
334 int league_size_request,
int team_size_request,
335 int vector_length_request = 1);
337 TeamPolicyInternal(
const typename traits::execution_space&,
338 int league_size_request,
const Kokkos::AUTO_t&,
339 int vector_length_request = 1);
343 TeamPolicyInternal(
int league_size_request,
int team_size_request,
344 int vector_length_request = 1);
346 TeamPolicyInternal(
int league_size_request,
const Kokkos::AUTO_t&,
347 int vector_length_request = 1);
/// League size: the number of teams in the league (wording taken from this
/// file's Doxygen summary for league_size()). Declaration only.
358 KOKKOS_INLINE_FUNCTION
int league_size()
const;
365 KOKKOS_INLINE_FUNCTION
int team_size()
const;
369 inline bool impl_auto_team_size()
const;
372 inline bool impl_auto_vector_length()
const;
374 static int vector_length_max();
376 KOKKOS_INLINE_FUNCTION
int impl_vector_length()
const;
378 inline typename traits::index_type chunk_size()
const;
380 inline TeamPolicyInternal& set_chunk_size(
int chunk_size);
/// Handle to the currently executing team shared scratch memory.
/// The return type is the execution space's scratch_memory_space.
/// Declaration only.
387 KOKKOS_INLINE_FUNCTION
388 typename traits::execution_space::scratch_memory_space
team_shmem()
const;
/// Rank of this thread within this team. Declaration only.
397 KOKKOS_INLINE_FUNCTION
int team_rank()
const;
/// Number of threads in this team. Declaration only.
400 KOKKOS_INLINE_FUNCTION
int team_size()
const;
/// Intra-team reduction. Returns join of all values of the team members.
/// Takes this member's contribution (a JoinOp::value_type, by value) and the
/// JoinOp instance. NOTE(review): presumably the JoinOp is what combines the
/// values; its required interface is not visible here. Declaration only.
407 template <
class JoinOp>
408 KOKKOS_INLINE_FUNCTION
typename JoinOp::value_type
team_reduce(
409 const typename JoinOp::value_type,
const JoinOp&)
const;
/// Intra-team exclusive prefix sum with team_rank() ordering.
/// value is this member's contribution; the result is returned by value.
/// Declaration only.
416 template <
typename Type>
417 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type& value)
const;
428 template <
typename Type>
429 KOKKOS_INLINE_FUNCTION Type
team_scan(
const Type& value,
430 Type*
const global_accum)
const;
434 struct PerTeamValue {
436 PerTeamValue(
int arg);
439 struct PerThreadValue {
441 PerThreadValue(
int arg);
444 template <
class iType,
class... Args>
445 struct ExtractVectorLength {
446 static inline iType value(
447 typename std::enable_if<std::is_integral<iType>::value, iType>::type val,
452 typename std::enable_if<!std::is_integral<iType>::value,
int>::type
454 typename std::enable_if<!std::is_integral<iType>::value, iType>::type,
460 template <
class iType,
class... Args>
461 inline typename std::enable_if<std::is_integral<iType>::value, iType>::type
462 extract_vector_length(iType val, Args...) {
466 template <
class iType,
class... Args>
467 inline typename std::enable_if<!std::is_integral<iType>::value,
int>::type
468 extract_vector_length(iType, Args...) {
/// Declaration only: produces an Impl::PerTeamValue from arg. ScratchRequest
/// and TeamPolicy::set_scratch_size accept the result and read its `value`
/// member. NOTE(review): presumably `value` is simply arg; the definition is
/// not visible here.
474 Impl::PerTeamValue PerTeam(
const int& arg);
/// Declaration only: produces an Impl::PerThreadValue from arg, the
/// per-thread counterpart of PerTeam. NOTE(review): definition not visible
/// here.
475 Impl::PerThreadValue PerThread(
const int& arg);
477 struct ScratchRequest {
483 inline ScratchRequest(
const int& level_,
484 const Impl::PerTeamValue& team_value) {
486 per_team = team_value.value;
490 inline ScratchRequest(
const int& level_,
491 const Impl::PerThreadValue& thread_value) {
494 per_thread = thread_value.value;
497 inline ScratchRequest(
const int& level_,
const Impl::PerTeamValue& team_value,
498 const Impl::PerThreadValue& thread_value) {
500 per_team = team_value.value;
501 per_thread = thread_value.value;
504 inline ScratchRequest(
const int& level_,
505 const Impl::PerThreadValue& thread_value,
506 const Impl::PerTeamValue& team_value) {
508 per_team = team_value.value;
509 per_thread = thread_value.value;
/// Declaration only. TeamPolicy's set_scratch_size overloads call this with
/// their `level` argument before forwarding to the internal policy.
/// NOTE(review): presumably rejects unsupported scratch levels; which levels
/// are accepted and how a bad level is reported are defined elsewhere.
514 void team_policy_check_valid_storage_level_argument(
int level);
542 template <
class... Properties>
544 :
public Impl::TeamPolicyInternal<
545 typename Impl::PolicyTraits<Properties...>::execution_space,
547 using internal_policy = Impl::TeamPolicyInternal<
548 typename Impl::PolicyTraits<Properties...>::execution_space,
551 template <
class... OtherProperties>
555 using traits = Impl::PolicyTraits<Properties...>;
563 int league_size_request,
int team_size_request,
564 int vector_length_request = 1)
565 : internal_policy(space_, league_size_request, team_size_request,
566 vector_length_request) {}
/// Construct policy with the given instance of the execution space and an
/// automatic team size: the Kokkos::AUTO_t argument is only an overload
/// selector, and Kokkos::AUTO() is forwarded to the internal policy as the
/// team size request. vector_length_request defaults to 1.
568 TeamPolicy(
const typename traits::execution_space& space_,
569 int league_size_request,
const Kokkos::AUTO_t&,
570 int vector_length_request = 1)
571 : internal_policy(space_, league_size_request,
Kokkos::AUTO(),
572 vector_length_request) {}
574 TeamPolicy(
const typename traits::execution_space& space_,
575 int league_size_request,
const Kokkos::AUTO_t&,
576 const Kokkos::AUTO_t&)
577 : internal_policy(space_, league_size_request,
Kokkos::AUTO(),
579 TeamPolicy(
const typename traits::execution_space& space_,
580 int league_size_request,
const int team_size_request,
581 const Kokkos::AUTO_t&)
582 : internal_policy(space_, league_size_request, team_size_request,
587 int vector_length_request = 1)
588 : internal_policy(league_size_request, team_size_request,
589 vector_length_request) {}
/// Same as the (space, league, AUTO, vector_length) constructor but without
/// an execution space argument: forwards league_size_request, Kokkos::AUTO()
/// as the team size request, and vector_length_request (default 1) to the
/// internal policy.
591 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
592 int vector_length_request = 1)
593 : internal_policy(league_size_request,
Kokkos::AUTO(),
594 vector_length_request) {}
/// Constructor with both team size and vector length automatic: forwards
/// league_size_request followed by Kokkos::AUTO() for each of the two
/// remaining arguments of the internal policy. No execution space argument is
/// taken.
596 TeamPolicy(
int league_size_request,
const Kokkos::AUTO_t&,
597 const Kokkos::AUTO_t&)
598 : internal_policy(league_size_request,
Kokkos::AUTO(),
Kokkos::AUTO()) {}
599 TeamPolicy(
int league_size_request,
const int team_size_request,
600 const Kokkos::AUTO_t&)
601 : internal_policy(league_size_request, team_size_request,
/// Converting constructor from a TeamPolicy with different Properties.
/// Initializes the internal_policy base from p, then assigns p to the traits
/// sub-object through internal_policy::traits::operator=.
/// NOTE(review): p is taken by value (`const TeamPolicy<...> p`), so every
/// conversion copies the source policy first. RangePolicy's converting
/// constructor in this file takes its argument by const reference; consider
/// doing the same here.
604 template <
class... OtherProperties>
605 TeamPolicy(
const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
608 internal_policy::traits::operator=(p);
/// Builds a TeamPolicy from an internal_policy by copy-constructing the base
/// class sub-object from p. The constructor is not marked explicit, so an
/// internal_policy converts to TeamPolicy implicitly.
612 TeamPolicy(
const internal_policy& p) : internal_policy(p) {}
/// Forwards chunk to internal_policy::set_chunk_size and returns the result
/// cast to TeamPolicy&, so the caller gets back the derived policy type.
/// The static_assert checks at compile time that the internal call returns
/// internal_policy&, which is what makes the static_cast below a
/// reference-to-reference downcast.
615 inline TeamPolicy& set_chunk_size(
int chunk) {
616 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
617 internal_policy&>::value,
618 "internal set_chunk_size should return a reference");
619 return static_cast<TeamPolicy&
>(internal_policy::set_chunk_size(chunk));
/// Sets a per-team scratch size for the given level. Calls
/// team_policy_check_valid_storage_level_argument(level), then forwards to
/// internal_policy::set_scratch_size(level, per_team) and returns the result
/// cast to TeamPolicy&.
/// NOTE(review): the static_assert inspects the return type of
/// internal_policy::set_scratch_size, but its message says
/// "internal set_chunk_size ...". This looks like a copy-paste slip from
/// set_chunk_size; the message should name set_scratch_size.
622 inline TeamPolicy& set_scratch_size(
const int& level,
623 const Impl::PerTeamValue& per_team) {
624 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
626 internal_policy&>::value,
627 "internal set_chunk_size should return a reference");
629 team_policy_check_valid_storage_level_argument(level);
630 return static_cast<TeamPolicy&
>(
631 internal_policy::set_scratch_size(level, per_team));
633 inline TeamPolicy& set_scratch_size(
const int& level,
634 const Impl::PerThreadValue& per_thread) {
635 team_policy_check_valid_storage_level_argument(level);
636 return static_cast<TeamPolicy&
>(
637 internal_policy::set_scratch_size(level, per_thread));
639 inline TeamPolicy& set_scratch_size(
const int& level,
640 const Impl::PerTeamValue& per_team,
641 const Impl::PerThreadValue& per_thread) {
642 team_policy_check_valid_storage_level_argument(level);
643 return static_cast<TeamPolicy&
>(
644 internal_policy::set_scratch_size(level, per_team, per_thread));
646 inline TeamPolicy& set_scratch_size(
const int& level,
647 const Impl::PerThreadValue& per_thread,
648 const Impl::PerTeamValue& per_team) {
649 team_policy_check_valid_storage_level_argument(level);
650 return static_cast<TeamPolicy&
>(
651 internal_policy::set_scratch_size(level, per_team, per_thread));
657 template <
typename iType,
class TeamMemberType>
658 struct TeamThreadRangeBoundariesStruct {
/// Start index for one team rank. The visible term is the per-rank chunk
/// length, (arg_end - arg_begin) divided by arg_size and rounded up,
/// multiplied by arg_rank. The constructors pass team_rank() and team_size()
/// for arg_rank and arg_size.
/// NOTE(review): the beginning of the return statement is not visible in this
/// listing; presumably the term is added to arg_begin. Confirm.
660 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
661 const iType& arg_end,
662 const iType& arg_rank,
663 const iType& arg_size) {
665 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
/// End index for one team rank. The visible term is the per-rank chunk
/// length, (arg_end - arg_begin) divided by arg_size and rounded up,
/// multiplied by (arg_rank + 1). The result end_ is clamped so it never
/// exceeds arg_end.
/// NOTE(review): the declaration of end_ is not visible in this listing;
/// presumably it is arg_begin plus the visible term. Confirm.
668 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
669 const iType& arg_end,
670 const iType& arg_rank,
671 const iType& arg_size) {
674 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
675 return end_ < arg_end ? end_ : arg_end;
679 using index_type = iType;
682 enum { increment = 1 };
683 const TeamMemberType& thread;
685 KOKKOS_INLINE_FUNCTION
686 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
687 const iType& arg_end)
689 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
690 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
691 thread(arg_thread) {}
693 KOKKOS_INLINE_FUNCTION
694 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread,
695 const iType& arg_begin,
const iType& arg_end)
696 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
697 arg_thread.team_size())),
698 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
699 arg_thread.team_size())),
700 thread(arg_thread) {}
703 template <
typename iType,
class TeamMemberType>
704 struct TeamVectorRangeBoundariesStruct {
/// Start index for one team rank. The visible term is the per-rank chunk
/// length, (arg_end - arg_begin) divided by arg_size and rounded up,
/// multiplied by arg_rank. The constructors pass team_rank() and team_size()
/// for arg_rank and arg_size.
/// NOTE(review): the beginning of the return statement is not visible in this
/// listing; presumably the term is added to arg_begin. Confirm.
706 KOKKOS_INLINE_FUNCTION
static iType ibegin(
const iType& arg_begin,
707 const iType& arg_end,
708 const iType& arg_rank,
709 const iType& arg_size) {
711 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
/// End index for one team rank. The visible term is the per-rank chunk
/// length, (arg_end - arg_begin) divided by arg_size and rounded up,
/// multiplied by (arg_rank + 1). The result end_ is clamped so it never
/// exceeds arg_end.
/// NOTE(review): the declaration of end_ is not visible in this listing;
/// presumably it is arg_begin plus the visible term. Confirm.
714 KOKKOS_INLINE_FUNCTION
static iType iend(
const iType& arg_begin,
715 const iType& arg_end,
716 const iType& arg_rank,
717 const iType& arg_size) {
720 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
721 return end_ < arg_end ? end_ : arg_end;
725 using index_type = iType;
728 enum { increment = 1 };
729 const TeamMemberType& thread;
731 KOKKOS_INLINE_FUNCTION
732 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
733 const iType& arg_end)
735 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
736 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
737 thread(arg_thread) {}
739 KOKKOS_INLINE_FUNCTION
740 TeamVectorRangeBoundariesStruct(
const TeamMemberType& arg_thread,
741 const iType& arg_begin,
const iType& arg_end)
742 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
743 arg_thread.team_size())),
744 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
745 arg_thread.team_size())),
746 thread(arg_thread) {}
/// Half-open index range [start, end) with a fixed increment of 1, stored as
/// two const members of type iType.
/// Four constexpr, noexcept constructors are provided: (count) and
/// (begin, end), each with and without a leading TeamMemberType argument.
749 template <
typename iType,
class TeamMemberType>
750 struct ThreadVectorRangeBoundariesStruct {
751 using index_type = iType;
752 const index_type start;
753 const index_type end;
754 enum { increment = 1 };
/// Range [0, count). The team member argument is unnamed and unused.
756 KOKKOS_INLINE_FUNCTION
757 constexpr ThreadVectorRangeBoundariesStruct(
const TeamMemberType,
758 const index_type& count) noexcept
759 : start(static_cast<index_type>(0)), end(count) {}
/// Range [0, count). Not marked explicit, so an index_type converts to this
/// struct implicitly.
761 KOKKOS_INLINE_FUNCTION
762 constexpr ThreadVectorRangeBoundariesStruct(
const index_type& count) noexcept
763 : start(static_cast<index_type>(0)), end(count) {}
/// Range [arg_begin, arg_end). The team member argument is unnamed and
/// unused.
765 KOKKOS_INLINE_FUNCTION
766 constexpr ThreadVectorRangeBoundariesStruct(
767 const TeamMemberType,
const index_type& arg_begin,
768 const index_type& arg_end) noexcept
769 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
/// Range [arg_begin, arg_end).
771 KOKKOS_INLINE_FUNCTION
772 constexpr ThreadVectorRangeBoundariesStruct(
773 const index_type& arg_begin,
const index_type& arg_end) noexcept
774 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
/// Wrapper holding a const reference to a team member.
/// The reference is bound to the constructor argument, so the referenced team
/// member must outlive this object. The constructor is not marked explicit.
777 template <
class TeamMemberType>
778 struct ThreadSingleStruct {
779 const TeamMemberType& team_member;
780 KOKKOS_INLINE_FUNCTION
781 ThreadSingleStruct(
const TeamMemberType& team_member_)
782 : team_member(team_member_) {}
/// Wrapper holding a const reference to a team member; same layout as
/// ThreadSingleStruct but a distinct type.
/// The reference is bound to the constructor argument, so the referenced team
/// member must outlive this object. The constructor is not marked explicit.
785 template <
class TeamMemberType>
786 struct VectorSingleStruct {
787 const TeamMemberType& team_member;
788 KOKKOS_INLINE_FUNCTION
789 VectorSingleStruct(
const TeamMemberType& team_member_)
790 : team_member(team_member_) {}
/// Deleted placeholder overload of TeamThreadRange(team, count).
/// The third template parameter, _never_use_this_overload, does not appear in
/// the function parameters and therefore cannot be deduced; together with
/// "= delete" this means the overload can never be the one actually called.
/// NOTE(review): presumably kept to document the signature that
/// team-member-specific overloads provide. Confirm.
802 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
803 KOKKOS_INLINE_FUNCTION_DELETED
804 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
805 TeamThreadRange(
const TeamMemberType&,
const iType& count) =
delete;
/// Deleted placeholder overload of TeamThreadRange(team, begin, end).
/// begin and end may have different types; the boundaries struct is
/// instantiated with their std::common_type.
/// The template parameter _never_use_this_overload cannot be deduced from the
/// arguments and the function is deleted, so this overload can never be the
/// one actually called.
814 template <
typename iType1,
typename iType2,
class TeamMemberType,
815 class _never_use_this_overload>
816 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
817 typename std::common_type<iType1, iType2>::type, TeamMemberType>
818 TeamThreadRange(
const TeamMemberType&,
const iType1& begin,
819 const iType2& end) =
delete;
/// Deleted placeholder overload of TeamVectorRange(team, count).
/// The third template parameter, _never_use_this_overload, does not appear in
/// the function parameters and therefore cannot be deduced; together with
/// "= delete" this means the overload can never be the one actually called.
/// The declared return type is Impl::TeamVectorRangeBoundariesStruct, the
/// boundaries struct this file defines for team-vector ranges, so the
/// placeholder signature matches the range kind it stands for.
828 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
829 KOKKOS_INLINE_FUNCTION_DELETED
830 Impl::TeamVectorRangeBoundariesStruct<iType, TeamMemberType>
831 TeamVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
/// Deleted placeholder overload of TeamVectorRange(team, begin, end).
/// begin and end may have different types; the boundaries struct is
/// instantiated with their std::common_type.
/// The template parameter _never_use_this_overload cannot be deduced from the
/// arguments and the function is deleted, so this overload can never be the
/// one actually called.
/// The declared return type is Impl::TeamVectorRangeBoundariesStruct, the
/// boundaries struct this file defines for team-vector ranges, so the
/// placeholder signature matches the range kind it stands for.
840 template <
typename iType1,
typename iType2,
class TeamMemberType,
841 class _never_use_this_overload>
842 KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamVectorRangeBoundariesStruct<
843 typename std::common_type<iType1, iType2>::type, TeamMemberType>
844 TeamVectorRange(
const TeamMemberType&,
const iType1& begin,
845 const iType2& end) =
delete;
/// Deleted placeholder overload of ThreadVectorRange(team, count), declared
/// to return Impl::ThreadVectorRangeBoundariesStruct.
/// The third template parameter, _never_use_this_overload, does not appear in
/// the function parameters and therefore cannot be deduced; together with
/// "= delete" this means the overload can never be the one actually called.
854 template <
typename iType,
class TeamMemberType,
class _never_use_this_overload>
855 KOKKOS_INLINE_FUNCTION_DELETED
856 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
857 ThreadVectorRange(
const TeamMemberType&,
const iType& count) =
delete;
/// Deleted placeholder overload of ThreadVectorRange(team, begin, end).
/// arg_begin and arg_end may have different types; the boundaries struct is
/// instantiated with their std::common_type.
/// The template parameter _never_use_this_overload cannot be deduced from the
/// arguments and the function is deleted, so this overload can never be the
/// one actually called.
859 template <
typename iType1,
typename iType2,
class TeamMemberType,
860 class _never_use_this_overload>
861 KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
862 typename std::common_type<iType1, iType2>::type, TeamMemberType>
863 ThreadVectorRange(
const TeamMemberType&,
const iType1& arg_begin,
864 const iType2& arg_end) =
delete;
/// Primary template, declared but not defined. HasTag defaults to true
/// exactly when TagType is not void; the two partial specializations that
/// follow cover HasTag == true and HasTag == false.
868 template <
typename FunctorType,
typename TagType,
869 bool HasTag = !std::is_same<TagType, void>::value>
870 struct ParallelConstructName;
/// Specialization for HasTag == true (TagType is not void).
/// get() returns the caller-supplied label unless it is empty, in which case
/// it returns default_name. The visible assignment builds default_name as
/// typeid(FunctorType).name() + "/" + typeid(TagType).name(); any condition
/// guarding that assignment is not visible in this listing.
/// NOTE(review): label_ref is a reference member bound to the constructor
/// argument, so the string passed in must outlive this object. Constructing
/// from a temporary std::string would leave get() reading a dangling
/// reference.
872 template <
typename FunctorType,
typename TagType>
873 struct ParallelConstructName<FunctorType, TagType, true> {
874 ParallelConstructName(std::string
const& label) : label_ref(label) {
876 default_name = std::string(
typeid(FunctorType).name()) +
"/" +
877 typeid(TagType).name();
880 std::string
const&
get() {
881 return (label_ref.empty()) ? default_name : label_ref;
883 std::string
const& label_ref;
884 std::string default_name;
/// Specialization for HasTag == false (TagType is void).
/// get() returns the caller-supplied label unless it is empty, in which case
/// it returns default_name. The visible assignment builds default_name from
/// typeid(FunctorType).name() alone; any condition guarding that assignment
/// is not visible in this listing.
/// NOTE(review): label_ref is a reference member bound to the constructor
/// argument, so the string passed in must outlive this object. Constructing
/// from a temporary std::string would leave get() reading a dangling
/// reference.
887 template <
typename FunctorType,
typename TagType>
888 struct ParallelConstructName<FunctorType, TagType, false> {
889 ParallelConstructName(std::string
const& label) : label_ref(label) {
891 default_name = std::string(
typeid(FunctorType).name());
894 std::string
const&
get() {
895 return (label_ref.empty()) ? default_name : label_ref;
897 std::string
const& label_ref;
898 std::string default_name;
/// Maps a pattern tag plus a pack of arguments to the matching implementation
/// template. The primary template is declared but not defined, so only the
/// three tags specialized below are usable.
/// Each specialization derives from identity<...> of the mapped type.
/// NOTE(review): identity is declared elsewhere; presumably it exposes the
/// wrapped type as a nested `type`. Confirm.
909 template <
class PatternTag,
class... Args>
910 struct PatternImplSpecializationFromTag;
/// ParallelForTag -> ParallelFor<Args...>.
912 template <
class... Args>
913 struct PatternImplSpecializationFromTag<
Kokkos::ParallelForTag, Args...>
914 : identity<ParallelFor<Args...>> {};
/// ParallelReduceTag -> ParallelReduce<Args...>.
916 template <
class... Args>
917 struct PatternImplSpecializationFromTag<
Kokkos::ParallelReduceTag, Args...>
918 : identity<ParallelReduce<Args...>> {};
/// ParallelScanTag -> ParallelScan<Args...>.
920 template <
class... Args>
921 struct PatternImplSpecializationFromTag<
Kokkos::ParallelScanTag, Args...>
922 : identity<ParallelScan<Args...>> {};
/// Inverse of PatternImplSpecializationFromTag: maps an implementation
/// template specialization back to its pattern tag. The primary template is
/// declared but not defined, so only ParallelFor, ParallelReduce and
/// ParallelScan specializations are accepted.
/// Each specialization derives from identity<...> of the tag.
/// NOTE(review): identity is declared elsewhere; presumably it exposes the
/// wrapped type as a nested `type`. Confirm.
924 template <
class PatternImpl>
925 struct PatternTagFromImplSpecialization;
/// ParallelFor<Args...> -> ParallelForTag.
927 template <
class... Args>
928 struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
929 : identity<ParallelForTag> {};
/// ParallelReduce<Args...> -> ParallelReduceTag.
931 template <
class... Args>
932 struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
933 : identity<ParallelReduceTag> {};
/// ParallelScan<Args...> -> ParallelScanTag.
935 template <
class... Args>
936 struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
937 : identity<ParallelScanTag> {};
RangePolicy(const member_type work_begin, const member_type work_end, Args... args)
Total range.
member_type chunk_size() const
Return the chunk size.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end, Args... args)
Total range.
RangePolicy(const typename traits::execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
RangePolicy set_chunk_size(int chunk_size_) const
Return a policy with chunk_size set to the given discrete value.
Execution policy for work over a range of an integral type.
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
Parallel execution of a functor calls the functor once with each member of the execution policy...
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.