Kokkos Core Kernels Package  Version of the Day
Kokkos_Parallel.hpp
Go to the documentation of this file.
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
47 
48 #ifndef KOKKOS_PARALLEL_HPP
49 #define KOKKOS_PARALLEL_HPP
50 
51 #include <cstddef>
52 #include <Kokkos_Core_fwd.hpp>
53 #include <Kokkos_View.hpp>
54 #include <Kokkos_ExecPolicy.hpp>
55 
56 #include <impl/Kokkos_Tools.hpp>
57 #include <type_traits>
58 #include <typeinfo>
59 
60 #include <impl/Kokkos_Tags.hpp>
61 #include <impl/Kokkos_Traits.hpp>
62 #include <impl/Kokkos_FunctorAnalysis.hpp>
63 #include <impl/Kokkos_FunctorAdapter.hpp>
64 
65 #ifdef KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
66 #include <iostream>
67 #endif
68 
69 //----------------------------------------------------------------------------
70 //----------------------------------------------------------------------------
71 
72 namespace Kokkos {
73 namespace Impl {
74 
75 template <class T, class = void>
76 struct is_detected_execution_space : std::false_type {
77  using type = not_a_type;
78 };
79 
80 template <class T>
81 struct is_detected_execution_space<T, void_t<typename T::execution_space>>
82  : std::true_type {
83  using type = typename T::execution_space;
84 };
85 
86 template <class T>
87 using detected_execution_space_t =
88  typename is_detected_execution_space<T>::type;
89 
90 template <class T, class = void>
91 struct is_detected_device_type : std::false_type {
92  using type = not_a_type;
93 };
94 
95 template <class T>
96 struct is_detected_device_type<T, void_t<typename T::device_type>>
97  : std::true_type {
98  using type = typename T::device_type;
99 };
100 
101 template <class T>
102 using detected_device_type_t = typename is_detected_device_type<T>::type;
103 
104 //----------------------------------------------------------------------------
113 template <class Functor, class Policy>
114 struct FunctorPolicyExecutionSpace {
115  using execution_space = std::conditional_t<
116  is_detected_execution_space<Policy>::value,
117  detected_execution_space_t<Policy>,
118  std::conditional_t<
119  is_detected_execution_space<Functor>::value,
120  detected_execution_space_t<Functor>,
121  std::conditional_t<
122  is_detected_device_type<Functor>::value,
123  detected_execution_space_t<detected_device_type_t<Functor>>,
124  Kokkos::DefaultExecutionSpace>>>;
125 };
126 
127 } // namespace Impl
128 } // namespace Kokkos
129 
130 //----------------------------------------------------------------------------
131 //----------------------------------------------------------------------------
132 
133 namespace Kokkos {
134 
156 template <class ExecPolicy, class FunctorType>
157 inline void parallel_for(
158  const ExecPolicy& policy, const FunctorType& functor,
159  const std::string& str = "",
160  typename std::enable_if<
161  Kokkos::Impl::is_execution_policy<ExecPolicy>::value>::type* =
162  nullptr) {
163  uint64_t kpID = 0;
164 
165  ExecPolicy inner_policy = policy;
166  Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID);
167 
168  Kokkos::Impl::shared_allocation_tracking_disable();
169  Impl::ParallelFor<FunctorType, ExecPolicy> closure(functor, inner_policy);
170  Kokkos::Impl::shared_allocation_tracking_enable();
171 
172  closure.execute();
173 
174  Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);
175 }
176 
177 template <class FunctorType>
178 inline void parallel_for(const size_t work_count, const FunctorType& functor,
179  const std::string& str = "") {
180  using execution_space =
181  typename Impl::FunctorPolicyExecutionSpace<FunctorType,
182  void>::execution_space;
183  using policy = RangePolicy<execution_space>;
184 
185  uint64_t kpID = 0;
186 
187  policy execution_policy = policy(0, work_count);
188 
189  Kokkos::Tools::Impl::begin_parallel_for(execution_policy, functor, str, kpID);
190 
191  Kokkos::Impl::shared_allocation_tracking_disable();
192  Impl::ParallelFor<FunctorType, policy> closure(functor, execution_policy);
193  Kokkos::Impl::shared_allocation_tracking_enable();
194 
195  closure.execute();
196 
197  Kokkos::Tools::Impl::end_parallel_for(execution_policy, functor, str, kpID);
198 }
199 
200 template <class ExecPolicy, class FunctorType>
201 inline void parallel_for(const std::string& str, const ExecPolicy& policy,
202  const FunctorType& functor) {
203 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
204  Kokkos::fence();
205  std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
206 #endif
207 
208  ::Kokkos::parallel_for(policy, functor, str);
209 
210 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
211  Kokkos::fence();
212  std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
213 #endif
214  (void)str;
215 }
216 
217 } // namespace Kokkos
218 
219 #include <Kokkos_Parallel_Reduce.hpp>
220 //----------------------------------------------------------------------------
221 //----------------------------------------------------------------------------
222 
223 namespace Kokkos {
224 
370 // i/ }
387 template <class ExecutionPolicy, class FunctorType>
388 inline void parallel_scan(
389  const ExecutionPolicy& policy, const FunctorType& functor,
390  const std::string& str = "",
391  typename std::enable_if<
392  Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
393  nullptr) {
394  uint64_t kpID = 0;
395  ExecutionPolicy inner_policy = policy;
396  Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
397 
398  Kokkos::Impl::shared_allocation_tracking_disable();
399  Impl::ParallelScan<FunctorType, ExecutionPolicy> closure(functor,
400  inner_policy);
401  Kokkos::Impl::shared_allocation_tracking_enable();
402 
403  closure.execute();
404 
405  Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
406 }
407 
408 template <class FunctorType>
409 inline void parallel_scan(const size_t work_count, const FunctorType& functor,
410  const std::string& str = "") {
411  using execution_space =
412  typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType,
413  void>::execution_space;
414 
416 
417  uint64_t kpID = 0;
418  policy execution_policy(0, work_count);
419  Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str,
420  kpID);
421  Kokkos::Impl::shared_allocation_tracking_disable();
422  Impl::ParallelScan<FunctorType, policy> closure(functor, execution_policy);
423  Kokkos::Impl::shared_allocation_tracking_enable();
424 
425  closure.execute();
426 
427  Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID);
428 }
429 
430 template <class ExecutionPolicy, class FunctorType>
431 inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
432  const FunctorType& functor) {
433 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
434  Kokkos::fence();
435  std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
436 #endif
437 
438  ::Kokkos::parallel_scan(policy, functor, str);
439 
440 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
441  Kokkos::fence();
442  std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
443 #endif
444  (void)str;
445 }
446 
447 template <class ExecutionPolicy, class FunctorType, class ReturnType>
448 inline void parallel_scan(
449  const ExecutionPolicy& policy, const FunctorType& functor,
450  ReturnType& return_value, const std::string& str = "",
451  typename std::enable_if<
452  Kokkos::Impl::is_execution_policy<ExecutionPolicy>::value>::type* =
453  nullptr) {
454  uint64_t kpID = 0;
455  ExecutionPolicy inner_policy = policy;
456  Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);
457 
458  Kokkos::Impl::shared_allocation_tracking_disable();
459  Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy, ReturnType> closure(
460  functor, inner_policy, return_value);
461  Kokkos::Impl::shared_allocation_tracking_enable();
462 
463  closure.execute();
464 
465  Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
466 
467  policy.space().fence();
468 }
469 
470 template <class FunctorType, class ReturnType>
471 inline void parallel_scan(const size_t work_count, const FunctorType& functor,
472  ReturnType& return_value,
473  const std::string& str = "") {
474  using execution_space =
475  typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType,
476  void>::execution_space;
477 
479 
480  policy execution_policy(0, work_count);
481  uint64_t kpID = 0;
482  Kokkos::Tools::Impl::begin_parallel_scan(execution_policy, functor, str,
483  kpID);
484 
485  Kokkos::Impl::shared_allocation_tracking_disable();
486  Impl::ParallelScanWithTotal<FunctorType, policy, ReturnType> closure(
487  functor, execution_policy, return_value);
488  Kokkos::Impl::shared_allocation_tracking_enable();
489 
490  closure.execute();
491 
492  Kokkos::Tools::Impl::end_parallel_scan(execution_policy, functor, str, kpID);
493 
494  execution_space().fence();
495 }
496 
497 template <class ExecutionPolicy, class FunctorType, class ReturnType>
498 inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
499  const FunctorType& functor,
500  ReturnType& return_value) {
501 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
502  Kokkos::fence();
503  std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
504 #endif
505 
506  ::Kokkos::parallel_scan(policy, functor, return_value, str);
507 
508 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES
509  Kokkos::fence();
510  std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
511 #endif
512  (void)str;
513 }
514 
515 } // namespace Kokkos
516 
517 //----------------------------------------------------------------------------
518 //----------------------------------------------------------------------------
519 
520 namespace Kokkos {
521 namespace Impl {
522 
523 template <class FunctorType,
524  bool HasTeamShmemSize =
525  has_member_team_shmem_size<FunctorType>::value,
526  bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
527 struct FunctorTeamShmemSize {
528  KOKKOS_INLINE_FUNCTION static size_t value(const FunctorType&, int) {
529  return 0;
530  }
531 };
532 
533 template <class FunctorType>
534 struct FunctorTeamShmemSize<FunctorType, true, false> {
535  static inline size_t value(const FunctorType& f, int team_size) {
536  return f.team_shmem_size(team_size);
537  }
538 };
539 
540 template <class FunctorType>
541 struct FunctorTeamShmemSize<FunctorType, false, true> {
542  static inline size_t value(const FunctorType& f, int team_size) {
543  return f.shmem_size(team_size);
544  }
545 };
546 template <class FunctorType>
547 struct FunctorTeamShmemSize<FunctorType, true, true> {
548  static inline size_t value(const FunctorType& /*f*/, int /*team_size*/) {
549  Kokkos::abort(
550  "Functor with both team_shmem_size and shmem_size defined is "
551  "not allowed");
552  return 0;
553  }
554 };
555 
556 } // namespace Impl
557 } // namespace Kokkos
558 
559 //----------------------------------------------------------------------------
560 //----------------------------------------------------------------------------
561 
562 #endif /* KOKKOS_PARALLEL_HPP */
Implementation of the ParallelFor operator that has a partial specialization for the device...
ReturnType
Given a Functor and Execution Policy query an execution space.
Execution policy for work over a range of an integral type.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Execute functor in parallel according to the execution policy.
Definition: dummy.cpp:3