Kokkos Core Kernels Package  Version of the Day
Kokkos_Cuda.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_CUDA_HPP
46 #define KOKKOS_CUDA_HPP
47 
48 #include <Kokkos_Macros.hpp>
49 #if defined(KOKKOS_ENABLE_CUDA)
50 
51 #include <Kokkos_Core_fwd.hpp>
52 
53 #include <iosfwd>
54 #include <vector>
55 
56 #include <impl/Kokkos_AnalyzePolicy.hpp>
57 #include <Kokkos_CudaSpace.hpp>
58 
59 #include <Kokkos_Parallel.hpp>
60 #include <Kokkos_TaskScheduler.hpp>
61 #include <Kokkos_Layout.hpp>
62 #include <Kokkos_ScratchSpace.hpp>
63 #include <Kokkos_MemoryTraits.hpp>
64 #include <impl/Kokkos_Tags.hpp>
65 #include <impl/Kokkos_ExecSpaceInitializer.hpp>
66 #include <impl/Kokkos_HostSharedPtr.hpp>
67 
68 /*--------------------------------------------------------------------------*/
69 
// Forward declarations of implementation classes used by the CUDA backend.
// Only the names are introduced here; the definitions live elsewhere.
namespace Kokkos {
namespace Impl {
class CudaInternal;
class CudaExec;
}  // namespace Impl
}  // namespace Kokkos
76 
77 /*--------------------------------------------------------------------------*/
78 
79 namespace Kokkos {
80 
81 namespace Impl {
namespace Experimental {

// Flag values identifying a CUDA launch mechanism. `Default` is zero and each
// remaining enumerator occupies its own bit, so values can be combined and
// masked with the bitwise operators defined below.
enum class CudaLaunchMechanism : unsigned {
  Default        = 0,
  ConstantMemory = 1u << 0,
  GlobalMemory   = 1u << 1,
  LocalMemory    = 1u << 2
};

// Bitwise OR of two launch-mechanism values, computed on the underlying
// unsigned representation.
constexpr CudaLaunchMechanism operator|(CudaLaunchMechanism lhs,
                                        CudaLaunchMechanism rhs) {
  return static_cast<CudaLaunchMechanism>(static_cast<unsigned>(lhs) |
                                          static_cast<unsigned>(rhs));
}

// Bitwise AND of two launch-mechanism values, computed on the underlying
// unsigned representation.
constexpr CudaLaunchMechanism operator&(CudaLaunchMechanism lhs,
                                        CudaLaunchMechanism rhs) {
  return static_cast<CudaLaunchMechanism>(static_cast<unsigned>(lhs) &
                                          static_cast<unsigned>(rhs));
}

// Property holder whose `launch_mechanism` member is default-initialised to
// the mechanism supplied as the template argument.
template <CudaLaunchMechanism Mechanism>
struct CudaDispatchProperties {
  CudaLaunchMechanism launch_mechanism = Mechanism;
};

}  // namespace Experimental
106 } // namespace Impl
117 class Cuda {
118  public:
120 
121 
123  using execution_space = Cuda;
124 
125 #if defined(KOKKOS_ENABLE_CUDA_UVM)
126  using memory_space = CudaUVMSpace;
128 #else
129  using memory_space = CudaSpace;
131 #endif
132 
134  using device_type = Kokkos::Device<execution_space, memory_space>;
135 
137  using size_type = memory_space::size_type;
138 
140  using array_layout = LayoutLeft;
141 
143  using scratch_memory_space = ScratchMemorySpace<Cuda>;
144 
146  //--------------------------------------------------
148 
149 
152  KOKKOS_INLINE_FUNCTION static int in_parallel() {
153 #if defined(__CUDA_ARCH__)
154  return true;
155 #else
156  return false;
157 #endif
158  }
159 
171  static bool sleep();
172 
178  static bool wake();
179 
186  static void impl_static_fence();
187 
188  void fence() const;
189 
191  static int concurrency();
192 
194  static void print_configuration(std::ostream&, const bool detail = false);
195 
197  //--------------------------------------------------
199 
200  Cuda();
201 
202  Cuda(cudaStream_t stream);
203 
204  //--------------------------------------------------------------------------
206 
207 
208  struct SelectDevice {
209  int cuda_device_id;
210  SelectDevice() : cuda_device_id(0) {}
211  explicit SelectDevice(int id) : cuda_device_id(id) {}
212  };
213 
215  static void impl_finalize();
216 
218  static int impl_is_initialized();
219 
221  static void impl_initialize(const SelectDevice = SelectDevice(),
222  const size_t num_instances = 1);
223 
227  static size_type device_arch();
228 
230  static size_type detect_device_count();
231 
235  static std::vector<unsigned> detect_device_arch();
236 
237  cudaStream_t cuda_stream() const;
238  int cuda_device() const;
239  const cudaDeviceProp& cuda_device_prop() const;
240 
242  //--------------------------------------------------------------------------
243 
244  static const char* name();
245 
246  inline Impl::CudaInternal* impl_internal_space_instance() const {
247  return m_space_instance.get();
248  }
249  uint32_t impl_instance_id() const noexcept { return 0; }
250 
251  private:
252  Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
253 };
254 
255 namespace Tools {
256 namespace Experimental {
257 template <>
258 struct DeviceTypeTraits<Cuda> {
260  static constexpr DeviceType id = DeviceType::Cuda;
261 };
262 } // namespace Experimental
263 } // namespace Tools
264 
265 namespace Impl {
266 
267 class CudaSpaceInitializer : public ExecSpaceInitializerBase {
268  public:
269  CudaSpaceInitializer() = default;
270  ~CudaSpaceInitializer() = default;
271  void initialize(const InitArguments& args) final;
272  void finalize(const bool all_spaces) final;
273  void fence() final;
274  void print_configuration(std::ostream& msg, const bool detail) final;
275 };
276 
277 } // namespace Impl
278 } // namespace Kokkos
279 
280 /*--------------------------------------------------------------------------*/
281 /*--------------------------------------------------------------------------*/
282 
283 namespace Kokkos {
284 namespace Impl {
285 
286 template <>
287 struct MemorySpaceAccess<Kokkos::CudaSpace,
288  Kokkos::Cuda::scratch_memory_space> {
289  enum : bool { assignable = false };
290  enum : bool { accessible = true };
291  enum : bool { deepcopy = false };
292 };
293 
294 #if defined(KOKKOS_ENABLE_CUDA_UVM)
295 
// When UVM is forced everywhere we must assume that CudaUVMSpace
// can act as a stand-in for CudaSpace.
// This assumption breaks if some unusual host-side execution space
// declares CudaUVMSpace as its preferred memory space.
301 
302 template <>
303 struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
304  Kokkos::Cuda::scratch_memory_space> {
305  enum : bool { assignable = false };
306  enum : bool { accessible = true };
307  enum : bool { deepcopy = false };
308 };
309 
310 #endif
311 
312 } // namespace Impl
313 } // namespace Kokkos
314 
315 #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
316 #endif /* #ifndef KOKKOS_CUDA_HPP */
Declaration of various MemoryLayout options.
Declaration of parallel operators.
Definition: dummy.cpp:3