45 #ifndef KOKKOS_CRS_HPP 46 #define KOKKOS_CRS_HPP 48 #include <Kokkos_View.hpp> 49 #include <Kokkos_CopyViews.hpp> 83 template <
class DataType,
class Arg1Type,
class Arg2Type = void,
84 typename SizeType =
typename ViewTraits<DataType*, Arg1Type, Arg2Type,
91 using data_type = DataType;
92 using array_layout =
typename traits::array_layout;
93 using execution_space =
typename traits::execution_space;
94 using memory_space =
typename traits::memory_space;
95 using device_type =
typename traits::device_type;
96 using size_type = SizeType;
110 KOKKOS_DEFAULTED_FUNCTION
Crs() =
default;
111 KOKKOS_DEFAULTED_FUNCTION
Crs(
Crs const&) =
default;
112 KOKKOS_DEFAULTED_FUNCTION
Crs(
Crs&&) =
default;
113 KOKKOS_DEFAULTED_FUNCTION
Crs& operator=(
Crs const&) =
default;
114 KOKKOS_DEFAULTED_FUNCTION
Crs& operator=(
Crs&&) =
default;
115 KOKKOS_DEFAULTED_FUNCTION ~
Crs() =
default;
121 template <
class EntriesType,
class RowMapType>
122 KOKKOS_INLINE_FUNCTION
Crs(
const RowMapType& row_map_,
123 const EntriesType& entries_)
124 : row_map(row_map_), entries(entries_) {}
128 KOKKOS_INLINE_FUNCTION
130 return (row_map.
extent(0) != 0)
131 ? row_map.
extent(0) -
static_cast<size_type
>(1)
132 : static_cast<size_type>(0);
138 template <
class OutCounts,
class DataType,
class Arg1Type,
class Arg2Type,
140 void get_crs_transpose_counts(
141 OutCounts& out, Crs<DataType, Arg1Type, Arg2Type, SizeType>
const& in,
142 std::string
const& name =
"transpose_counts");
144 template <
class OutCounts,
class InCrs>
145 typename OutCounts::value_type get_crs_row_map_from_counts(
146 OutCounts& out, InCrs
const& in, std::string
const& name =
"row_map");
148 template <
class DataType,
class Arg1Type,
class Arg2Type,
class SizeType>
149 void transpose_crs(Crs<DataType, Arg1Type, Arg2Type, SizeType>& out,
150 Crs<DataType, Arg1Type, Arg2Type, SizeType>
const& in);
161 template <
class InCrs,
class OutCounts>
162 class GetCrsTransposeCounts {
164 using execution_space =
typename InCrs::execution_space;
165 using self_type = GetCrsTransposeCounts<InCrs, OutCounts>;
166 using index_type =
typename InCrs::size_type;
173 KOKKOS_INLINE_FUNCTION
174 void operator()(index_type i)
const { atomic_increment(&out[in.entries(i)]); }
175 GetCrsTransposeCounts(InCrs
const& arg_in, OutCounts
const& arg_out)
176 : in(arg_in), out(arg_out) {
177 using policy_type = RangePolicy<index_type, execution_space>;
179 const closure_type closure(*
this,
180 policy_type(0, index_type(in.entries.size())));
182 execution_space().fence();
186 template <
class InCounts,
class OutRowMap>
187 class CrsRowMapFromCounts {
189 using execution_space =
typename InCounts::execution_space;
190 using value_type =
typename OutRowMap::value_type;
191 using index_type =
typename InCounts::size_type;
197 last_value_type m_last_value;
200 KOKKOS_INLINE_FUNCTION
201 void operator()(index_type i, value_type& update,
bool final_pass)
const {
202 if (i < static_cast<index_type>(m_in.size())) {
204 if (final_pass) m_out(i + 1) = update;
205 }
else if (final_pass) {
207 m_last_value() = update;
210 KOKKOS_INLINE_FUNCTION
211 void init(value_type& update)
const { update = 0; }
212 KOKKOS_INLINE_FUNCTION
213 void join(
volatile value_type& update,
214 const volatile value_type& input)
const {
217 using self_type = CrsRowMapFromCounts<InCounts, OutRowMap>;
218 CrsRowMapFromCounts(InCounts
const& arg_in, OutRowMap
const& arg_out)
219 : m_in(arg_in), m_out(arg_out), m_last_value(
"last_value") {}
220 value_type execute() {
221 using policy_type = RangePolicy<index_type, execution_space>;
223 closure_type closure(*
this, policy_type(0, m_in.size() + 1));
225 auto last_value = Kokkos::create_mirror_view(m_last_value);
226 Kokkos::deep_copy(last_value, m_last_value);
231 template <
class InCrs,
class OutCrs>
232 class FillCrsTransposeEntries {
234 using execution_space =
typename InCrs::execution_space;
235 using memory_space =
typename InCrs::memory_space;
236 using value_type =
typename OutCrs::entries_type::value_type;
237 using index_type =
typename InCrs::size_type;
240 using counters_type = View<index_type*, memory_space>;
243 counters_type counters;
246 KOKKOS_INLINE_FUNCTION
247 void operator()(index_type i)
const {
248 auto begin = in.row_map(i);
249 auto end = in.row_map(i + 1);
250 for (
auto j = begin; j < end; ++j) {
251 auto ti = in.entries(j);
252 auto tbegin = out.row_map(ti);
253 auto tj = atomic_fetch_add(&counters(ti), 1);
254 out.entries(tbegin + tj) = i;
257 using self_type = FillCrsTransposeEntries<InCrs, OutCrs>;
258 FillCrsTransposeEntries(InCrs
const& arg_in, OutCrs
const& arg_out)
259 : in(arg_in), out(arg_out), counters(
"counters", arg_out.numRows()) {
260 using policy_type = RangePolicy<index_type, execution_space>;
262 const closure_type closure(*
this, policy_type(0, index_type(in.numRows())));
264 execution_space().fence();
277 template <
class OutCounts,
class DataType,
class Arg1Type,
class Arg2Type,
279 void get_crs_transpose_counts(
280 OutCounts& out, Crs<DataType, Arg1Type, Arg2Type, SizeType>
const& in,
281 std::string
const& name) {
282 using InCrs = Crs<DataType, Arg1Type, Arg2Type, SizeType>;
283 out = OutCounts(name, in.numRows());
284 Kokkos::Impl::GetCrsTransposeCounts<InCrs, OutCounts> functor(in, out);
287 template <
class OutRowMap,
class InCounts>
288 typename OutRowMap::value_type get_crs_row_map_from_counts(
289 OutRowMap& out, InCounts
const& in, std::string
const& name) {
290 out = OutRowMap(view_alloc(WithoutInitializing, name), in.size() + 1);
291 Kokkos::Impl::CrsRowMapFromCounts<InCounts, OutRowMap> functor(in, out);
292 return functor.execute();
295 template <
class DataType,
class Arg1Type,
class Arg2Type,
class SizeType>
296 void transpose_crs(Crs<DataType, Arg1Type, Arg2Type, SizeType>& out,
297 Crs<DataType, Arg1Type, Arg2Type, SizeType>
const& in) {
298 using crs_type = Crs<DataType, Arg1Type, Arg2Type, SizeType>;
299 using memory_space =
typename crs_type::memory_space;
300 using counts_type = View<SizeType*, memory_space>;
303 Kokkos::get_crs_transpose_counts(counts, in);
304 Kokkos::get_crs_row_map_from_counts(out.row_map, counts,
307 out.entries = decltype(out.entries)(
"transpose_entries", in.entries.size());
308 Kokkos::Impl::FillCrsTransposeEntries<crs_type, crs_type> entries_functor(
312 template <
class CrsType,
class Functor,
313 class ExecutionSpace =
typename CrsType::execution_space>
314 struct CountAndFillBase;
316 template <
class CrsType,
class Functor,
class ExecutionSpace>
317 struct CountAndFillBase {
318 using data_type =
typename CrsType::data_type;
319 using size_type =
typename CrsType::size_type;
320 using row_map_type =
typename CrsType::row_map_type;
321 using counts_type = row_map_type;
324 counts_type m_counts;
326 inline void operator()(Count, size_type i)
const {
327 m_counts(i) = m_functor(i,
nullptr);
330 inline void operator()(Fill, size_type i)
const {
331 auto j = m_crs.row_map(i);
337 data_type* fill = (j ==
static_cast<decltype(j)
>(m_crs.entries.extent(0)))
339 : (&(m_crs.entries(j)));
342 CountAndFillBase(CrsType& crs, Functor
const& f) : m_crs(crs), m_functor(f) {}
345 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) 346 #if defined(KOKKOS_ENABLE_CUDA) 347 #define EXEC_SPACE Kokkos::Cuda 348 #elif defined(KOKKOS_ENABLE_HIP) 349 #define EXEC_SPACE Kokkos::Experimental::HIP 351 template <
class CrsType,
class Functor>
352 struct CountAndFillBase<CrsType, Functor, EXEC_SPACE> {
353 using data_type =
typename CrsType::data_type;
354 using size_type =
typename CrsType::size_type;
355 using row_map_type =
typename CrsType::row_map_type;
356 using counts_type = row_map_type;
359 counts_type m_counts;
361 __device__
inline void operator()(Count, size_type i)
const {
362 m_counts(i) = m_functor(i,
nullptr);
365 __device__
inline void operator()(Fill, size_type i)
const {
366 auto j = m_crs.row_map(i);
372 data_type* fill = (j ==
static_cast<decltype(j)
>(m_crs.entries.extent(0)))
374 : (&(m_crs.entries(j)));
377 CountAndFillBase(CrsType& crs, Functor
const& f) : m_crs(crs), m_functor(f) {}
381 template <
class CrsType,
class Functor>
382 struct CountAndFill :
public CountAndFillBase<CrsType, Functor> {
383 using base_type = CountAndFillBase<CrsType, Functor>;
384 using typename base_type::Count;
385 using typename base_type::counts_type;
386 using typename base_type::data_type;
387 using typename base_type::Fill;
388 using typename base_type::size_type;
389 using entries_type =
typename CrsType::entries_type;
390 using self_type = CountAndFill<CrsType, Functor>;
391 CountAndFill(CrsType& crs, size_type nrows, Functor
const& f)
392 : base_type(crs, f) {
393 using execution_space =
typename CrsType::execution_space;
394 this->m_counts = counts_type(
"counts", nrows);
396 using count_policy_type = RangePolicy<size_type, execution_space, Count>;
397 using count_closure_type =
399 const count_closure_type closure(*
this, count_policy_type(0, nrows));
402 auto nentries = Kokkos::get_crs_row_map_from_counts(this->m_crs.row_map,
404 this->m_counts = counts_type();
405 this->m_crs.entries = entries_type(
"entries", nentries);
407 using fill_policy_type = RangePolicy<size_type, execution_space, Fill>;
408 using fill_closure_type =
410 const fill_closure_type closure(*
this, fill_policy_type(0, nrows));
417 template <
class CrsType,
class Functor>
418 void count_and_fill_crs(CrsType& crs,
typename CrsType::size_type nrows,
420 Kokkos::CountAndFill<CrsType, Functor>(crs, nrows, f);
KOKKOS_INLINE_FUNCTION size_type numRows() const
Return number of rows in the graph.
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_integral< iType >::value, size_t >::type extent(const iType &r) const noexcept
rank() to be implemented
Implementation detail of parallel_scan.
Compressed row storage array.
KOKKOS_INLINE_FUNCTION Crs(const RowMapType &row_map_, const EntriesType &entries_)
Assign to a view of the rhs array. If the old view is the last view then allocated memory is dealloca...
Implementation of the ParallelFor operator that has a partial specialization for the device...
Traits class for accessing attributes of a View.