44 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP 45 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP 51 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp" 56 namespace KokkosRefactor {
62 template <
typename DstView,
typename SrcView,
typename IdxView>
63 struct PackArraySingleColumn<
64 DstView, SrcView, IdxView,
65 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
66 Kokkos::is_view_mp_vector<SrcView>::value >::type >
69 typedef typename execution_space::size_type
size_type;
80 dst(dst_), src(src_), idx(idx_), col(col_) {}
82 KOKKOS_INLINE_FUNCTION
84 dst(k) = src(idx(k), col);
87 KOKKOS_INLINE_FUNCTION
89 dst(k).fastAccessCoeff(tidx) = src(idx(k), col).fastAccessCoeff(tidx);
92 static void pack(
const DstView& dst,
99 PackArraySingleColumn(dst,src,idx,col) );
103 template <
typename DstView,
typename SrcView,
typename IdxView>
104 struct PackArrayMultiColumn<
105 DstView, SrcView, IdxView,
106 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
107 Kokkos::is_view_mp_vector<SrcView>::value >::type >
121 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
123 KOKKOS_INLINE_FUNCTION
126 const size_t offset = k*numCols;
127 for (
size_t j = 0;
j < numCols; ++
j)
128 dst(offset +
j) = src(localRow,
j);
131 KOKKOS_INLINE_FUNCTION
134 const size_t offset = k*numCols;
135 for (
size_t j = 0;
j < numCols; ++
j)
136 dst(offset +
j).fastAccessCoeff(tidx) =
137 src(localRow,
j).fastAccessCoeff(tidx);
140 static void pack(
const DstView& dst,
144 Kokkos::parallel_for(
146 PackArrayMultiColumn(dst,src,idx,numCols) );
150 template <
typename DstView,
typename SrcView,
typename IdxView,
152 struct PackArrayMultiColumnVariableStride<
153 DstView, SrcView, IdxView, ColView,
154 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
155 Kokkos::is_view_mp_vector<SrcView>::value >::type >
171 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
173 KOKKOS_INLINE_FUNCTION
176 const size_t offset = k*numCols;
177 for (
size_t j = 0;
j < numCols; ++
j)
178 dst(offset +
j) = src(localRow, col(
j));
181 KOKKOS_INLINE_FUNCTION
184 const size_t offset = k*numCols;
185 for (
size_t j = 0;
j < numCols; ++
j)
186 dst(offset +
j).fastAccessCoeff(tidx) =
187 src(localRow, col(
j)).fastAccessCoeff(tidx);
190 static void pack(
const DstView& dst,
195 Kokkos::parallel_for(
198 PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
202 template <
typename ExecutionSpace,
207 struct UnpackArrayMultiColumn<
208 ExecutionSpace, DstView, SrcView, IdxView, Op,
209 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
210 Kokkos::is_view_mp_vector<SrcView>::value >::type >
226 const size_t numCols_) :
234 template <
class TagType>
235 KOKKOS_INLINE_FUNCTION
void 239 const size_t offset = k*numCols;
240 for (
size_t j = 0;
j < numCols; ++
j) {
241 op (tag, dst(localRow,
j), src(offset+
j));
245 template <
class TagType>
246 KOKKOS_INLINE_FUNCTION
void 250 const size_t offset = k*numCols;
251 for (
size_t j = 0;
j < numCols; ++
j) {
263 const size_t numCols,
264 const bool use_atomic_updates)
266 if (use_atomic_updates) {
268 (
"Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
271 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
275 (
"Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
278 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
283 template <
typename ExecutionSpace,
289 struct UnpackArrayMultiColumnVariableStride<
290 ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
291 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
292 Kokkos::is_view_mp_vector<SrcView>::value >::type >
310 const size_t numCols_) :
319 template <
class TagType>
320 KOKKOS_INLINE_FUNCTION
void 324 const size_t offset = k*numCols;
325 for (
size_t j = 0;
j < numCols; ++
j) {
326 op (tag, dst(localRow, col(
j)), src(offset+
j));
330 template <
class TagType>
331 KOKKOS_INLINE_FUNCTION
void 335 const size_t offset = k*numCols;
336 for (
size_t j = 0;
j < numCols; ++
j) {
349 const size_t numCols,
350 const bool use_atomic_updates)
352 if (use_atomic_updates) {
354 (
"Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
357 UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
361 (
"Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
364 UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
369 template <
typename DstView,
typename SrcView,
370 typename DstIdxView,
typename SrcIdxView,
typename Op>
371 struct PermuteArrayMultiColumn<
372 DstView, SrcView, DstIdxView, SrcIdxView, Op,
373 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
374 Kokkos::is_view_mp_vector<SrcView>::value >::type >
388 const DstIdxView& dst_idx_,
389 const SrcIdxView& src_idx_,
390 size_t numCols_,
const Op& op_) :
391 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
392 numCols(numCols_), op(op_) {}
394 KOKKOS_INLINE_FUNCTION
399 for (
size_t j = 0;
j < numCols; ++
j)
400 op(tag, dst(toRow,
j),src(fromRow,
j));
403 KOKKOS_INLINE_FUNCTION
408 for (
size_t j = 0;
j < numCols; ++
j)
415 const DstIdxView& dst_idx,
416 const SrcIdxView& src_idx,
420 Kokkos::parallel_for(
423 PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols,op) );
427 template <
typename DstView,
typename SrcView,
428 typename DstIdxView,
typename SrcIdxView,
429 typename DstColView,
typename SrcColView,
typename Op>
430 struct PermuteArrayMultiColumnVariableStride<
431 DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
432 typename
std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
433 Kokkos::is_view_mp_vector<SrcView>::value >::type >
449 const DstIdxView& dst_idx_,
450 const SrcIdxView& src_idx_,
451 const DstColView& dst_col_,
452 const SrcColView& src_col_,
455 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
456 dst_col(dst_col_), src_col(src_col_),
457 numCols(numCols_), op(op_) {}
459 KOKKOS_INLINE_FUNCTION
464 for (
size_t j = 0;
j < numCols; ++
j)
465 op(tag, dst(toRow, dst_col(
j)),src(fromRow, src_col(
j)));
468 KOKKOS_INLINE_FUNCTION
473 for (
size_t j = 0;
j < numCols; ++
j)
480 const DstIdxView& dst_idx,
481 const SrcIdxView& src_idx,
482 const DstColView& dst_col,
483 const SrcColView& src_col,
487 Kokkos::parallel_for(
490 PermuteArrayMultiColumnVariableStride(
491 dst,src,dst_idx,src_idx,dst_col,src_col,numCols,op) );
499 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, size_t col)
PackArrayMultiColumnVariableStride(const DstView &dst_, const SrcView &src_, const IdxView &idx_, const ColView &col_, size_t numCols_)
static void permute(const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, const DstColView &dst_col, const SrcColView &src_col, size_t numCols, const Op &op)
UnpackArrayMultiColumnVariableStride(const ExecutionSpace &, const DstView &dst_, const SrcView &src_, const IdxView &idx_, const ColView &col_, const Op &op_, const size_t numCols_)
static void unpack(const ExecutionSpace &execSpace, const DstView &dst, const SrcView &src, const IdxView &idx, const Op &op, const size_t numCols, const bool use_atomic_updates)
execution_space::size_type size_type
Kokkos::DefaultExecutionSpace execution_space
execution_space::size_type size_type
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
ExecutionSpace::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
DstView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
PermuteArrayMultiColumnVariableStride(const DstView &dst_, const SrcView &src_, const DstIdxView &dst_idx_, const SrcIdxView &src_idx_, const DstColView &dst_col_, const SrcColView &src_col_, size_t numCols_, const Op &op_)
PermuteArrayMultiColumn(const DstView &dst_, const SrcView &src_, const DstIdxView &dst_idx_, const SrcIdxView &src_idx_, size_t numCols_, const Op &op_)
execution_space::size_type size_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
DstView::execution_space execution_space
PackArraySingleColumn(const DstView &dst_, const SrcView &src_, const IdxView &idx_, size_t col_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
ExecutionSpace::execution_space execution_space
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, const ColView &col, size_t numCols)
DstView::execution_space execution_space
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
PackArrayMultiColumn(const DstView &dst_, const SrcView &src_, const IdxView &idx_, size_t numCols_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, size_t numCols)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
DstView::execution_space execution_space
Team-based parallel work configuration for Sacado::MP::Vector.
UnpackArrayMultiColumn(const ExecutionSpace &, const DstView &dst_, const SrcView &src_, const IdxView &idx_, const Op &op_, const size_t numCols_)
static void permute(const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, size_t numCols, const Op &op)
execution_space::size_type size_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
execution_space::size_type size_type
execution_space::size_type size_type
execution_space::size_type size_type
DstView::execution_space execution_space
static void unpack(const ExecutionSpace &execSpace, const DstView &dst, const SrcView &src, const IdxView &idx, const ColView &col, const Op &op, const size_t numCols, const bool use_atomic_updates)