Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels_MP_Vector.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
45 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
46 
47 //----------------------------------------------------------------------------
48 // Specializations of Tpetra::MultiVector pack/unpack kernels for MP::Vector
49 //----------------------------------------------------------------------------
50 
51 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
54 
55 namespace Tpetra {
56 namespace KokkosRefactor {
57 namespace Details {
58 
59  // Functors for implementing packAndPrepare and unpackAndCombine
60  // through parallel_for
61 
62  template <typename DstView, typename SrcView, typename IdxView>
63  struct PackArraySingleColumn<
64  DstView, SrcView, IdxView,
65  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
66  Kokkos::is_view_mp_vector<SrcView>::value >::type >
67  {
69  typedef typename execution_space::size_type size_type;
70 
71  DstView dst;
72  SrcView src;
73  IdxView idx;
74  size_t col;
75 
76  PackArraySingleColumn(const DstView& dst_,
77  const SrcView& src_,
78  const IdxView& idx_,
79  size_t col_) :
80  dst(dst_), src(src_), idx(idx_), col(col_) {}
81 
82  KOKKOS_INLINE_FUNCTION
83  void operator()( const size_type k ) const {
84  dst(k) = src(idx(k), col);
85  }
86 
87  KOKKOS_INLINE_FUNCTION
88  void operator()( const size_type k, const size_type tidx ) const {
89  dst(k).fastAccessCoeff(tidx) = src(idx(k), col).fastAccessCoeff(tidx);
90  }
91 
92  static void pack(const DstView& dst,
93  const SrcView& src,
94  const IdxView& idx,
95  size_t col) {
96  Kokkos::parallel_for(
98  idx.size(), Kokkos::dimension_scalar(dst) ),
99  PackArraySingleColumn(dst,src,idx,col) );
100  }
101  };
102 
103  template <typename DstView, typename SrcView, typename IdxView>
104  struct PackArrayMultiColumn<
105  DstView, SrcView, IdxView,
106  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
107  Kokkos::is_view_mp_vector<SrcView>::value >::type >
108  {
110  typedef typename execution_space::size_type size_type;
111 
112  DstView dst;
113  SrcView src;
114  IdxView idx;
115  size_t numCols;
116 
117  PackArrayMultiColumn(const DstView& dst_,
118  const SrcView& src_,
119  const IdxView& idx_,
120  size_t numCols_) :
121  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
122 
123  KOKKOS_INLINE_FUNCTION
124  void operator()( const size_type k ) const {
125  const typename IdxView::value_type localRow = idx(k);
126  const size_t offset = k*numCols;
127  for (size_t j = 0; j < numCols; ++j)
128  dst(offset + j) = src(localRow, j);
129  }
130 
131  KOKKOS_INLINE_FUNCTION
132  void operator()( const size_type k, const size_type tidx ) const {
133  const typename IdxView::value_type localRow = idx(k);
134  const size_t offset = k*numCols;
135  for (size_t j = 0; j < numCols; ++j)
136  dst(offset + j).fastAccessCoeff(tidx) =
137  src(localRow, j).fastAccessCoeff(tidx);
138  }
139 
140  static void pack(const DstView& dst,
141  const SrcView& src,
142  const IdxView& idx,
143  size_t numCols) {
144  Kokkos::parallel_for(
146  PackArrayMultiColumn(dst,src,idx,numCols) );
147  }
148  };
149 
150  template <typename DstView, typename SrcView, typename IdxView,
151  typename ColView>
152  struct PackArrayMultiColumnVariableStride<
153  DstView, SrcView, IdxView, ColView,
154  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
155  Kokkos::is_view_mp_vector<SrcView>::value >::type >
156  {
158  typedef typename execution_space::size_type size_type;
159 
160  DstView dst;
161  SrcView src;
162  IdxView idx;
163  ColView col;
164  size_t numCols;
165 
167  const SrcView& src_,
168  const IdxView& idx_,
169  const ColView& col_,
170  size_t numCols_) :
171  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
172 
173  KOKKOS_INLINE_FUNCTION
174  void operator()( const size_type k ) const {
175  const typename IdxView::value_type localRow = idx(k);
176  const size_t offset = k*numCols;
177  for (size_t j = 0; j < numCols; ++j)
178  dst(offset + j) = src(localRow, col(j));
179  }
180 
181  KOKKOS_INLINE_FUNCTION
182  void operator()( const size_type k, const size_type tidx ) const {
183  const typename IdxView::value_type localRow = idx(k);
184  const size_t offset = k*numCols;
185  for (size_t j = 0; j < numCols; ++j)
186  dst(offset + j).fastAccessCoeff(tidx) =
187  src(localRow, col(j)).fastAccessCoeff(tidx);
188  }
189 
190  static void pack(const DstView& dst,
191  const SrcView& src,
192  const IdxView& idx,
193  const ColView& col,
194  size_t numCols) {
195  Kokkos::parallel_for(
197  idx.size(), Kokkos::dimension_scalar(dst) ),
198  PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
199  }
200  };
201 
202  template <typename ExecutionSpace,
203  typename DstView,
204  typename SrcView,
205  typename IdxView,
206  typename Op>
207  struct UnpackArrayMultiColumn<
208  ExecutionSpace, DstView, SrcView, IdxView, Op,
209  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
210  Kokkos::is_view_mp_vector<SrcView>::value >::type >
211  {
213  typedef typename execution_space::size_type size_type;
214 
215  DstView dst;
216  SrcView src;
217  IdxView idx;
218  Op op;
219  size_t numCols;
220 
221  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
222  const DstView& dst_,
223  const SrcView& src_,
224  const IdxView& idx_,
225  const Op& op_,
226  const size_t numCols_) :
227  dst (dst_),
228  src (src_),
229  idx (idx_),
230  op (op_),
231  numCols (numCols_)
232  {}
233 
234  template <class TagType>
235  KOKKOS_INLINE_FUNCTION void
236  operator() (TagType tag, const size_type k) const
237  {
238  const typename IdxView::value_type localRow = idx(k);
239  const size_t offset = k*numCols;
240  for (size_t j = 0; j < numCols; ++j) {
241  op (tag, dst(localRow, j), src(offset+j));
242  }
243  }
244 
245  template <class TagType>
246  KOKKOS_INLINE_FUNCTION void
247  operator() (TagType tag, const size_type k, const size_type tidx) const
248  {
249  const typename IdxView::value_type localRow = idx(k);
250  const size_t offset = k*numCols;
251  for (size_t j = 0; j < numCols; ++j) {
252  op (tag, dst(localRow, j).fastAccessCoeff(tidx),
253  src(offset+j).fastAccessCoeff(tidx));
254  }
255  }
256 
257  static void
258  unpack (const ExecutionSpace& execSpace,
259  const DstView& dst,
260  const SrcView& src,
261  const IdxView& idx,
262  const Op& op,
263  const size_t numCols,
264  const bool use_atomic_updates)
265  {
266  if (use_atomic_updates) {
267  Kokkos::parallel_for
268  ("Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
270  idx.size (), Kokkos::dimension_scalar (dst)),
271  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
272  }
273  else {
274  Kokkos::parallel_for
275  ("Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
277  idx.size (), Kokkos::dimension_scalar (dst)),
278  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
279  }
280  }
281  };
282 
283  template <typename ExecutionSpace,
284  typename DstView,
285  typename SrcView,
286  typename IdxView,
287  typename ColView,
288  typename Op>
289  struct UnpackArrayMultiColumnVariableStride<
290  ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
291  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
292  Kokkos::is_view_mp_vector<SrcView>::value >::type >
293  {
295  typedef typename execution_space::size_type size_type;
296 
297  DstView dst;
298  SrcView src;
299  IdxView idx;
300  ColView col;
301  Op op;
302  size_t numCols;
303 
304  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
305  const DstView& dst_,
306  const SrcView& src_,
307  const IdxView& idx_,
308  const ColView& col_,
309  const Op& op_,
310  const size_t numCols_) :
311  dst (dst_),
312  src (src_),
313  idx (idx_),
314  col (col_),
315  op (op_),
316  numCols (numCols_)
317  {}
318 
319  template <class TagType>
320  KOKKOS_INLINE_FUNCTION void
321  operator() (TagType tag, const size_type k) const
322  {
323  const typename IdxView::value_type localRow = idx(k);
324  const size_t offset = k*numCols;
325  for (size_t j = 0; j < numCols; ++j) {
326  op (tag, dst(localRow, col(j)), src(offset+j));
327  }
328  }
329 
330  template <class TagType>
331  KOKKOS_INLINE_FUNCTION void
332  operator() (TagType tag, const size_type k, const size_type tidx) const
333  {
334  const typename IdxView::value_type localRow = idx(k);
335  const size_t offset = k*numCols;
336  for (size_t j = 0; j < numCols; ++j) {
337  op (tag, dst(localRow, col(j)).fastAccessCoeff(tidx),
338  src(offset+j).fastAccessCoeff(tidx));
339  }
340  }
341 
342  static void
343  unpack (const ExecutionSpace& execSpace,
344  const DstView& dst,
345  const SrcView& src,
346  const IdxView& idx,
347  const ColView& col,
348  const Op& op,
349  const size_t numCols,
350  const bool use_atomic_updates)
351  {
352  if (use_atomic_updates) {
353  Kokkos::parallel_for
354  ("Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
356  idx.size (), Kokkos::dimension_scalar (dst)),
357  UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
358  }
359  else {
360  Kokkos::parallel_for
361  ("Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
363  idx.size (), Kokkos::dimension_scalar (dst)),
364  UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
365  }
366  }
367  };
368 
369  template <typename DstView, typename SrcView,
370  typename DstIdxView, typename SrcIdxView, typename Op>
371  struct PermuteArrayMultiColumn<
372  DstView, SrcView, DstIdxView, SrcIdxView, Op,
373  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
374  Kokkos::is_view_mp_vector<SrcView>::value >::type >
375  {
377  typedef typename execution_space::size_type size_type;
378 
379  DstView dst;
380  SrcView src;
381  DstIdxView dst_idx;
382  SrcIdxView src_idx;
383  size_t numCols;
384  Op op;
385 
386  PermuteArrayMultiColumn(const DstView& dst_,
387  const SrcView& src_,
388  const DstIdxView& dst_idx_,
389  const SrcIdxView& src_idx_,
390  size_t numCols_, const Op& op_) :
391  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
392  numCols(numCols_), op(op_) {}
393 
394  KOKKOS_INLINE_FUNCTION
395  void operator()( const size_type k ) const {
396  const typename DstIdxView::value_type toRow = dst_idx(k);
397  const typename SrcIdxView::value_type fromRow = src_idx(k);
398  nonatomic_tag tag; // permute does not need atomics
399  for (size_t j = 0; j < numCols; ++j)
400  op(tag, dst(toRow, j),src(fromRow, j));
401  }
402 
403  KOKKOS_INLINE_FUNCTION
404  void operator()( const size_type k, const size_type tidx ) const {
405  const typename DstIdxView::value_type toRow = dst_idx(k);
406  const typename SrcIdxView::value_type fromRow = src_idx(k);
407  nonatomic_tag tag; // permute does not need atomics
408  for (size_t j = 0; j < numCols; ++j)
409  op(tag, dst(toRow, j).fastAccessCoeff(tidx),
410  src(fromRow, j).fastAccessCoeff(tidx));
411  }
412 
413  static void permute(const DstView& dst,
414  const SrcView& src,
415  const DstIdxView& dst_idx,
416  const SrcIdxView& src_idx,
417  size_t numCols,
418  const Op& op) {
419  const size_type n = std::min( dst_idx.size(), src_idx.size() );
420  Kokkos::parallel_for(
422  n, Kokkos::dimension_scalar(dst) ),
423  PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols,op) );
424  }
425  };
426 
427  template <typename DstView, typename SrcView,
428  typename DstIdxView, typename SrcIdxView,
429  typename DstColView, typename SrcColView, typename Op>
430  struct PermuteArrayMultiColumnVariableStride<
431  DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
432  typename std::enable_if< Kokkos::is_view_mp_vector<DstView>::value &&
433  Kokkos::is_view_mp_vector<SrcView>::value >::type >
434  {
436  typedef typename execution_space::size_type size_type;
437 
438  DstView dst;
439  SrcView src;
440  DstIdxView dst_idx;
441  SrcIdxView src_idx;
442  DstColView dst_col;
443  SrcColView src_col;
444  size_t numCols;
445  Op op;
446 
448  const SrcView& src_,
449  const DstIdxView& dst_idx_,
450  const SrcIdxView& src_idx_,
451  const DstColView& dst_col_,
452  const SrcColView& src_col_,
453  size_t numCols_,
454  const Op& op_) :
455  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
456  dst_col(dst_col_), src_col(src_col_),
457  numCols(numCols_), op(op_) {}
458 
459  KOKKOS_INLINE_FUNCTION
460  void operator()( const size_type k ) const {
461  const typename DstIdxView::value_type toRow = dst_idx(k);
462  const typename SrcIdxView::value_type fromRow = src_idx(k);
463  nonatomic_tag tag; // permute does not need atomics
464  for (size_t j = 0; j < numCols; ++j)
465  op(tag, dst(toRow, dst_col(j)),src(fromRow, src_col(j)));
466  }
467 
468  KOKKOS_INLINE_FUNCTION
469  void operator()( const size_type k, const size_type tidx ) const {
470  const typename DstIdxView::value_type toRow = dst_idx(k);
471  const typename SrcIdxView::value_type fromRow = src_idx(k);
472  nonatomic_tag tag; // permute does not need atomics
473  for (size_t j = 0; j < numCols; ++j)
474  op(tag, dst(toRow, dst_col(j)).fastAccessCoeff(tidx),
475  src(fromRow, src_col(j)).fastAccessCoeff(tidx));
476  }
477 
478  static void permute(const DstView& dst,
479  const SrcView& src,
480  const DstIdxView& dst_idx,
481  const SrcIdxView& src_idx,
482  const DstColView& dst_col,
483  const SrcColView& src_col,
484  size_t numCols,
485  const Op& op) {
486  const size_type n = std::min( dst_idx.size(), src_idx.size() );
487  Kokkos::parallel_for(
489  n, Kokkos::dimension_scalar(dst) ),
490  PermuteArrayMultiColumnVariableStride(
491  dst,src,dst_idx,src_idx,dst_col,src_col,numCols,op) );
492  }
493  };
494 
495 } // Details namespace
496 } // KokkosRefactor namespace
497 } // Tpetra namespace
498 
499 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
Kokkos::DefaultExecutionSpace execution_space
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.