Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels_UQ_PCE.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
45 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
46 
47 //----------------------------------------------------------------------------
48 // Specializations of Tpetra::MultiVector pack/unpack kernels for UQ::PCE
49 //----------------------------------------------------------------------------
50 
51 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
54 
55 namespace Tpetra {
56 namespace KokkosRefactor {
57 namespace Details {
58 
59  // Functors for implementing packAndPrepare and unpackAndCombine
60  // through parallel_for
61 
62  template <typename DstView, typename SrcView, typename IdxView>
63  struct PackArraySingleColumn<
64  DstView, SrcView, IdxView,
65  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
66  Kokkos::is_view_uq_pce<SrcView>::value >::type >
67  {
69  typedef typename execution_space::size_type size_type;
70 
71  static const unsigned BlockSize = 32;
72 
73  DstView dst;
74  SrcView src;
75  IdxView idx;
76  size_t col;
77 
78  PackArraySingleColumn(const DstView& dst_,
79  const SrcView& src_,
80  const IdxView& idx_,
81  size_t col_) :
82  dst(dst_), src(src_), idx(idx_), col(col_) {}
83 
84  KOKKOS_INLINE_FUNCTION
85  void operator()( const size_type k ) const {
86  dst(k) = src(idx(k), col);
87  }
88 
89  KOKKOS_INLINE_FUNCTION
90  void operator()( const size_type k, const size_type tidx ) const {
91  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
92  dst(k).fastAccessCoeff(i) = src(idx(k), col).fastAccessCoeff(i);
93  }
94 
95  static void pack(const DstView& dst,
96  const SrcView& src,
97  const IdxView& idx,
98  size_t col) {
99  Kokkos::parallel_for(
100  Kokkos::MPVectorWorkConfig<execution_space>( idx.size(), BlockSize ),
101  PackArraySingleColumn(dst,src,idx,col) );
102  }
103  };
104 
105  template <typename DstView, typename SrcView, typename IdxView>
106  struct PackArrayMultiColumn<
107  DstView, SrcView, IdxView,
108  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
109  Kokkos::is_view_uq_pce<SrcView>::value >::type >
110  {
112  typedef typename execution_space::size_type size_type;
113 
114  static const unsigned BlockSize = 32;
115 
116  DstView dst;
117  SrcView src;
118  IdxView idx;
119  size_t numCols;
120 
121  PackArrayMultiColumn(const DstView& dst_,
122  const SrcView& src_,
123  const IdxView& idx_,
124  size_t numCols_) :
125  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
126 
127  KOKKOS_INLINE_FUNCTION
128  void operator()( const size_type k ) const {
129  const typename IdxView::value_type localRow = idx(k);
130  const size_t offset = k*numCols;
131  for (size_t j = 0; j < numCols; ++j)
132  dst(offset + j) = src(localRow, j);
133  }
134 
135  KOKKOS_INLINE_FUNCTION
136  void operator()( const size_type k, const size_type tidx ) const {
137  const typename IdxView::value_type localRow = idx(k);
138  const size_t offset = k*numCols;
139  for (size_t j = 0; j < numCols; ++j)
140  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
141  dst(offset + j).fastAccessCoeff(i) =
142  src(localRow, j).fastAccessCoeff(i);
143  }
144 
145  static void pack(const DstView& dst,
146  const SrcView& src,
147  const IdxView& idx,
148  size_t numCols) {
149  Kokkos::parallel_for(
150  Kokkos::MPVectorWorkConfig<execution_space>( idx.size(), BlockSize ),
151  PackArrayMultiColumn(dst,src,idx,numCols) );
152  }
153  };
154 
155  template <typename DstView, typename SrcView, typename IdxView,
156  typename ColView>
157  struct PackArrayMultiColumnVariableStride<
158  DstView, SrcView, IdxView, ColView,
159  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
160  Kokkos::is_view_uq_pce<SrcView>::value >::type >
161  {
163  typedef typename execution_space::size_type size_type;
164 
165  static const unsigned BlockSize = 32;
166 
167  DstView dst;
168  SrcView src;
169  IdxView idx;
170  ColView col;
171  size_t numCols;
172 
174  const SrcView& src_,
175  const IdxView& idx_,
176  const ColView& col_,
177  size_t numCols_) :
178  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
179 
180  KOKKOS_INLINE_FUNCTION
181  void operator()( const size_type k ) const {
182  const typename IdxView::value_type localRow = idx(k);
183  const size_t offset = k*numCols;
184  for (size_t j = 0; j < numCols; ++j)
185  dst(offset + j) = src(localRow, col(j));
186  }
187 
188  KOKKOS_INLINE_FUNCTION
189  void operator()( const size_type k, const size_type tidx ) const {
190  const typename IdxView::value_type localRow = idx(k);
191  const size_t offset = k*numCols;
192  for (size_t j = 0; j < numCols; ++j)
193  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
194  dst(offset + j).fastAccessCoeff(i) =
195  src(localRow, col(j)).fastAccessCoeff(i);
196  }
197 
198  static void pack(const DstView& dst,
199  const SrcView& src,
200  const IdxView& idx,
201  const ColView& col,
202  size_t numCols) {
203  Kokkos::parallel_for(
204  Kokkos::MPVectorWorkConfig<execution_space>( idx.size(), BlockSize ),
205  PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
206  }
207  };
208 
209  template <typename ExecutionSpace,
210  typename DstView,
211  typename SrcView,
212  typename IdxView,
213  typename Op>
214  struct UnpackArrayMultiColumn<
215  ExecutionSpace, DstView, SrcView, IdxView, Op,
216  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
217  Kokkos::is_view_uq_pce<SrcView>::value >::type >
218  {
220  typedef typename execution_space::size_type size_type;
221 
222  static const unsigned BlockSize = 32;
223 
224  DstView dst;
225  SrcView src;
226  IdxView idx;
227  Op op;
228  size_t numCols;
229 
230  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
231  const DstView& dst_,
232  const SrcView& src_,
233  const IdxView& idx_,
234  const Op& op_,
235  const size_t numCols_) :
236  dst (dst_),
237  src (src_),
238  idx (idx_),
239  op (op_),
240  numCols (numCols_)
241  {}
242 
243  template <class TagType>
244  KOKKOS_INLINE_FUNCTION void
245  operator() (TagType tag, const size_type k) const
246  {
247  const typename IdxView::value_type localRow = idx(k);
248  const size_t offset = k*numCols;
249  for (size_t j = 0; j < numCols; ++j) {
250  op (tag, dst(localRow, j), src(offset+j));
251  }
252  }
253 
254  template <class TagType>
255  KOKKOS_INLINE_FUNCTION void
256  operator() (TagType tag, const size_type k, const size_type tidx) const
257  {
258  const typename IdxView::value_type localRow = idx(k);
259  const size_t offset = k*numCols;
260  for (size_t j = 0; j < numCols; ++j) {
261  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize) {
262  op (tag, dst(localRow, j).fastAccessCoeff(i),
263  src(offset+j).fastAccessCoeff(i));
264  }
265  }
266  }
267 
268  static void
269  unpack (const ExecutionSpace& execSpace,
270  const DstView& dst,
271  const SrcView& src,
272  const IdxView& idx,
273  const Op& op,
274  const size_t numCols,
275  const bool use_atomic_updates)
276  {
277  if (use_atomic_updates) {
278  Kokkos::parallel_for
279  ("Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
281  idx.size (), BlockSize),
282  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
283  }
284  else {
285  Kokkos::parallel_for
286  ("Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
288  idx.size (), BlockSize),
289  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
290  }
291  }
292  };
293 
294  template <typename ExecutionSpace,
295  typename DstView,
296  typename SrcView,
297  typename IdxView,
298  typename ColView,
299  typename Op>
300  struct UnpackArrayMultiColumnVariableStride<
301  ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
302  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
303  Kokkos::is_view_uq_pce<SrcView>::value >::type>
304  {
306  typedef typename execution_space::size_type size_type;
307 
308  static const unsigned BlockSize = 32;
309 
310  DstView dst;
311  SrcView src;
312  IdxView idx;
313  ColView col;
314  Op op;
315  size_t numCols;
316 
317  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
318  const DstView& dst_,
319  const SrcView& src_,
320  const IdxView& idx_,
321  const ColView& col_,
322  const Op& op_,
323  size_t numCols_) :
324  dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
325 
326  template <class TagType>
327  KOKKOS_INLINE_FUNCTION void
328  operator()( TagType tag, const size_type k ) const {
329  const typename IdxView::value_type localRow = idx(k);
330  const size_t offset = k*numCols;
331  for (size_t j = 0; j < numCols; ++j)
332  op( tag, dst(localRow,col(j)), src(offset+j) );
333  }
334 
335  template <class TagType>
336  KOKKOS_INLINE_FUNCTION void
337  operator()( TagType tag, const size_type k, const size_type tidx ) const {
338  const typename IdxView::value_type localRow = idx(k);
339  const size_t offset = k*numCols;
340  for (size_t j = 0; j < numCols; ++j)
341  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
342  op( tag, dst(localRow,col(j)).fastAccessCoeff(i),
343  src(offset+j).fastAccessCoeff(i) );
344  }
345 
346  static void
347  unpack (const ExecutionSpace& execSpace,
348  const DstView& dst,
349  const SrcView& src,
350  const IdxView& idx,
351  const ColView& col,
352  const Op& op,
353  const size_t numCols,
354  const bool use_atomic_updates)
355  {
356  if (use_atomic_updates) {
357  Kokkos::parallel_for
358  ("Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
360  idx.size (), BlockSize),
361  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
362  idx, col, op, numCols));
363  }
364  else {
365  Kokkos::parallel_for
366  ("Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
368  idx.size (), BlockSize),
369  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
370  idx, col, op, numCols));
371  }
372  }
373  };
374 
375  template <typename DstView, typename SrcView,
376  typename DstIdxView, typename SrcIdxView, typename Op>
377  struct PermuteArrayMultiColumn<
378  DstView, SrcView, DstIdxView, SrcIdxView, Op,
379  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
380  Kokkos::is_view_uq_pce<SrcView>::value >::type>
381  {
383  typedef typename execution_space::size_type size_type;
384 
385  static const unsigned BlockSize = 32;
386 
387  DstView dst;
388  SrcView src;
389  DstIdxView dst_idx;
390  SrcIdxView src_idx;
391  size_t numCols;
392  Op op;
393 
394  PermuteArrayMultiColumn(const DstView& dst_,
395  const SrcView& src_,
396  const DstIdxView& dst_idx_,
397  const SrcIdxView& src_idx_,
398  size_t numCols_, const Op& op_) :
399  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
400  numCols(numCols_), op(op_) {}
401 
402  KOKKOS_INLINE_FUNCTION
403  void operator()( const size_type k ) const {
404  const typename DstIdxView::value_type toRow = dst_idx(k);
405  const typename SrcIdxView::value_type fromRow = src_idx(k);
406  nonatomic_tag tag; // permute does not need atomics
407  for (size_t j = 0; j < numCols; ++j)
408  op(tag, dst(toRow, j),src(fromRow, j));
409  }
410 
411  KOKKOS_INLINE_FUNCTION
412  void operator()( const size_type k, const size_type tidx ) const {
413  const typename DstIdxView::value_type toRow = dst_idx(k);
414  const typename SrcIdxView::value_type fromRow = src_idx(k);
415  nonatomic_tag tag; // permute does not need atomics
416  for (size_t j = 0; j < numCols; ++j)
417  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
418  op(tag, dst(toRow, j).fastAccessCoeff(i),
419  src(fromRow, j).fastAccessCoeff(i));
420  }
421 
422  static void permute(const DstView& dst,
423  const SrcView& src,
424  const DstIdxView& dst_idx,
425  const SrcIdxView& src_idx,
426  size_t numCols,
427  const Op& op) {
428  const size_type n = std::min( dst_idx.size(), src_idx.size() );
429  Kokkos::parallel_for(
431  PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols,op) );
432  }
433  };
434 
435  template <typename DstView, typename SrcView,
436  typename DstIdxView, typename SrcIdxView,
437  typename DstColView, typename SrcColView, typename Op>
438  struct PermuteArrayMultiColumnVariableStride<
439  DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
440  typename std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
441  Kokkos::is_view_uq_pce<SrcView>::value >::type >
442  {
444  typedef typename execution_space::size_type size_type;
445 
446  static const unsigned BlockSize = 32;
447 
448  DstView dst;
449  SrcView src;
450  DstIdxView dst_idx;
451  SrcIdxView src_idx;
452  DstColView dst_col;
453  SrcColView src_col;
454  size_t numCols;
455  Op op;
456 
458  const SrcView& src_,
459  const DstIdxView& dst_idx_,
460  const SrcIdxView& src_idx_,
461  const DstColView& dst_col_,
462  const SrcColView& src_col_,
463  size_t numCols_,
464  const Op& op_) :
465  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
466  dst_col(dst_col_), src_col(src_col_),
467  numCols(numCols_), op(op_) {}
468 
469  KOKKOS_INLINE_FUNCTION
470  void operator()( const size_type k ) const {
471  const typename DstIdxView::value_type toRow = dst_idx(k);
472  const typename SrcIdxView::value_type fromRow = src_idx(k);
473  nonatomic_tag tag; // permute does not need atomics
474  for (size_t j = 0; j < numCols; ++j)
475  op(tag, dst(toRow, dst_col(j)),src(fromRow, src_col(j)));
476  }
477 
478  KOKKOS_INLINE_FUNCTION
479  void operator()( const size_type k, const size_type tidx ) const {
480  const typename DstIdxView::value_type toRow = dst_idx(k);
481  const typename SrcIdxView::value_type fromRow = src_idx(k);
482  nonatomic_tag tag; // permute does not need atomics
483  for (size_t j = 0; j < numCols; ++j)
484  for (size_type i=tidx; i<Kokkos::dimension_scalar(dst); i+=BlockSize)
485  op(tag, dst(toRow, dst_col(j)).fastAccessCoeff(i),
486  src(fromRow, src_col(j)).fastAccessCoeff(i));
487  }
488 
489  static void permute(const DstView& dst,
490  const SrcView& src,
491  const DstIdxView& dst_idx,
492  const SrcIdxView& src_idx,
493  const DstColView& dst_col,
494  const SrcColView& src_col,
495  size_t numCols,
496  const Op& op) {
497  const size_type n = std::min( dst_idx.size(), src_idx.size() );
498  Kokkos::parallel_for(
500  PermuteArrayMultiColumnVariableStride(
501  dst,src,dst_idx,src_idx,dst_col,src_col,numCols,op) );
502  }
503  };
504 
505 } // Details namespace
506 } // KokkosRefactor namespace
507 } // Tpetra namespace
508 
509 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
Kokkos::DefaultExecutionSpace execution_space
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.