Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestSpMv.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // MP::Vector and Matrix
45 #include "KokkosSparse_CrsMatrix.hpp"
46 #include "KokkosSparse_spmv.hpp"
48 
49 // Compile-time loops
50 #include "Sacado_mpl_range_c.hpp"
51 #include "Sacado_mpl_for_each.hpp"
52 #include "Sacado_mpl_integral_c.hpp"
53 
54 // Utilities
55 #include "impl/Kokkos_Timer.hpp"
56 
// Flatten a 3-D grid coordinate (i,j,k) on a grid with N points per
// dimension into a single index, with k varying fastest and i slowest:
//   index = k + N * ( j + N * i )
// The arithmetic is carried out in IntType; no range checking is done.
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  return k + N * ( j + N * i );
}
66 
// Build the adjacency graph of an N x N x N box of grid points in which
// every point is connected to itself and to each of its (up to 26)
// neighbours that differ by at most one in each coordinate.
//
//   N     : number of grid points per dimension.
//   graph : output; on return holds N*N*N rows, row r listing the column
//           indices adjacent to r.  Any previous content is discarded.
//
// Rows and columns use the same flattening as map_fem_graph_coord
// (k + N * ( j + N * i )), evaluated here in size_t so the index cannot
// overflow an int for large N.  Columns within a row are emitted in
// ascending (i,j,k) order of the neighbour.
//
// Returns the total number of entries over all rows.
inline
size_t generate_fem_graph( size_t N ,
                           std::vector< std::vector<size_t> > & graph )
{
  // assign() rather than resize(): resize() would keep rows already
  // present in 'graph', so new columns would be appended to stale ones
  // and counted in the returned total.
  graph.assign( N * N * N , std::vector<size_t>() );

  size_t total = 0 ;

  for ( size_t i = 0 ; i < N ; ++i ) {
    // Clamp the neighbour range [i-1, i+1] to the grid without ever
    // forming a negative value in unsigned arithmetic.
    const size_t i_lo = ( i > 0 )     ? i - 1 : i ;
    const size_t i_hi = ( i + 1 < N ) ? i + 1 : i ;

    for ( size_t j = 0 ; j < N ; ++j ) {
      const size_t j_lo = ( j > 0 )     ? j - 1 : j ;
      const size_t j_hi = ( j + 1 < N ) ? j + 1 : j ;

      for ( size_t k = 0 ; k < N ; ++k ) {
        const size_t k_lo = ( k > 0 )     ? k - 1 : k ;
        const size_t k_hi = ( k + 1 < N ) ? k + 1 : k ;

        std::vector<size_t> & row_cols = graph[ k + N * ( j + N * i ) ];

        // An interior point has 3*3*3 = 27 entries; boundary points fewer.
        row_cols.reserve(27);

        for ( size_t ni = i_lo ; ni <= i_hi ; ++ni ) {
          for ( size_t nj = j_lo ; nj <= j_hi ; ++nj ) {
            for ( size_t nk = k_lo ; nk <= k_hi ; ++nk ) {
              row_cols.push_back( nk + N * ( nj + N * ni ) );
            }
          }
        }

        total += row_cols.size();
      }
    }
  }

  return total ;
}
99 
100 template <typename StorageType, typename MultiplyTag>
101 std::vector<double>
102 test_mpvector_spmv(const int ensemble_length,
103  const int nGrid,
104  const int iterCount,
105  KokkosSparse::DeviceConfig dev_config,
106  MultiplyTag tag)
107 {
108  typedef StorageType storage_type;
109  typedef typename storage_type::value_type value_type;
110  typedef typename storage_type::ordinal_type ordinal_type;
112  typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
113  typedef Sacado::MP::Vector<StorageType> VectorType;
114  typedef Kokkos::LayoutRight Layout;
115  typedef Kokkos::View< VectorType*, Layout, execution_space > vector_type;
116  typedef KokkosSparse::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type;
117  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
118  typedef typename matrix_type::values_type matrix_values_type;
119 
120  //------------------------------
121  // Generate graph for "FEM" box structure:
122 
123  std::vector< std::vector<size_t> > fem_graph;
124  const size_t fem_length = nGrid * nGrid * nGrid;
125  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
126 
127  //------------------------------
128  // Generate input multivector:
129 
130  vector_type x =
131  vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length, ensemble_length);
132  vector_type y =
133  vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length, ensemble_length);
134 
135  //------------------------------
136 
137  matrix_graph_type matrix_graph =
138  Kokkos::create_staticcrsgraph<matrix_graph_type>(
139  std::string("test crs graph"), fem_graph);
140  matrix_values_type matrix_values =
141  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length, ensemble_length);
142  matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph);
143  matrix.dev_config = dev_config;
144 
145  //------------------------------
146  // Fill:
147 
148  {
149  // The VectorType may be dynamic (with allocated memory)
150  // so cannot pass a VectorType value to the device.
151  // Get an array-of-intrinsic View and fill that view.
152  typename vector_type::array_type xx( x );
153  typename vector_type::array_type yy( y );
154  typename matrix_values_type::array_type mm( matrix_values );
155 
156  Kokkos::deep_copy( xx , value_type(1.0) );
157  Kokkos::deep_copy( yy , value_type(1.0) );
158  Kokkos::deep_copy( mm , value_type(1.0) );
159  }
160 
161  //------------------------------
162 
163  // One iteration to warm up
164  Stokhos::multiply( matrix, x, y, tag );
165 
166  execution_space().fence();
167  Kokkos::Impl::Timer clock ;
168  for (int iter = 0; iter < iterCount; ++iter) {
169  Stokhos::multiply( matrix, x, y, tag );
170  }
171  execution_space().fence();
172 
173  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
174  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
175 
176  std::vector<double> perf(5);
177  perf[0] = fem_length;
178  perf[1] = ensemble_length;
179  perf[2] = graph_length;
180  perf[3] = seconds_per_iter;
181  perf[4] = flops / seconds_per_iter;
182  return perf;
183 }
184 
185 template <typename ScalarType, typename OrdinalType, typename Device>
186 std::vector<double>
187 test_scalar_spmv(const int ensemble_length,
188  const int nGrid,
189  const int iterCount,
190  KokkosSparse::DeviceConfig dev_config)
191 {
192  typedef ScalarType value_type;
193  typedef OrdinalType ordinal_type;
194  typedef Device execution_space;
195  typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
196  typedef Kokkos::View< value_type*, execution_space > vector_type;
197  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, device_type > matrix_type;
198  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
199  typedef typename matrix_type::values_type matrix_values_type;
200 
201  //------------------------------
202  // Generate graph for "FEM" box structure:
203 
204  std::vector< std::vector<size_t> > fem_graph;
205  const size_t fem_length = nGrid * nGrid * nGrid;
206  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
207 
208  //------------------------------
209  // Generate input multivector:
210 
211  std::vector<vector_type> x(ensemble_length);
212  std::vector<vector_type> y(ensemble_length);
213  for (int e=0; e<ensemble_length; ++e) {
214  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
215  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
216 
217  Kokkos::deep_copy( x[e] , value_type(1.0) );
218  Kokkos::deep_copy( y[e] , value_type(0.0) );
219  }
220 
221  //------------------------------
222 
223  std::vector<matrix_type> matrix(ensemble_length);
224  for (int e=0; e<ensemble_length; ++e) {
225  matrix_graph_type matrix_graph =
226  Kokkos::create_staticcrsgraph<matrix_graph_type>(
227  std::string("test crs graph"), fem_graph);
228  matrix_values_type matrix_values =
229  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
230  matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph);
231 
232  Kokkos::deep_copy( matrix[e].values , value_type(1.0) );
233  }
234 
235  //------------------------------
236 
237  // One iteration to warm up
238  for (int iter = 0; iter < iterCount; ++iter) {
239  for (int e=0; e<ensemble_length; ++e) {
240  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
241  }
242  }
243 
244  execution_space().fence();
245  Kokkos::Impl::Timer clock ;
246  for (int iter = 0; iter < iterCount; ++iter) {
247  for (int e=0; e<ensemble_length; ++e) {
248  KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
249  }
250  }
251  execution_space().fence();
252 
253  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
254  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
255 
256  std::vector<double> perf(5);
257  perf[0] = fem_length;
258  perf[1] = ensemble_length;
259  perf[2] = graph_length;
260  perf[3] = seconds_per_iter;
261  perf[4] = flops / seconds_per_iter;
262  return perf;
263 }
264 
265 template <class Storage>
266 struct PerformanceDriverOp {
267  typedef typename Storage::value_type Scalar;
268  typedef typename Storage::ordinal_type Ordinal;
270  const int nGrid, nIter;
271  KokkosSparse::DeviceConfig dev_config;
272 
273  PerformanceDriverOp(const int nGrid_, const int nIter_,
274  KokkosSparse::DeviceConfig dev_config_) :
275  nGrid(nGrid_), nIter(nIter_), dev_config(dev_config_) {}
276 
277  template <typename ArgT>
278  void operator() (ArgT arg) const {
279  const int ensemble = ArgT::value;
280  typedef typename Storage::template apply_N<ensemble> NewStorageApply;
281  typedef typename NewStorageApply::type storage_type;
282 
283  const std::vector<double> perf_scalar =
284  test_scalar_spmv<Scalar,Ordinal,Device>(
285  ensemble, nGrid, nIter, dev_config );
286 
287  const std::vector<double> perf_mpvector =
288  test_mpvector_spmv<storage_type>(
290 
291  std::cout << nGrid << " , "
292  << perf_scalar[0] << " , "
293  << perf_scalar[2] << " , "
294  << perf_scalar[1] << " , "
295  << perf_scalar[3] << " , "
296  << perf_scalar[4] / perf_scalar[4] << " , "
297  << perf_scalar[4] << " , "
298  << perf_mpvector[4]/ perf_scalar[4] << " , "
299  << perf_mpvector[4] << " , "
300  << std::endl;
301  }
302 };
303 
304 template <class Storage, int entry_min, int entry_max, int entry_step>
305 void performance_test_driver( const int nGrid,
306  const int nIter,
307  KokkosSparse::DeviceConfig dev_config)
308 {
309  std::cout.precision(8);
310  std::cout << std::endl
311  << "\"Grid Size\" , "
312  << "\"FEM Size\" , "
313  << "\"FEM Graph Size\" , "
314  << "\"Ensemble Size\" , "
315  << "\"Scalar SpMv Time\" , "
316  << "\"Scalar SpMv Speedup\" , "
317  << "\"Scalar SpMv GFLOPS\" , "
318  << "\"MPVector SpMv Speedup\" , "
319  << "\"MPVector SpMv GFLOPS\" , "
320  << std::endl;
321 
322  // Loop over [entry_min, entry_max] vector entries per thread
323  typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
324  PerformanceDriverOp<Storage> op(nGrid, nIter, dev_config);
325  Sacado::mpl::for_each_no_kokkos<Range> f(op);
326 }
Stokhos::StandardStorage< int, double > storage_type
Stokhos_MV_Multiply_Op< Stokhos::DefaultMultiply > DefaultMultiply
Kokkos::DefaultExecutionSpace execution_space
std::vector< double > test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:187
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
std::vector< double > test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config, MultiplyTag tag)
Definition: TestSpMv.hpp:102
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestSpMv.hpp:59
KokkosSparse::DeviceConfig dev_config
Definition: TestSpMv.hpp:271
Storage::execution_space Device
Definition: TestSpMv.hpp:269
Storage::value_type Scalar
Definition: TestSpMv.hpp:267
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
Definition: TestSpMv.hpp:68
Storage::ordinal_type Ordinal
Definition: TestSpMv.hpp:268
void performance_test_driver(const int nGrid, const int nIter, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:305
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Kokkos::Example::FENL::DeviceConfig dev_config
PerformanceDriverOp(const int nGrid_, const int nIter_, KokkosSparse::DeviceConfig dev_config_)
Definition: TestSpMv.hpp:273
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)