Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestSpMM.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // Kokkos CrsMatrix
44 #include "KokkosSparse_CrsMatrix.hpp"
45 #include "KokkosSparse_spmv.hpp"
46 
47 
48 // Utilities
49 #include "impl/Kokkos_Timer.hpp"
50 
// Flatten the grid coordinate (i,j,k) of an N x N x N box into one linear
// index.  k is the fastest-varying coordinate and i the slowest, i.e. the
// result equals i*N*N + j*N + k.
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Nested (Horner) form: two multiplications instead of three.
  return N * ( N * i + j ) + k ;
}
60 
61 inline
62 size_t generate_fem_graph( size_t N ,
63  std::vector< std::vector<size_t> > & graph )
64 {
65  graph.resize( N * N * N , std::vector<size_t>() );
66 
67  size_t total = 0 ;
68 
69  for ( int i = 0 ; i < (int) N ; ++i ) {
70  for ( int j = 0 ; j < (int) N ; ++j ) {
71  for ( int k = 0 ; k < (int) N ; ++k ) {
72 
73  const size_t row = map_fem_graph_coord((int)N,i,j,k);
74 
75  graph[row].reserve(27);
76 
77  for ( int ii = -1 ; ii < 2 ; ++ii ) {
78  for ( int jj = -1 ; jj < 2 ; ++jj ) {
79  for ( int kk = -1 ; kk < 2 ; ++kk ) {
80  if ( 0 <= i + ii && i + ii < (int) N &&
81  0 <= j + jj && j + jj < (int) N &&
82  0 <= k + kk && k + kk < (int) N ) {
83  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
84 
85  graph[row].push_back(col);
86  }
87  }}}
88  total += graph[row].size();
89  }}}
90 
91  return total ;
92 }
93 
94 template <typename ScalarType, typename OrdinalType, typename Device>
95 void
96 test_spmm(const OrdinalType ensemble_length,
97  const OrdinalType nGrid,
98  const OrdinalType iterCount,
99  std::vector<double>& scalar_perf,
100  std::vector<double>& block_left_perf,
101  std::vector<double>& block_right_perf)
102 {
103  typedef ScalarType value_type;
104  typedef OrdinalType ordinal_type;
105  typedef Device execution_space;
106  typedef Kokkos::View< value_type*, execution_space > vector_type;
107  typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
108  //typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > right_multivec_type;
109  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
110  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
111  typedef typename matrix_type::values_type matrix_values_type;
112 
113  //------------------------------
114  // Generate graph for "FEM" box structure:
115 
116  std::vector< std::vector<size_t> > fem_graph;
117  const size_t fem_length = nGrid * nGrid * nGrid;
118  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
119 
120  //------------------------------
121  // Generate input vectors:
122 
123  std::vector<vector_type> x(ensemble_length);
124  std::vector<vector_type> y(ensemble_length);
125  for (ordinal_type e=0; e<ensemble_length; ++e) {
126  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
127  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
128 
129  Kokkos::deep_copy( x[e] , value_type(1.0) );
130  Kokkos::deep_copy( y[e] , value_type(0.0) );
131  }
132  left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing("xl"), fem_length, ensemble_length);
133  left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing("yl"), fem_length, ensemble_length);
134  // right_multivec_type xr(Kokkos::ViewAllocateWithoutInitializing("xr"), fem_length, ensemble_length);
135  // right_multivec_type yr(Kokkos::ViewAllocateWithoutInitializing("yr"), fem_length, ensemble_length);
136  Kokkos::deep_copy(xl, value_type(1.0));
137  //Kokkos::deep_copy(xr, value_type(1.0));
138  Kokkos::deep_copy(yl, value_type(0.0));
139  //Kokkos::deep_copy(yr, value_type(0.0));
140 
141  //------------------------------
142  // Generate matrix
143 
144  matrix_graph_type matrix_graph =
145  Kokkos::create_staticcrsgraph<matrix_graph_type>(
146  std::string("test crs graph"), fem_graph);
147  matrix_values_type matrix_values =
148  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
149  matrix_type matrix("matrix", fem_length, matrix_values, matrix_graph);
150  Kokkos::deep_copy( matrix_values , value_type(1.0) );
151 
152  //------------------------------
153  // Scalar multiply
154 
155  {
156  // warm up
157  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
158  for (ordinal_type e=0; e<ensemble_length; ++e) {
159  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
160  }
161  }
162 
163  execution_space().fence();
164  Kokkos::Impl::Timer clock ;
165  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
166  for (ordinal_type e=0; e<ensemble_length; ++e) {
167  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
168  }
169  }
170  execution_space().fence();
171 
172  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
173  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
174 
175  scalar_perf.resize(5);
176  scalar_perf[0] = fem_length;
177  scalar_perf[1] = ensemble_length;
178  scalar_perf[2] = graph_length;
179  scalar_perf[3] = seconds_per_iter;
180  scalar_perf[4] = flops / seconds_per_iter;
181  }
182 
183  //------------------------------
184  // Block-left multiply
185 
186  {
187  // warm up
188  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
189  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
190  }
191 
192  execution_space().fence();
193  Kokkos::Impl::Timer clock ;
194  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
195  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
196  }
197  execution_space().fence();
198 
199  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
200  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
201 
202  block_left_perf.resize(5);
203  block_left_perf[0] = fem_length;
204  block_left_perf[1] = ensemble_length;
205  block_left_perf[2] = graph_length;
206  block_left_perf[3] = seconds_per_iter;
207  block_left_perf[4] = flops / seconds_per_iter;
208  }
209 
210 #if 0
211  //------------------------------
212  // Block-right multiply
213 
214  {
215  // warm up
216  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
217  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
218  }
219 
220  execution_space().fence();
221  Kokkos::Impl::Timer clock ;
222  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
223  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
224  }
225  execution_space().fence();
226 
227  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
228  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
229 
230  block_right_perf.resize(5);
231  block_right_perf[0] = fem_length;
232  block_right_perf[1] = ensemble_length;
233  block_right_perf[2] = graph_length;
234  block_right_perf[3] = seconds_per_iter;
235  block_right_perf[4] = flops / seconds_per_iter;
236  }
237 #endif
238 
239 }
240 
241 template <typename Scalar, typename Ordinal, typename Device>
243  const Ordinal nIter,
244  const Ordinal ensemble_min,
245  const Ordinal ensemble_max,
246  const Ordinal ensemble_step )
247 {
248  std::cout.precision(8);
249  std::cout << std::endl
250  << "\"Grid Size\" , "
251  << "\"FEM Size\" , "
252  << "\"FEM Graph Size\" , "
253  << "\"Ensemble Size\" , "
254  << "\"Scalar SpMM Time\" , "
255  << "\"Scalar SpMM Speedup\" , "
256  << "\"Scalar SpMM GFLOPS\" , "
257  << "\"Block-Left SpMM Speedup\" , "
258  << "\"Block-Left SpMM GFLOPS\" , "
259  //<< "\"Block_Right SpMM Speedup\" , "
260  //<< "\"Block_Right SpMM GFLOPS\" , "
261  << std::endl;
262 
263  std::vector<double> perf_scalar, perf_block_left, perf_block_right;
264  for (Ordinal e=ensemble_min; e<=ensemble_max; e+=ensemble_step) {
265 
266  test_spmm<Scalar,Ordinal,Device>(
267  e, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right );
268 
269  std::cout << nGrid << " , "
270  << perf_scalar[0] << " , "
271  << perf_scalar[2] << " , "
272  << perf_scalar[1] << " , "
273  << perf_scalar[3] << " , "
274  << perf_scalar[4] / perf_scalar[4] << " , "
275  << perf_scalar[4] << " , "
276  << perf_block_left[4]/ perf_scalar[4] << " , "
277  << perf_block_left[4] << " , "
278  //<< perf_block_right[4]/ perf_scalar[4] << " , "
279  //<< perf_block_right[4] << " , "
280  << std::endl;
281 
282  }
283 }
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
Definition: TestSpMM.hpp:62
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestSpMM.hpp:53
void test_spmm(const OrdinalType ensemble_length, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf)
Definition: TestSpMM.hpp:96
Kokkos::DefaultExecutionSpace execution_space
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal ensemble_min, const Ordinal ensemble_max, const Ordinal ensemble_step)
Definition: TestSpMM.hpp:242
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)