Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
45 
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Teuchos_TimeMonitor.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp"
50 #include "Ifpack2_Utilities.hpp"
51 #include "Ifpack2_Relaxation_decl.hpp"
52 #include "MatrixMarket_Tpetra.hpp"
53 #include <cstdlib>
54 #include <sstream>
55 #include "KokkosSparse_gauss_seidel.hpp"
56 
57 
58 // mfh 28 Mar 2013: Uncomment out these three lines to compute
59 // statistics on diagonal entries in compute().
60 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
61 // # define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1
62 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
63 
64 namespace {
65  // Validate that a given int is nonnegative.
66  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
67  public:
68  // Constructor (does nothing).
69  NonnegativeIntValidator () {}
70 
71  // ParameterEntryValidator wants this method.
72  Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
73  return Teuchos::null;
74  }
75 
76  // Actually validate the parameter's value.
77  void
78  validate (const Teuchos::ParameterEntry& entry,
79  const std::string& paramName,
80  const std::string& sublistName) const
81  {
82  using std::endl;
83  Teuchos::any anyVal = entry.getAny (true);
84  const std::string entryName = entry.getAny (false).typeName ();
85 
86  TEUCHOS_TEST_FOR_EXCEPTION(
87  anyVal.type () != typeid (int),
88  Teuchos::Exceptions::InvalidParameterType,
89  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
90  << "\" has the wrong type." << endl << "Parameter: " << paramName
91  << endl << "Type specified: " << entryName << endl
92  << "Type required: int" << endl);
93 
94  const int val = Teuchos::any_cast<int> (anyVal);
95  TEUCHOS_TEST_FOR_EXCEPTION(
96  val < 0, Teuchos::Exceptions::InvalidParameterValue,
97  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
98  << "\" is negative." << endl << "Parameter: " << paramName
99  << endl << "Value specified: " << val << endl
100  << "Required range: [0, INT_MAX]" << endl);
101  }
102 
103  // ParameterEntryValidator wants this method.
104  const std::string getXMLTypeName () const {
105  return "NonnegativeIntValidator";
106  }
107 
108  // ParameterEntryValidator wants this method.
109  void
110  printDoc (const std::string& docString,
111  std::ostream &out) const
112  {
113  Teuchos::StrUtils::printLines (out, "# ", docString);
114  out << "#\tValidator Used: " << std::endl;
115  out << "#\t\tNonnegativeIntValidator" << std::endl;
116  }
117  };
118 
119  // A way to get a small positive number (eps() for floating-point
120  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
121  // define it (for example, for integer values).
122  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
123  class SmallTraits {
124  public:
125  // Return eps if Scalar is a floating-point type, else return 1.
126  static const Scalar eps ();
127  };
128 
129  // Partial specialization for when Scalar is not a floating-point type.
130  template<class Scalar>
131  class SmallTraits<Scalar, true> {
132  public:
133  static const Scalar eps () {
134  return Teuchos::ScalarTraits<Scalar>::one ();
135  }
136  };
137 
138  // Partial specialization for when Scalar is a floating-point type.
139  template<class Scalar>
140  class SmallTraits<Scalar, false> {
141  public:
142  static const Scalar eps () {
143  return Teuchos::ScalarTraits<Scalar>::eps ();
144  }
145  };
146 
147 
148 
149 } // namespace (anonymous)
150 
151 namespace Ifpack2 {
152 
153 template<class MatrixType>
154 void Relaxation<MatrixType>::updateCachedMultiVector(const Teuchos::RCP<const Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> > & map, size_t numVecs) const{
155  // Allocate a multivector if the cached one isn't perfect
156  // Note: We check for map pointer equality here since it is much cheaper than isSameAs()
157  if(cachedMV_.is_null() || &*map != &*cachedMV_->getMap() || cachedMV_->getNumVectors() !=numVecs)
158  cachedMV_ = Teuchos::rcp(new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(map, numVecs, false));
159 }
160 
161 
162 template<class MatrixType>
164 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
165 {
166  if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
167  Importer_ = Teuchos::null;
168  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
169  isInitialized_ = false;
170  IsComputed_ = false;
171  diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
172  savedDiagOffsets_ = false;
173  hasBlockCrsMatrix_ = false;
174  if (! A.is_null ()) {
175  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
176  }
177  A_ = A;
178  }
179 }
180 
181 
182 template<class MatrixType>
184 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
185 : A_ (A),
186  NumSweeps_ (1),
187  PrecType_ (Ifpack2::Details::JACOBI),
188  DampingFactor_ (STS::one ()),
189  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
190  false : // a reasonable default if there's no communicator
191  A->getRowMap ()->getComm ()->getSize () > 1),
192  ZeroStartingSolution_ (true),
193  DoBackwardGS_ (false),
194  DoL1Method_ (false),
195  L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
196  MinDiagonalValue_ (STS::zero ()),
197  fixTinyDiagEntries_ (false),
198  checkDiagEntries_ (false),
199  is_matrix_structurally_symmetric_ (false),
200  ifpack2_dump_matrix_(false),
201  isInitialized_ (false),
202  IsComputed_ (false),
203  NumInitialize_ (0),
204  NumCompute_ (0),
205  NumApply_ (0),
206  InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
207  ComputeTime_ (0.0),
208  ApplyTime_ (0.0),
209  ComputeFlops_ (0.0),
210  ApplyFlops_ (0.0),
211  globalMinMagDiagEntryMag_ (STM::zero ()),
212  globalMaxMagDiagEntryMag_ (STM::zero ()),
213  globalNumSmallDiagEntries_ (0),
214  globalNumZeroDiagEntries_ (0),
215  globalNumNegDiagEntries_ (0),
216  globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
217  savedDiagOffsets_ (false),
218  hasBlockCrsMatrix_ (false)
219 {
220  this->setObjectLabel ("Ifpack2::Relaxation");
221 }
222 
223 //==========================================================================
224 template<class MatrixType>
226 }
227 
228 template<class MatrixType>
229 Teuchos::RCP<const Teuchos::ParameterList>
231 {
232  using Teuchos::Array;
233  using Teuchos::ParameterList;
234  using Teuchos::parameterList;
235  using Teuchos::RCP;
236  using Teuchos::rcp;
237  using Teuchos::rcp_const_cast;
238  using Teuchos::rcp_implicit_cast;
239  using Teuchos::setStringToIntegralParameter;
240  typedef Teuchos::ParameterEntryValidator PEV;
241 
242  if (validParams_.is_null ()) {
243  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
244 
245  // Set a validator that automatically converts from the valid
246  // string options to their enum values.
247  Array<std::string> precTypes (5);
248  precTypes[0] = "Jacobi";
249  precTypes[1] = "Gauss-Seidel";
250  precTypes[2] = "Symmetric Gauss-Seidel";
251  precTypes[3] = "MT Gauss-Seidel";
252  precTypes[4] = "MT Symmetric Gauss-Seidel";
253  Array<Details::RelaxationType> precTypeEnums (5);
254  precTypeEnums[0] = Details::JACOBI;
255  precTypeEnums[1] = Details::GS;
256  precTypeEnums[2] = Details::SGS;
257  precTypeEnums[3] = Details::MTGS;
258  precTypeEnums[4] = Details::MTSGS;
259  const std::string defaultPrecType ("Jacobi");
260  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
261  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
262  pl.getRawPtr ());
263 
264  const int numSweeps = 1;
265  RCP<PEV> numSweepsValidator =
266  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
267  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
268  rcp_const_cast<const PEV> (numSweepsValidator));
269 
270  const scalar_type dampingFactor = STS::one ();
271  pl->set ("relaxation: damping factor", dampingFactor);
272 
273  const bool zeroStartingSolution = true;
274  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
275 
276  const bool doBackwardGS = false;
277  pl->set ("relaxation: backward mode", doBackwardGS);
278 
279  const bool doL1Method = false;
280  pl->set ("relaxation: use l1", doL1Method);
281 
282  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
283  (STM::one() + STM::one()); // 1.5
284  pl->set ("relaxation: l1 eta", l1eta);
285 
286  const scalar_type minDiagonalValue = STS::zero ();
287  pl->set ("relaxation: min diagonal value", minDiagonalValue);
288 
289  const bool fixTinyDiagEntries = false;
290  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
291 
292  const bool checkDiagEntries = false;
293  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
294 
295  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
296  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
297 
298  const bool is_matrix_structurally_symmetric = false;
299  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
300 
301  const bool ifpack2_dump_matrix = false;
302  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
303 
304  validParams_ = rcp_const_cast<const ParameterList> (pl);
305  }
306  return validParams_;
307 }
308 
309 
310 template<class MatrixType>
311 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
312 {
313  using Teuchos::getIntegralValue;
314  using Teuchos::ParameterList;
315  using Teuchos::RCP;
316  typedef scalar_type ST; // just to make code below shorter
317 
318  if (pl.isType<double>("relaxation: damping factor")) {
319  // Make sure that ST=complex can run with a damping factor that is
320  // a double.
321  ST df = pl.get<double>("relaxation: damping factor");
322  pl.remove("relaxation: damping factor");
323  pl.set("relaxation: damping factor",df);
324  }
325 
326  pl.validateParametersAndSetDefaults (* getValidParameters ());
327 
328  const Details::RelaxationType precType =
329  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
330  const int numSweeps = pl.get<int> ("relaxation: sweeps");
331  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
332  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
333  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
334  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
335  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
336  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
337  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
338  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
339  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
340  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
341 
342  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
343 
344 
345  // "Commit" the changes, now that we've validated everything.
346  PrecType_ = precType;
347  NumSweeps_ = numSweeps;
348  DampingFactor_ = dampingFactor;
349  ZeroStartingSolution_ = zeroStartSol;
350  DoBackwardGS_ = doBackwardGS;
351  DoL1Method_ = doL1Method;
352  L1Eta_ = l1Eta;
353  MinDiagonalValue_ = minDiagonalValue;
354  fixTinyDiagEntries_ = fixTinyDiagEntries;
355  checkDiagEntries_ = checkDiagEntries;
356  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
357  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
358  localSmoothingIndices_ = localSmoothingIndices;
359 }
360 
361 
362 template<class MatrixType>
363 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
364 {
365  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
366  // but otherwise, we will get [unused] in pl
367  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
368 }
369 
370 
371 template<class MatrixType>
372 Teuchos::RCP<const Teuchos::Comm<int> >
374  TEUCHOS_TEST_FOR_EXCEPTION(
375  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
376  "The input matrix A is null. Please call setMatrix() with a nonnull "
377  "input matrix before calling this method.");
378  return A_->getRowMap ()->getComm ();
379 }
380 
381 
382 template<class MatrixType>
383 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
385  return A_;
386 }
387 
388 
389 template<class MatrixType>
390 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
391  typename MatrixType::global_ordinal_type,
392  typename MatrixType::node_type> >
394  TEUCHOS_TEST_FOR_EXCEPTION(
395  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
396  "The input matrix A is null. Please call setMatrix() with a nonnull "
397  "input matrix before calling this method.");
398  return A_->getDomainMap ();
399 }
400 
401 
402 template<class MatrixType>
403 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
404  typename MatrixType::global_ordinal_type,
405  typename MatrixType::node_type> >
407  TEUCHOS_TEST_FOR_EXCEPTION(
408  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
409  "The input matrix A is null. Please call setMatrix() with a nonnull "
410  "input matrix before calling this method.");
411  return A_->getRangeMap ();
412 }
413 
414 
415 template<class MatrixType>
417  return true;
418 }
419 
420 
421 template<class MatrixType>
423  return(NumInitialize_);
424 }
425 
426 
427 template<class MatrixType>
429  return(NumCompute_);
430 }
431 
432 
433 template<class MatrixType>
435  return(NumApply_);
436 }
437 
438 
439 template<class MatrixType>
441  return(InitializeTime_);
442 }
443 
444 
445 template<class MatrixType>
447  return(ComputeTime_);
448 }
449 
450 
451 template<class MatrixType>
453  return(ApplyTime_);
454 }
455 
456 
457 template<class MatrixType>
459  return(ComputeFlops_);
460 }
461 
462 
463 template<class MatrixType>
465  return(ApplyFlops_);
466 }
467 
468 
469 
470 template<class MatrixType>
472  TEUCHOS_TEST_FOR_EXCEPTION(
473  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
474  "The input matrix A is null. Please call setMatrix() with a nonnull "
475  "input matrix, then call compute(), before calling this method.");
476  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
477  return A_->getNodeNumRows() + A_->getNodeNumEntries();
478 }
479 
480 
481 template<class MatrixType>
482 void
484 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
485  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
486  Teuchos::ETransp /* mode */,
487  scalar_type alpha,
488  scalar_type beta) const
489 {
490  using Teuchos::as;
491  using Teuchos::RCP;
492  using Teuchos::rcp;
493  using Teuchos::rcpFromRef;
494  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
496  TEUCHOS_TEST_FOR_EXCEPTION(
497  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
498  "The input matrix A is null. Please call setMatrix() with a nonnull "
499  "input matrix, then call compute(), before calling this method.");
500  TEUCHOS_TEST_FOR_EXCEPTION(
501  ! isComputed (),
502  std::runtime_error,
503  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
504  "preconditioner instance before you may call apply(). You may call "
505  "isComputed() to find out if compute() has been called already.");
506  TEUCHOS_TEST_FOR_EXCEPTION(
507  X.getNumVectors() != Y.getNumVectors(),
508  std::runtime_error,
509  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
510  "X has " << X.getNumVectors() << " columns, but Y has "
511  << Y.getNumVectors() << " columns.");
512  TEUCHOS_TEST_FOR_EXCEPTION(
513  beta != STS::zero (), std::logic_error,
514  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
515  "implemented.");
516 
517  const std::string timerName ("Ifpack2::Relaxation::apply");
518  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
519  if (timer.is_null ()) {
520  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
521  }
522 
523  {
524  Teuchos::TimeMonitor timeMon (*timer);
525  // Special case: alpha == 0.
526  if (alpha == STS::zero ()) {
527  // No floating-point operations, so no need to update a count.
528  Y.putScalar (STS::zero ());
529  }
530  else {
531  // If X and Y alias one another, then we need to create an
532  // auxiliary vector, Xcopy (a deep copy of X).
533  RCP<const MV> Xcopy;
534  // FIXME (mfh 12 Sep 2014) This test for aliasing is incomplete.
535  {
536  auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
537  auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
538  if (X_lcl_host.data () == Y_lcl_host.data ()) {
539  Xcopy = rcp (new MV (X, Teuchos::Copy));
540  } else {
541  Xcopy = rcpFromRef (X);
542  }
543  }
544 
545  // Each of the following methods updates the flop count itself.
546  // All implementations handle zeroing out the starting solution
547  // (if necessary) themselves.
548  switch (PrecType_) {
549  case Ifpack2::Details::JACOBI:
550  ApplyInverseJacobi(*Xcopy,Y);
551  break;
552  case Ifpack2::Details::GS:
553  ApplyInverseGS(*Xcopy,Y);
554  break;
555  case Ifpack2::Details::SGS:
556  ApplyInverseSGS(*Xcopy,Y);
557  break;
558  case Ifpack2::Details::MTSGS:
559  ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
560  break;
561  case Ifpack2::Details::MTGS:
562  ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
563  break;
564 
565  default:
566  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
567  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
568  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
569  }
570  if (alpha != STS::one ()) {
571  Y.scale (alpha);
572  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
573  const double numVectors = as<double> (Y.getNumVectors ());
574  ApplyFlops_ += numGlobalRows * numVectors;
575  }
576  }
577  }
578  ApplyTime_ += timer->totalElapsedTime ();
579  ++NumApply_;
580 }
581 
582 
583 template<class MatrixType>
584 void
586 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
587  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
588  Teuchos::ETransp mode) const
589 {
590  TEUCHOS_TEST_FOR_EXCEPTION(
591  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
592  "The input matrix A is null. Please call setMatrix() with a nonnull "
593  "input matrix, then call compute(), before calling this method.");
594  TEUCHOS_TEST_FOR_EXCEPTION(
595  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
596  "isComputed() must be true before you may call applyMat(). "
597  "Please call compute() before calling this method.");
598  TEUCHOS_TEST_FOR_EXCEPTION(
599  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
600  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
601  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
602  A_->apply (X, Y, mode);
603 }
604 
605 
606 template<class MatrixType>
608 {
609  TEUCHOS_TEST_FOR_EXCEPTION
610  (A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
611  "The input matrix A is null. Please call setMatrix() with a nonnull "
612  "input matrix before calling this method.");
613  const std::string timerName ("Ifpack2::Relaxation::initialize");
614  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
615  if (timer.is_null ()) {
616  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
617  }
618 
619  if (A_.is_null ()) {
620  hasBlockCrsMatrix_ = false;
621  }
622  else { // A_ is not null
623  Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
624  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
625  if (A_bcrs.is_null ()) {
626  hasBlockCrsMatrix_ = false;
627  }
628  else { // A_ is a block_crs_matrix_type
629  hasBlockCrsMatrix_ = true;
630  }
631  }
632 
633  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
634  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
635  TEUCHOS_TEST_FOR_EXCEPTION
636  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::initialize: "
637  "Multithreaded Gauss-Seidel methods currently only work when the input "
638  "matrix is a Tpetra::CrsMatrix.");
639 
640  if(this->ifpack2_dump_matrix_){
641  int random_integer = rand();
642  std::stringstream ss;
643  ss << random_integer;
644  std::string str = ss.str();
645  Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
646  std::string file_name = str + "_Ifpack2_MT_GS.mtx";
647  Teuchos::RCP<const crs_matrix_type> rcp_crs_mat = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
648  crs_writer.writeSparseFile(file_name, rcp_crs_mat);
649  }
650 
651  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
652  if (mtKernelHandle_->get_gs_handle () == NULL) {
653  mtKernelHandle_->create_gs_handle ();
654  }
655  local_matrix_type kcsr = crsMat->getLocalMatrix ();
656 
657  bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
658  is_symmetric = is_symmetric || is_matrix_structurally_symmetric_;
659 
660  using KokkosSparse::Experimental::gauss_seidel_symbolic;
661  gauss_seidel_symbolic<mt_kernel_handle_type,
662  lno_row_view_t,
663  lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
664  A_->getNodeNumRows (),
665  A_->getNodeNumCols (),
666  kcsr.graph.row_map,
667  kcsr.graph.entries,
668  is_symmetric);
669  }
670 
671 
672  InitializeTime_ += timer->totalElapsedTime ();
673  ++NumInitialize_;
674  isInitialized_ = true;
675 }
676 
677 namespace Impl {
678 template <typename BlockDiagView>
679 struct InvertDiagBlocks {
680  typedef int value_type;
681  typedef typename BlockDiagView::size_type Size;
682 
683 private:
684  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
685  template <typename View>
686  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
687  typename View::device_type, Unmanaged>;
688 
689  typedef typename BlockDiagView::non_const_value_type Scalar;
690  typedef typename BlockDiagView::device_type Device;
691  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
692  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
693 
694  UnmanagedView<BlockDiagView> block_diag_;
695  // TODO Use thread team and scratch memory space. In this first
696  // pass, provide workspace for each block.
697  RWrk rwrk_buf_;
698  UnmanagedView<RWrk> rwrk_;
699  IWrk iwrk_buf_;
700  UnmanagedView<IWrk> iwrk_;
701 
702 public:
703  InvertDiagBlocks (BlockDiagView& block_diag)
704  : block_diag_(block_diag)
705  {
706  const auto blksz = block_diag.extent(1);
707  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
708  rwrk_ = rwrk_buf_;
709  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
710  iwrk_ = iwrk_buf_;
711  }
712 
713  KOKKOS_INLINE_FUNCTION
714  void operator() (const Size i, int& jinfo) const {
715  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
716  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
717  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
718  int info = 0;
719  Tpetra::Experimental::GETF2(D_cur, ipiv, info);
720  if (info) {
721  ++jinfo;
722  return;
723  }
724  Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
725  if (info) ++jinfo;
726  }
727 
728  // Report the number of blocks with errors.
729  KOKKOS_INLINE_FUNCTION
730  void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
731 };
732 }
733 
734 template<class MatrixType>
735 void Relaxation<MatrixType>::computeBlockCrs ()
736 {
737  using Kokkos::ALL;
738  using Teuchos::Array;
739  using Teuchos::ArrayRCP;
740  using Teuchos::ArrayView;
741  using Teuchos::as;
742  using Teuchos::Comm;
743  using Teuchos::RCP;
744  using Teuchos::rcp;
745  using Teuchos::REDUCE_MAX;
746  using Teuchos::REDUCE_MIN;
747  using Teuchos::REDUCE_SUM;
748  using Teuchos::rcp_dynamic_cast;
749  using Teuchos::reduceAll;
750  typedef local_ordinal_type LO;
751  typedef typename node_type::device_type device_type;
752 
753  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
754  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
755  if (timer.is_null ()) {
756  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
757  }
758  {
759  TEUCHOS_TEST_FOR_EXCEPTION(
760  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
761  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
762  "with a nonnull input matrix, then call initialize(), before calling "
763  "this method.");
764  const block_crs_matrix_type* blockCrsA =
765  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
766  TEUCHOS_TEST_FOR_EXCEPTION(
767  blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::"
768  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
769  "got this far. Please report this bug to the Ifpack2 developers.");
770 
771  const scalar_type one = STS::one ();
772 
773  // Reset state.
774  IsComputed_ = false;
775 
776  const LO lclNumMeshRows =
777  blockCrsA->getCrsGraph ().getNodeNumRows ();
778  const LO blockSize = blockCrsA->getBlockSize ();
779 
780  if (! savedDiagOffsets_) {
781  blockDiag_ = block_diag_type (); // clear it before reallocating
782  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
783  lclNumMeshRows, blockSize, blockSize);
784  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
785  // Clear diagOffsets_ first (by assigning an empty View to it)
786  // to save memory, before reallocating.
787  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
788  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
789  }
790  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
791  TEUCHOS_TEST_FOR_EXCEPTION
792  (static_cast<size_t> (diagOffsets_.extent (0)) !=
793  static_cast<size_t> (blockDiag_.extent (0)),
794  std::logic_error, "diagOffsets_.extent(0) = " <<
795  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
796  << blockDiag_.extent (0) <<
797  ". Please report this bug to the Ifpack2 developers.");
798  savedDiagOffsets_ = true;
799  }
800  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
801 
802  // Use an unmanaged View in this method, so that when we take
803  // subviews of it (to get each diagonal block), we don't have to
804  // touch the reference count. Reference count updates are a
805  // thread scalability bottleneck and have a performance cost even
806  // without using threads.
807  unmanaged_block_diag_type blockDiag = blockDiag_;
808 
809  if (DoL1Method_ && IsParallel_) {
810  const scalar_type two = one + one;
811  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
812  Array<LO> indices (maxLength);
813  Array<scalar_type> values (maxLength * blockSize * blockSize);
814  size_t numEntries = 0;
815 
816  for (LO i = 0; i < lclNumMeshRows; ++i) {
817  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
818  blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
819 
820  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
821  for (LO subRow = 0; subRow < blockSize; ++subRow) {
822  magnitude_type diagonal_boost = STM::zero ();
823  for (size_t k = 0 ; k < numEntries ; ++k) {
824  if (indices[k] > lclNumMeshRows) {
825  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
826  for (LO subCol = 0; subCol < blockSize; ++subCol) {
827  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
828  }
829  }
830  }
831  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
832  diagBlock(subRow, subRow) += diagonal_boost;
833  }
834  }
835  }
836  }
837 
838  int info = 0;
839  {
840  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
841  typedef typename unmanaged_block_diag_type::execution_space exec_space;
842  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
843 
844  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
845  // FIXME (mfh 19 May 2017) Different processes may not
846  // necessarily have this error all at the same time, so it would
847  // be better just to preserve a local error state and let users
848  // check.
849  TEUCHOS_TEST_FOR_EXCEPTION
850  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
851  << " diagonal blocks.");
852  }
853 
854  // In a debug build, do an extra test to make sure that all the
855  // factorizations were computed correctly.
856 #ifdef HAVE_IFPACK2_DEBUG
857  const int numResults = 2;
858  // Use "max = -min" trick to get min and max in a single all-reduce.
859  int lclResults[2], gblResults[2];
860  lclResults[0] = info;
861  lclResults[1] = -info;
862  gblResults[0] = 0;
863  gblResults[1] = 0;
864  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
865  numResults, lclResults, gblResults);
866  TEUCHOS_TEST_FOR_EXCEPTION
867  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
868  "Ifpack2::Relaxation::compute: When processing the input "
869  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
870  "failed on one or more (MPI) processes.");
871 #endif // HAVE_IFPACK2_DEBUG
872 
873  Importer_ = A_->getGraph ()->getImporter ();
874  } // end TimeMonitor scope
875 
876  ComputeTime_ += timer->totalElapsedTime ();
877  ++NumCompute_;
878  IsComputed_ = true;
879 }
880 
881 template<class MatrixType>
883 {
884  using Teuchos::Array;
885  using Teuchos::ArrayRCP;
886  using Teuchos::ArrayView;
887  using Teuchos::as;
888  using Teuchos::Comm;
889  using Teuchos::RCP;
890  using Teuchos::rcp;
891  using Teuchos::REDUCE_MAX;
892  using Teuchos::REDUCE_MIN;
893  using Teuchos::REDUCE_SUM;
894  using Teuchos::rcp_dynamic_cast;
895  using Teuchos::reduceAll;
896  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
897  global_ordinal_type, node_type> vector_type;
898  typedef typename vector_type::device_type device_type;
899  const scalar_type zero = STS::zero ();
900  const scalar_type one = STS::one ();
901 
902  // We don't count initialization in compute() time.
903  if (! isInitialized ()) {
904  initialize ();
905  }
906 
907  if (hasBlockCrsMatrix_) {
908  computeBlockCrs ();
909  return;
910  }
911 
912 
913  const std::string timerName ("Ifpack2::Relaxation::compute");
914  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
915  if (timer.is_null ()) {
916  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
917  }
918 
919 
920 
921  {
922  TEUCHOS_TEST_FOR_EXCEPTION(
923  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
924  "The input matrix A is null. Please call setMatrix() with a nonnull "
925  "input matrix, then call initialize(), before calling this method.");
926 
927  // Reset state.
928  IsComputed_ = false;
929 
930  TEUCHOS_TEST_FOR_EXCEPTION(
931  NumSweeps_ < 0, std::logic_error,
932  "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0. "
933  "Please report this bug to the Ifpack2 developers.");
934 
935  Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
936 
937  // Extract the diagonal entries. The CrsMatrix static graph
938  // version is faster for subsequent calls to compute(), since it
939  // caches the diagonal offsets.
940  //
941  // isStaticGraph() == true means that the matrix was created with
942  // a const graph. The only requirement is that the structure of
943  // the matrix never changes, so isStaticGraph() == true is a bit
944  // more conservative than we need. However, Tpetra doesn't (as of
945  // 05 Apr 2013) have a way to tell if the graph hasn't changed
946  // since the last time we used it.
947  {
948  // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
949  // instead of MatrixType, because isStaticGraph is a CrsMatrix
950  // method (not inherited from RowMatrix's interface). It's
951  // perfectly valid to do relaxation on a RowMatrix which is not
952  // a CrsMatrix.
953  const crs_matrix_type* crsMat =
954  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
955  if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
956  A_->getLocalDiagCopy (*Diagonal_); // slow path
957  } else {
958  if (! savedDiagOffsets_) { // we haven't precomputed offsets
959  const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
960  if (diagOffsets_.extent (0) < lclNumRows) {
961  typedef typename node_type::device_type DT;
962  diagOffsets_ = Kokkos::View<size_t*, DT> (); // clear 1st to save mem
963  diagOffsets_ = Kokkos::View<size_t*, DT> ("offsets", lclNumRows);
964  }
965  crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
966  savedDiagOffsets_ = true;
967  }
968  crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
969 #ifdef HAVE_IFPACK2_DEBUG
970  // Validate the fast-path diagonal against the slow-path diagonal.
971  vector_type D_copy (A_->getRowMap ());
972  A_->getLocalDiagCopy (D_copy);
973  D_copy.update (STS::one (), *Diagonal_, -STS::one ());
974  const magnitude_type err = D_copy.normInf ();
975  // The two diagonals should be exactly the same, so their
976  // difference should be exactly zero.
977  TEUCHOS_TEST_FOR_EXCEPTION(
978  err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
979  "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = "
980  << err << ".");
981 #endif // HAVE_IFPACK2_DEBUG
982  }
983  }
984 
985  // If we're checking the computed inverse diagonal, then keep a
986  // copy of the original diagonal entries for later comparison.
987  RCP<vector_type> origDiag;
988  if (checkDiagEntries_) {
989  origDiag = rcp (new vector_type (A_->getRowMap ()));
990  Tpetra::deep_copy (*origDiag, *Diagonal_);
991  }
992 
993  const size_t numMyRows = A_->getNodeNumRows ();
994 
995  // We're about to read and write diagonal entries on the host.
996  Diagonal_->template sync<Kokkos::HostSpace> ();
997  Diagonal_->template modify<Kokkos::HostSpace> ();
998  auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
999  auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
1000  // FIXME (mfh 12 Jan 2016) temp fix for Kokkos::complex vs. std::complex.
1001  scalar_type* const diag = reinterpret_cast<scalar_type*> (diag_1d.data ());
1002 
1003  // Setup for L1 Methods.
1004  // Here we add half the value of the off-processor entries in the row,
1005  // but only if diagonal isn't sufficiently large.
1006  //
1007  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1008  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1009  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1010  if (DoL1Method_ && IsParallel_) {
1011  const scalar_type two = one + one;
1012  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1013  Array<local_ordinal_type> indices (maxLength);
1014  Array<scalar_type> values (maxLength);
1015  size_t numEntries;
1016 
1017  for (size_t i = 0; i < numMyRows; ++i) {
1018  A_->getLocalRowCopy (i, indices (), values (), numEntries);
1019  magnitude_type diagonal_boost = STM::zero ();
1020  for (size_t k = 0 ; k < numEntries ; ++k) {
1021  if (static_cast<size_t> (indices[k]) > numMyRows) {
1022  diagonal_boost += STS::magnitude (values[k] / two);
1023  }
1024  }
1025  if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
1026  diag[i] += diagonal_boost;
1027  }
1028  }
1029  }
1030 
1031  //
1032  // Invert the diagonal entries of the matrix (not in place).
1033  //
1034 
1035  // Precompute some quantities for "fixing" zero or tiny diagonal
1036  // entries. We'll only use them if this "fixing" is enabled.
1037  //
1038  // SmallTraits covers for the lack of eps() in
1039  // Teuchos::ScalarTraits when its template parameter is not a
1040  // floating-point type. (Ifpack2 sometimes gets instantiated for
1041  // integer Scalar types.)
1042  const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1043  one / SmallTraits<scalar_type>::eps () :
1044  one / MinDiagonalValue_;
1045  // It's helpful not to have to recompute this magnitude each time.
1046  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1047 
1048  if (checkDiagEntries_) {
1049  //
1050  // Check diagonal elements, replace zero elements with the minimum
1051  // diagonal value, and store their inverses. Count the number of
1052  // "small" and zero diagonal entries while we're at it.
1053  //
1054  size_t numSmallDiagEntries = 0; // "small" includes zero
1055  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1056  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1057 
1058  // As we go, keep track of the diagonal entries with the least and
1059  // greatest magnitude. We could use the trick of starting the min
1060  // with +Inf and the max with -Inf, but that doesn't work if
1061  // scalar_type is a built-in integer type. Thus, we have to start
1062  // by reading the first diagonal entry redundantly.
1063  // scalar_type minMagDiagEntry = zero;
1064  // scalar_type maxMagDiagEntry = zero;
1065  magnitude_type minMagDiagEntryMag = STM::zero ();
1066  magnitude_type maxMagDiagEntryMag = STM::zero ();
1067  if (numMyRows > 0) {
1068  const scalar_type d_0 = diag[0];
1069  const magnitude_type d_0_mag = STS::magnitude (d_0);
1070  // minMagDiagEntry = d_0;
1071  // maxMagDiagEntry = d_0;
1072  minMagDiagEntryMag = d_0_mag;
1073  maxMagDiagEntryMag = d_0_mag;
1074  }
1075 
1076  // Go through all the diagonal entries. Compute counts of
1077  // small-magnitude, zero, and negative-real-part entries. Invert
1078  // the diagonal entries that aren't too small. For those that are
1079  // too small in magnitude, replace them with 1/MinDiagonalValue_
1080  // (or 1/eps if MinDiagonalValue_ happens to be zero).
1081  for (size_t i = 0 ; i < numMyRows; ++i) {
1082  const scalar_type d_i = diag[i];
1083  const magnitude_type d_i_mag = STS::magnitude (d_i);
1084  const magnitude_type d_i_real = STS::real (d_i);
1085 
1086  // We can't compare complex numbers, but we can compare their
1087  // real parts.
1088  if (d_i_real < STM::zero ()) {
1089  ++numNegDiagEntries;
1090  }
1091  if (d_i_mag < minMagDiagEntryMag) {
1092  // minMagDiagEntry = d_i;
1093  minMagDiagEntryMag = d_i_mag;
1094  }
1095  if (d_i_mag > maxMagDiagEntryMag) {
1096  // maxMagDiagEntry = d_i;
1097  maxMagDiagEntryMag = d_i_mag;
1098  }
1099 
1100  if (fixTinyDiagEntries_) {
1101  // <= not <, in case minDiagValMag is zero.
1102  if (d_i_mag <= minDiagValMag) {
1103  ++numSmallDiagEntries;
1104  if (d_i_mag == STM::zero ()) {
1105  ++numZeroDiagEntries;
1106  }
1107  diag[i] = oneOverMinDiagVal;
1108  } else {
1109  diag[i] = one / d_i;
1110  }
1111  }
1112  else { // Don't fix zero or tiny diagonal entries.
1113  // <= not <, in case minDiagValMag is zero.
1114  if (d_i_mag <= minDiagValMag) {
1115  ++numSmallDiagEntries;
1116  if (d_i_mag == STM::zero ()) {
1117  ++numZeroDiagEntries;
1118  }
1119  }
1120  diag[i] = one / d_i;
1121  }
1122  }
1123 
1124  // Count floating-point operations of computing the inverse diagonal.
1125  //
1126  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1127  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1128  ComputeFlops_ += 4.0 * numMyRows;
1129  } else {
1130  ComputeFlops_ += numMyRows;
1131  }
1132 
1133  // Collect global data about the diagonal entries. Only do this
1134  // (which involves O(1) all-reduces) if the user asked us to do
1135  // the extra work.
1136  //
1137  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1138  // zero rows. Fixing this requires one of two options:
1139  //
1140  // 1. Use a custom reduction operation that ignores processes
1141  // with zero diagonal entries.
1142  // 2. Split the communicator, compute all-reduces using the
1143  // subcommunicator over processes that have a nonzero number
1144  // of diagonal entries, and then broadcast from one of those
1145  // processes (if there is one) to the processes in the other
1146  // subcommunicator.
1147  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1148 
1149  // Compute global min and max magnitude of entries.
1150  Array<magnitude_type> localVals (2);
1151  localVals[0] = minMagDiagEntryMag;
1152  // (- (min (- x))) is the same as (max x).
1153  localVals[1] = -maxMagDiagEntryMag;
1154  Array<magnitude_type> globalVals (2);
1155  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1156  localVals.getRawPtr (),
1157  globalVals.getRawPtr ());
1158  globalMinMagDiagEntryMag_ = globalVals[0];
1159  globalMaxMagDiagEntryMag_ = -globalVals[1];
1160 
1161  Array<size_t> localCounts (3);
1162  localCounts[0] = numSmallDiagEntries;
1163  localCounts[1] = numZeroDiagEntries;
1164  localCounts[2] = numNegDiagEntries;
1165  Array<size_t> globalCounts (3);
1166  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1167  localCounts.getRawPtr (),
1168  globalCounts.getRawPtr ());
1169  globalNumSmallDiagEntries_ = globalCounts[0];
1170  globalNumZeroDiagEntries_ = globalCounts[1];
1171  globalNumNegDiagEntries_ = globalCounts[2];
1172 
1173  // Forestall "set but not used" compiler warnings.
1174  // (void) minMagDiagEntry;
1175  // (void) maxMagDiagEntry;
1176 
1177  // Compute and save the difference between the computed inverse
1178  // diagonal, and the original diagonal's inverse.
1179  //
1180  // NOTE (mfh 11 Jan 2016) We need to sync Diagonal_ back from
1181  // host to device for the update kernel below, and we don't need
1182  // to modify it or sync it back again here.
1183  vector_type diff (A_->getRowMap ());
1184  diff.reciprocal (*origDiag);
1185  Diagonal_->template sync<device_type> ();
1186  diff.update (-one, *Diagonal_, one);
1187  globalDiagNormDiff_ = diff.norm2 ();
1188  }
1189  else { // don't check diagonal elements
1190  if (fixTinyDiagEntries_) {
1191  // Go through all the diagonal entries. Invert those that
1192  // aren't too small in magnitude. For those that are too
1193  // small in magnitude, replace them with oneOverMinDiagVal.
1194  for (size_t i = 0 ; i < numMyRows; ++i) {
1195  const scalar_type d_i = diag[i];
1196  const magnitude_type d_i_mag = STS::magnitude (d_i);
1197 
1198  // <= not <, in case minDiagValMag is zero.
1199  if (d_i_mag <= minDiagValMag) {
1200  diag[i] = oneOverMinDiagVal;
1201  } else {
1202  diag[i] = one / d_i;
1203  }
1204  }
1205  }
1206  else { // don't fix tiny or zero diagonal entries
1207  for (size_t i = 0 ; i < numMyRows; ++i) {
1208  diag[i] = one / diag[i];
1209  }
1210  }
1211  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1212  ComputeFlops_ += 4.0 * numMyRows;
1213  } else {
1214  ComputeFlops_ += numMyRows;
1215  }
1216  }
1217 
1218  if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1219  PrecType_ == Ifpack2::Details::SGS)) {
1220  // mfh 21 Mar 2013: The Import object may be null, but in that
1221  // case, the domain and column Maps are the same and we don't
1222  // need to Import anyway.
1223  Importer_ = A_->getGraph ()->getImporter ();
1224  Diagonal_->template sync<device_type> ();
1225  }
1226  //KokkosKernels GaussSiedel Initialization.
1227  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1228  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
1229  TEUCHOS_TEST_FOR_EXCEPTION
1230  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::compute: "
1231  "Multithreaded Gauss-Seidel methods currently only work when the input "
1232  "matrix is a Tpetra::CrsMatrix.");
1233  local_matrix_type kcsr = crsMat->getLocalMatrix ();
1234 
1235  const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1236  using KokkosSparse::Experimental::gauss_seidel_numeric;
1237  gauss_seidel_numeric<mt_kernel_handle_type,
1238  lno_row_view_t,
1239  lno_nonzero_view_t,
1240  scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1241  A_->getNodeNumRows (), A_->getNodeNumCols (),
1242  kcsr.graph.row_map,
1243  kcsr.graph.entries,
1244  kcsr.values,
1245  is_symmetric);
1246  }
1247  } // end TimeMonitor scope
1248 
1249  ComputeTime_ += timer->totalElapsedTime ();
1250  ++NumCompute_;
1251  IsComputed_ = true;
1252 }
1253 
1254 
1255 template<class MatrixType>
1256 void
1258 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1259  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1260 {
1261  using Teuchos::as;
1262  if (hasBlockCrsMatrix_) {
1263  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1264  return;
1265  }
1266 
1267  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1268  const double numVectors = as<double> (X.getNumVectors ());
1269  if (ZeroStartingSolution_) {
1270  // For the first Jacobi sweep, if we are allowed to assume that
1271  // the initial guess is zero, then Jacobi is just diagonal
1272  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1273  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1274  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1275 
1276  // Count (global) floating-point operations. Ifpack2 represents
1277  // this as a floating-point number rather than an integer, so that
1278  // overflow (for a very large number of calls, or a very large
1279  // problem) is approximate instead of catastrophic.
1280  double flopUpdate = 0.0;
1281  if (DampingFactor_ == STS::one ()) {
1282  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1283  flopUpdate = numGlobalRows * numVectors;
1284  } else {
1285  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1286  // Two multiplies per entry of Y.
1287  flopUpdate = 2.0 * numGlobalRows * numVectors;
1288  }
1289  ApplyFlops_ += flopUpdate;
1290  if (NumSweeps_ == 1) {
1291  return;
1292  }
1293  }
1294  // If we were allowed to assume that the starting guess was zero,
1295  // then we have already done the first sweep above.
1296  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1297 
1298  // Allocate a multivector if the cached one isn't perfect
1299  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1300 
1301  for (int j = startSweep; j < NumSweeps_; ++j) {
1302  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1303  applyMat (Y, *cachedMV_);
1304  cachedMV_->update (STS::one (), X, -STS::one ());
1305  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1306  }
1307 
1308  // For each column of output, for each pass over the matrix:
1309  //
1310  // - One + and one * for each matrix entry
1311  // - One / and one + for each row of the matrix
1312  // - If the damping factor is not one: one * for each row of the
1313  // matrix. (It's not fair to count this if the damping factor is
1314  // one, since the implementation could skip it. Whether it does
1315  // or not is the implementation's choice.)
1316 
1317  // Floating-point operations due to the damping factor, per matrix
1318  // row, per direction, per columm of output.
1319  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1320  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1321  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1322  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1323 }
1324 
1325 template<class MatrixType>
1326 void
1327 Relaxation<MatrixType>::
1328 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1329  local_ordinal_type,
1330  global_ordinal_type,
1331  node_type>& X,
1332  Tpetra::MultiVector<scalar_type,
1333  local_ordinal_type,
1334  global_ordinal_type,
1335  node_type>& Y) const
1336 {
1337  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1338  local_ordinal_type, global_ordinal_type, node_type> BMV;
1339 
1340  const block_crs_matrix_type* blockMatConst =
1341  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1342  TEUCHOS_TEST_FOR_EXCEPTION
1343  (blockMatConst == NULL, std::logic_error, "This method should never be "
1344  "called if the matrix A_ is not a BlockCrsMatrix. Please report this "
1345  "bug to the Ifpack2 developers.");
1346  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1347  // This is because applyBlock() is nonconst (more accurate), while
1348  // apply() is const (required by Tpetra::Operator interface, but a
1349  // lie, because it possibly allocates temporary buffers).
1350  block_crs_matrix_type* blockMat =
1351  const_cast<block_crs_matrix_type*> (blockMatConst);
1352 
1353  auto meshRowMap = blockMat->getRowMap ();
1354  auto meshColMap = blockMat->getColMap ();
1355  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1356 
1357  BMV X_blk (X, *meshColMap, blockSize);
1358  BMV Y_blk (Y, *meshRowMap, blockSize);
1359 
1360  if (ZeroStartingSolution_) {
1361  // For the first sweep, if we are allowed to assume that the
1362  // initial guess is zero, then block Jacobi is just block diagonal
1363  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1364  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1365  if (NumSweeps_ == 1) {
1366  return;
1367  }
1368  }
1369 
1370  auto pointRowMap = Y.getMap ();
1371  const size_t numVecs = X.getNumVectors ();
1372 
1373  // We don't need to initialize the result MV, since the sparse
1374  // mat-vec will clobber its contents anyway.
1375  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1376 
1377  // If we were allowed to assume that the starting guess was zero,
1378  // then we have already done the first sweep above.
1379  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1380 
1381  for (int j = startSweep; j < NumSweeps_; ++j) {
1382  blockMat->applyBlock (Y_blk, A_times_Y);
1383  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1384  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1385  X_blk, A_times_Y, STS::one ());
1386  }
1387 }
1388 
1389 template<class MatrixType>
1390 void
1391 Relaxation<MatrixType>::
1392 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1393  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1394 {
1395  typedef Relaxation<MatrixType> this_type;
1396  // The CrsMatrix version is faster, because it can access the sparse
1397  // matrix data directly, rather than by copying out each row's data
1398  // in turn. Thus, we check whether the RowMatrix is really a
1399  // CrsMatrix.
1400  //
1401  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1402  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1403  // will still be correct if the cast fails, but it will use an
1404  // unoptimized kernel.
1405  const block_crs_matrix_type* blockCrsMat =
1406  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1407  const crs_matrix_type* crsMat =
1408  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
1409  if (blockCrsMat != NULL) {
1410  const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1411  } else if (crsMat != NULL) {
1412  ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1413  } else {
1414  ApplyInverseGS_RowMatrix (X, Y);
1415  }
1416 }
1417 
1418 
1419 template<class MatrixType>
1420 void
1421 Relaxation<MatrixType>::
1422 ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1423  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1424 {
1425  using Teuchos::Array;
1426  using Teuchos::ArrayRCP;
1427  using Teuchos::ArrayView;
1428  using Teuchos::as;
1429  using Teuchos::RCP;
1430  using Teuchos::rcp;
1431  using Teuchos::rcpFromRef;
1432  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1433 
1434  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1435  // starting multivector itself. The generic RowMatrix version here
1436  // does not, so we have to zero out Y here.
1437  if (ZeroStartingSolution_) {
1438  Y.putScalar (STS::zero ());
1439  }
1440 
1441  const size_t NumVectors = X.getNumVectors ();
1442  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1443  Array<local_ordinal_type> Indices (maxLength);
1444  Array<scalar_type> Values (maxLength);
1445 
1446  // Local smoothing stuff
1447  const size_t numMyRows = A_->getNodeNumRows();
1448  const local_ordinal_type* rowInd = 0;
1449  size_t numActive = numMyRows;
1450  bool do_local = ! localSmoothingIndices_.is_null ();
1451  if (do_local) {
1452  rowInd = localSmoothingIndices_.getRawPtr ();
1453  numActive = localSmoothingIndices_.size ();
1454  }
1455 
1456  RCP<MV> Y2;
1457  if (IsParallel_) {
1458  if (Importer_.is_null ()) { // domain and column Maps are the same.
1459  updateCachedMultiVector(Y.getMap(),NumVectors);
1460  } else {
1461  updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
1462  }
1463  Y2= cachedMV_;
1464  }
1465  else {
1466  Y2 = rcpFromRef (Y);
1467  }
1468 
1469  // Diagonal
1470  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1471  ArrayView<const scalar_type> d_ptr = d_rcp();
1472 
1473  // Constant stride check
1474  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1475 
1476  if (constant_stride) {
1477  // extract 1D RCPs
1478  size_t x_stride = X.getStride();
1479  size_t y2_stride = Y2->getStride();
1480  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1481  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1482  ArrayView<scalar_type> y2_ptr = y2_rcp();
1483  ArrayView<const scalar_type> x_ptr = x_rcp();
1484  Array<scalar_type> dtemp(NumVectors,STS::zero());
1485 
1486  for (int j = 0; j < NumSweeps_; ++j) {
1487  // data exchange is here, once per sweep
1488  if (IsParallel_) {
1489  if (Importer_.is_null ()) {
1490  *Y2 = Y; // just copy, since domain and column Maps are the same
1491  } else {
1492  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1493  }
1494  }
1495 
1496  if (! DoBackwardGS_) { // Forward sweep
1497  for (size_t ii = 0; ii < numActive; ++ii) {
1498  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1499  size_t NumEntries;
1500  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1501  dtemp.assign(NumVectors,STS::zero());
1502 
1503  for (size_t k = 0; k < NumEntries; ++k) {
1504  const local_ordinal_type col = Indices[k];
1505  for (size_t m = 0; m < NumVectors; ++m) {
1506  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1507  }
1508  }
1509 
1510  for (size_t m = 0; m < NumVectors; ++m) {
1511  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1512  }
1513  }
1514  }
1515  else { // Backward sweep
1516  // ptrdiff_t is the same size as size_t, but is signed. Being
1517  // signed is important so that i >= 0 is not trivially true.
1518  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1519  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1520  size_t NumEntries;
1521  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1522  dtemp.assign (NumVectors, STS::zero ());
1523 
1524  for (size_t k = 0; k < NumEntries; ++k) {
1525  const local_ordinal_type col = Indices[k];
1526  for (size_t m = 0; m < NumVectors; ++m) {
1527  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1528  }
1529  }
1530 
1531  for (size_t m = 0; m < NumVectors; ++m) {
1532  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1533  }
1534  }
1535  }
1536  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1537  if (IsParallel_) {
1538  Tpetra::deep_copy (Y, *Y2);
1539  }
1540  }
1541  }
1542  else {
1543  // extract 2D RCPS
1544  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1545  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1546 
1547  for (int j = 0; j < NumSweeps_; ++j) {
1548  // data exchange is here, once per sweep
1549  if (IsParallel_) {
1550  if (Importer_.is_null ()) {
1551  *Y2 = Y; // just copy, since domain and column Maps are the same
1552  } else {
1553  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1554  }
1555  }
1556 
1557  if (! DoBackwardGS_) { // Forward sweep
1558  for (size_t ii = 0; ii < numActive; ++ii) {
1559  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1560  size_t NumEntries;
1561  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1562 
1563  for (size_t m = 0; m < NumVectors; ++m) {
1564  scalar_type dtemp = STS::zero ();
1565  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1566  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1567 
1568  for (size_t k = 0; k < NumEntries; ++k) {
1569  const local_ordinal_type col = Indices[k];
1570  dtemp += Values[k] * y2_local[col];
1571  }
1572  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1573  }
1574  }
1575  }
1576  else { // Backward sweep
1577  // ptrdiff_t is the same size as size_t, but is signed. Being
1578  // signed is important so that i >= 0 is not trivially true.
1579  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1580  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1581 
1582  size_t NumEntries;
1583  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1584 
1585  for (size_t m = 0; m < NumVectors; ++m) {
1586  scalar_type dtemp = STS::zero ();
1587  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1588  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1589 
1590  for (size_t k = 0; k < NumEntries; ++k) {
1591  const local_ordinal_type col = Indices[k];
1592  dtemp += Values[k] * y2_local[col];
1593  }
1594  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1595  }
1596  }
1597  }
1598 
1599  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1600  if (IsParallel_) {
1601  Tpetra::deep_copy (Y, *Y2);
1602  }
1603  }
1604  }
1605 
1606 
1607  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1608  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1609  const double numVectors = as<double> (X.getNumVectors ());
1610  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1611  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1612  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1613  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1614 }
1615 
1616 
1617 template<class MatrixType>
1618 void
1619 Relaxation<MatrixType>::
1620 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
1621  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1622  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1623 {
1624  using Teuchos::as;
1625  const Tpetra::ESweepDirection direction =
1626  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1627  if (localSmoothingIndices_.is_null ()) {
1628  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1629  NumSweeps_, ZeroStartingSolution_);
1630  }
1631  else {
1632  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1633  DampingFactor_, direction,
1634  NumSweeps_, ZeroStartingSolution_);
1635  }
1636 
1637  // For each column of output, for each sweep over the matrix:
1638  //
1639  // - One + and one * for each matrix entry
1640  // - One / and one + for each row of the matrix
1641  // - If the damping factor is not one: one * for each row of the
1642  // matrix. (It's not fair to count this if the damping factor is
1643  // one, since the implementation could skip it. Whether it does
1644  // or not is the implementation's choice.)
1645 
1646  // Floating-point operations due to the damping factor, per matrix
1647  // row, per direction, per columm of output.
1648  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1649  const double numVectors = as<double> (X.getNumVectors ());
1650  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1651  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1652  ApplyFlops_ += NumSweeps_ * numVectors *
1653  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1654 }
1655 
1656 template<class MatrixType>
1657 void
1658 Relaxation<MatrixType>::
1659 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1660  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1661  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1662 {
1663  using Teuchos::RCP;
1664  using Teuchos::rcp;
1665  using Teuchos::rcpFromRef;
1666  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1667  local_ordinal_type, global_ordinal_type, node_type> BMV;
1668  typedef Tpetra::MultiVector<scalar_type,
1669  local_ordinal_type, global_ordinal_type, node_type> MV;
1670 
1671  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1672  //
1673  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1674  // multiple right-hand sides, unless the input or output MultiVector
1675  // does not have constant stride. We should check for that case
1676  // here, in case it doesn't work in localGaussSeidel (which is
1677  // entirely possible).
1678  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1679  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1680 
1681  bool performImport = false;
1682  RCP<BMV> yBlockCol;
1683  if (Importer_.is_null ()) {
1684  yBlockCol = rcpFromRef (yBlock);
1685  }
1686  else {
1687  if (yBlockColumnPointMap_.is_null () ||
1688  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1689  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1690  yBlockColumnPointMap_ =
1691  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
1692  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
1693  }
1694  yBlockCol = yBlockColumnPointMap_;
1695  performImport = true;
1696  }
1697 
1698  if (ZeroStartingSolution_) {
1699  yBlockCol->putScalar (STS::zero ());
1700  }
1701  else if (performImport) {
1702  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1703  }
1704 
1705  const Tpetra::ESweepDirection direction =
1706  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1707 
1708  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1709  if (performImport && sweep > 0) {
1710  yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1711  }
1712  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1713  DampingFactor_, direction);
1714  if (performImport) {
1715  RCP<const MV> yBlockColPointDomain =
1716  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1717  Tpetra::deep_copy (Y, *yBlockColPointDomain);
1718  }
1719  }
1720 }
1721 
1722 
1723 
1724 
1725 template<class MatrixType>
1726 void
1727 Relaxation<MatrixType>::
1728 MTGaussSeidel (const crs_matrix_type* crsMat,
1729  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1730  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1731  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* D */,
1732  const scalar_type& /* dampingFactor */,
1733  const Tpetra::ESweepDirection direction,
1734  const int numSweeps,
1735  const bool zeroInitialGuess) const
1736 {
1737  using Teuchos::null;
1738  using Teuchos::RCP;
1739  using Teuchos::rcp;
1740  using Teuchos::rcpFromRef;
1741  using Teuchos::rcp_const_cast;
1742 
1743  typedef scalar_type Scalar;
1744  typedef local_ordinal_type LocalOrdinal;
1745  typedef global_ordinal_type GlobalOrdinal;
1746  typedef node_type Node;
1747 
1748  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1749  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1750 
1751  TEUCHOS_TEST_FOR_EXCEPTION
1752  (crsMat == NULL, std::logic_error, prefix << "The matrix is NULL. This "
1753  "should never happen. Please report this bug to the Ifpack2 developers.");
1754  TEUCHOS_TEST_FOR_EXCEPTION
1755  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The input "
1756  "CrsMatrix is not fill complete. Please call fillComplete on the matrix,"
1757  " then do setup again, before calling apply(). \"Do setup\" means that "
1758  "if only the matrix's values have changed since last setup, you need only"
1759  " call compute(). If the matrix's structure may also have changed, you "
1760  "must first call initialize(), then call compute(). If you have not set"
1761  " up this preconditioner for this matrix before, you must first call "
1762  "initialize(), then call compute().");
1763  TEUCHOS_TEST_FOR_EXCEPTION
1764  (numSweeps < 0, std::invalid_argument, prefix << "The number of sweeps "
1765  "must be nonnegative, but you provided numSweeps = " << numSweeps <<
1766  " < 0.");
1767  if (numSweeps == 0) {
1768  return;
1769  }
1770 
1771  typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1772  typedef typename crs_matrix_type::import_type import_type;
1773  typedef typename crs_matrix_type::export_type export_type;
1774  typedef typename crs_matrix_type::map_type map_type;
1775 
1776  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1777  RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1778  TEUCHOS_TEST_FOR_EXCEPTION(
1779  ! exporter.is_null (), std::runtime_error,
1780  "This method's implementation currently requires that the matrix's row, "
1781  "domain, and range Maps be the same. This cannot be the case, because "
1782  "the matrix has a nontrivial Export object.");
1783 
1784  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1785  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1786  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1787  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1788 
1789 #ifdef HAVE_IFPACK2_DEBUG
1790  {
1791  // The relation 'isSameAs' is transitive. It's also a
1792  // collective, so we don't have to do a "shared" test for
1793  // exception (i.e., a global reduction on the test value).
1794  TEUCHOS_TEST_FOR_EXCEPTION(
1795  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1796  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1797  "multivector X be in the domain Map of the matrix.");
1798  TEUCHOS_TEST_FOR_EXCEPTION(
1799  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1800  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1801  "B be in the range Map of the matrix.");
1802  TEUCHOS_TEST_FOR_EXCEPTION(
1803  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1804  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1805  "D be in the row Map of the matrix.");
1806  TEUCHOS_TEST_FOR_EXCEPTION(
1807  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1808  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1809  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1810  TEUCHOS_TEST_FOR_EXCEPTION(
1811  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1812  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1813  "the range Map of the matrix be the same.");
1814  }
1815 #else
1816  // Forestall any compiler warnings for unused variables.
1817  (void) rangeMap;
1818  (void) rowMap;
1819 #endif // HAVE_IFPACK2_DEBUG
1820 
1821  // Fetch a (possibly cached) temporary column Map multivector
1822  // X_colMap, and a domain Map view X_domainMap of it. Both have
1823  // constant stride by construction. We know that the domain Map
1824  // must include the column Map, because our Gauss-Seidel kernel
1825  // requires that the row Map, domain Map, and range Map are all
1826  // the same, and that each process owns all of its own diagonal
1827  // entries of the matrix.
1828 
1829  RCP<MV> X_colMap;
1830  RCP<MV> X_domainMap;
1831  bool copyBackOutput = false;
1832  if (importer.is_null ()) {
1833  if (X.isConstantStride ()) {
1834  X_colMap = rcpFromRef (X);
1835  X_domainMap = rcpFromRef (X);
1836 
1837  // Column Map and domain Map are the same, so there are no
1838  // remote entries. Thus, if we are not setting the initial
1839  // guess to zero, we don't have to worry about setting remote
1840  // entries to zero, even though we are not doing an Import in
1841  // this case.
1842  if (zeroInitialGuess) {
1843  X_colMap->putScalar (ZERO);
1844  }
1845  // No need to copy back to X at end.
1846  }
1847  else {
1848  // We must copy X into a constant stride multivector.
1849  // Just use the cached column Map multivector for that.
1850  // force=true means fill with zeros, so no need to fill
1851  // remote entries (not in domain Map) with zeros.
1852  //X_colMap = crsMat->getColumnMapMultiVector (X, true);
1853  X_colMap = rcp (new MV (colMap, X.getNumVectors ()));
1854  // X_domainMap is always a domain Map view of the column Map
1855  // multivector. In this case, the domain and column Maps are
1856  // the same, so X_domainMap _is_ X_colMap.
1857  X_domainMap = X_colMap;
1858  if (! zeroInitialGuess) { // Don't copy if zero initial guess
1859  try {
1860  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
1861  } catch (std::exception& e) {
1862  std::ostringstream os;
1863  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1864  "deep_copy(*X_domainMap, X) threw an exception: "
1865  << e.what () << ".";
1866  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1867  }
1868  }
1869  copyBackOutput = true; // Don't forget to copy back at end.
1870  /*
1871  TPETRA_EFFICIENCY_WARNING(
1872  ! X.isConstantStride (),
1873  std::runtime_error,
1874  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1875  "kernel requires that X and B both have constant stride. Since X "
1876  "does not have constant stride, we had to make a copy. This is a "
1877  "limitation of the current implementation and not your fault, but we "
1878  "still report it as an efficiency warning for your information.");
1879  */
1880  }
1881  }
1882  else { // Column Map and domain Map are _not_ the same.
1883  //X_colMap = crsMat->getColumnMapMultiVector (X);
1884  X_colMap = rcp (new MV (colMap, X.getNumVectors ()));
1885 
1886  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1887 
1888 #ifdef HAVE_IFPACK2_DEBUG
1889  auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1890  auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1891 
1892  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1893  TEUCHOS_TEST_FOR_EXCEPTION(
1894  X_colMap_host_view.data () != X_domainMap_host_view.data (),
1895  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1896  "Pointer to start of column Map view of X is not equal to pointer to "
1897  "start of (domain Map view of) X. This may mean that "
1898  "Tpetra::MultiVector::offsetViewNonConst is broken. "
1899  "Please report this bug to the Tpetra developers.");
1900  }
1901 
1902  TEUCHOS_TEST_FOR_EXCEPTION(
1903  X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
1904  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1905  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1906  "X_colMap has fewer local rows than X_domainMap. "
1907  "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
1908  << ", X_domainMap_host_view.extent(0) = "
1909  << X_domainMap_host_view.extent (0)
1910  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1911  << ", and X_domainMap->getLocalLength() = "
1912  << X_domainMap->getLocalLength ()
1913  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1914  "is broken. Please report this bug to the Tpetra developers.");
1915 
1916  TEUCHOS_TEST_FOR_EXCEPTION(
1917  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1918  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1919  "X_colMap has a different number of columns than X_domainMap. "
1920  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1921  << " != X_domainMap->getNumVectors() = "
1922  << X_domainMap->getNumVectors ()
1923  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1924  "is broken. Please report this bug to the Tpetra developers.");
1925 #endif // HAVE_IFPACK2_DEBUG
1926 
1927  if (zeroInitialGuess) {
1928  // No need for an Import, since we're filling with zeros.
1929  X_colMap->putScalar (ZERO);
1930  } else {
1931  // We could just copy X into X_domainMap. However, that
1932  // wastes a copy, because the Import also does a copy (plus
1933  // communication). Since the typical use case for
1934  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1935  // don't want to waste that copy. Thus, we do the Import
1936  // here, and skip the first Import in the first sweep.
1937  // Importing directly from X effects the copy into X_domainMap
1938  // (which is a view of X_colMap).
1939  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1940  }
1941  copyBackOutput = true; // Don't forget to copy back at end.
1942  } // if column and domain Maps are (not) the same
1943 
1944  // The Gauss-Seidel / SOR kernel expects multivectors of constant
1945  // stride. X_colMap is by construction, but B might not be. If
1946  // it's not, we have to make a copy.
1947  RCP<const MV> B_in;
1948  if (B.isConstantStride ()) {
1949  B_in = rcpFromRef (B);
1950  }
1951  else {
1952  // Range Map and row Map are the same in this case, so we can
1953  // use the cached row Map multivector to store a constant stride
1954  // copy of B.
1955  //RCP<MV> B_in_nonconst = crsMat->getRowMapMultiVector (B, true);
1956  RCP<MV> B_in_nonconst = rcp (new MV (rowMap, B.getNumVectors()));
1957  try {
1958  deep_copy (*B_in_nonconst, B);
1959  } catch (std::exception& e) {
1960  std::ostringstream os;
1961  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1962  "deep_copy(*B_in_nonconst, B) threw an exception: "
1963  << e.what () << ".";
1964  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1965  }
1966  B_in = rcp_const_cast<const MV> (B_in_nonconst);
1967 
1968  /*
1969  TPETRA_EFFICIENCY_WARNING(
1970  ! B.isConstantStride (),
1971  std::runtime_error,
1972  "MTGaussSeidel: The current implementation requires that B have "
1973  "constant stride. Since B does not have constant stride, we had to "
1974  "copy it into a separate constant-stride multivector. This is a "
1975  "limitation of the current implementation and not your fault, but we "
1976  "still report it as an efficiency warning for your information.");
1977  */
1978  }
1979 
1980  local_matrix_type kcsr = crsMat->getLocalMatrix ();
1981  const size_t NumVectors = X.getNumVectors ();
1982 
1983  bool update_y_vector = true;
1984  //false as it was done up already, and we dont want to zero it in each sweep.
1985  bool zero_x_vector = false;
1986 
1987  for (int sweep = 0; sweep < numSweeps; ++sweep) {
1988  if (! importer.is_null () && sweep > 0) {
1989  // We already did the first Import for the zeroth sweep above,
1990  // if it was necessary.
1991  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
1992  }
1993 
1994  for (size_t indVec = 0; indVec < NumVectors; ++indVec) {
1995  if (direction == Tpetra::Symmetric) {
1996  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
1997  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1998  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
1999  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2000  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2001  zero_x_vector, update_y_vector);
2002  }
2003  else if (direction == Tpetra::Forward) {
2004  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2005  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2006  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2007  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2008  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2009  zero_x_vector, update_y_vector);
2010  }
2011  else if (direction == Tpetra::Backward) {
2012  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2013  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2014  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2015  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2016  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2017  zero_x_vector, update_y_vector);
2018  }
2019  else {
2020  TEUCHOS_TEST_FOR_EXCEPTION(
2021  true, std::invalid_argument,
2022  prefix << "The 'direction' enum does not have any of its valid "
2023  "values: Forward, Backward, or Symmetric.");
2024  }
2025  }
2026 
2027  if (NumVectors > 1){
2028  update_y_vector = true;
2029  }
2030  else {
2031  update_y_vector = false;
2032  }
2033  }
2034 
2035  if (copyBackOutput) {
2036  try {
2037  deep_copy (X , *X_domainMap); // Copy result back into X.
2038  } catch (std::exception& e) {
2039  TEUCHOS_TEST_FOR_EXCEPTION(
2040  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2041  "threw an exception: " << e.what ());
2042  }
2043  }
2044 
2045 }
2046 
2047 template<class MatrixType>
2048 void
2049 Relaxation<MatrixType>::
2050 ApplyInverseMTSGS_CrsMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2051  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const
2052 {
2053  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2054  TEUCHOS_TEST_FOR_EXCEPTION
2055  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::apply: "
2056  "Multithreaded Gauss-Seidel methods currently only work when the input "
2057  "matrix is a Tpetra::CrsMatrix.");
2058 
2059  using Teuchos::as;
2060  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2061 
2062  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
2063  TEUCHOS_TEST_FOR_EXCEPTION
2064  (! localSmoothingIndices_.is_null (), std::logic_error,
2065  "Our implementation of Multithreaded Gauss-Seidel does not implement the "
2066  "use case where the user supplies an iteration order. "
2067  "This error used to appear as \"MT GaussSeidel ignores the given "
2068  "order\". "
2069  "I tried to add more explanation, but I didn't implement \"MT "
2070  "GaussSeidel\" [sic]. "
2071  "You'll have to ask the person who did.");
2072  this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2073  NumSweeps_, ZeroStartingSolution_);
2074 
2075  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2076  const double numVectors = as<double> (X.getNumVectors ());
2077  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2078  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2079  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2080  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2081 }
2082 
2083 
2084 template<class MatrixType>
2085 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2086  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2087  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const {
2088 
2089  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2090  TEUCHOS_TEST_FOR_EXCEPTION(
2091  crsMat == NULL, std::runtime_error, "Ifpack2::Relaxation::compute: "
2092  "MT methods works for CRSMatrix Only.");
2093 
2094  using Teuchos::as;
2095  const Tpetra::ESweepDirection direction =
2096  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2097 
2098  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
2099  TEUCHOS_TEST_FOR_EXCEPTION
2100  (! localSmoothingIndices_.is_null (), std::logic_error,
2101  "Our implementation of Multithreaded Gauss-Seidel does not implement the "
2102  "use case where the user supplies an iteration order. "
2103  "This error used to appear as \"MT GaussSeidel ignores the given "
2104  "order\". "
2105  "I tried to add more explanation, but I didn't implement \"MT "
2106  "GaussSeidel\" [sic]. "
2107  "You'll have to ask the person who did.");
2108  this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2109  NumSweeps_, ZeroStartingSolution_);
2110 
2111  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2112  const double numVectors = as<double> (X.getNumVectors ());
2113  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2114  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2115  ApplyFlops_ += NumSweeps_ * numVectors *
2116  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2117 }
2118 
2119 template<class MatrixType>
2120 void
2121 Relaxation<MatrixType>::
2122 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2123  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2124 {
2125  typedef Relaxation<MatrixType> this_type;
2126  // The CrsMatrix version is faster, because it can access the sparse
2127  // matrix data directly, rather than by copying out each row's data
2128  // in turn. Thus, we check whether the RowMatrix is really a
2129  // CrsMatrix.
2130  //
2131  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
2132  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
2133  // will still be correct if the cast fails, but it will use an
2134  // unoptimized kernel.
2135  const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
2136  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2137  if (blockCrsMat != NULL) {
2138  const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2139  }
2140  else if (crsMat != NULL) {
2141  ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2142  } else {
2143  ApplyInverseSGS_RowMatrix (X, Y);
2144  }
2145 }
2146 
2147 
2148 template<class MatrixType>
2149 void
2150 Relaxation<MatrixType>::
2151 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2152  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2153 {
2154  using Teuchos::Array;
2155  using Teuchos::ArrayRCP;
2156  using Teuchos::ArrayView;
2157  using Teuchos::as;
2158  using Teuchos::RCP;
2159  using Teuchos::rcp;
2160  using Teuchos::rcpFromRef;
2161  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2162 
2163 
2164  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
2165  // starting multivector itself. The generic RowMatrix version here
2166  // does not, so we have to zero out Y here.
2167  if (ZeroStartingSolution_) {
2168  Y.putScalar (STS::zero ());
2169  }
2170 
2171  const size_t NumVectors = X.getNumVectors ();
2172  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2173  Array<local_ordinal_type> Indices (maxLength);
2174  Array<scalar_type> Values (maxLength);
2175 
2176  // Local smoothing stuff
2177  const size_t numMyRows = A_->getNodeNumRows();
2178  const local_ordinal_type * rowInd = 0;
2179  size_t numActive = numMyRows;
2180  bool do_local = !localSmoothingIndices_.is_null();
2181  if(do_local) {
2182  rowInd = localSmoothingIndices_.getRawPtr();
2183  numActive = localSmoothingIndices_.size();
2184  }
2185 
2186 
2187  RCP<MV> Y2;
2188  if (IsParallel_) {
2189  if (Importer_.is_null ()) { // domain and column Maps are the same.
2190  updateCachedMultiVector(Y.getMap(),NumVectors);
2191  } else {
2192  updateCachedMultiVector(Importer_->getTargetMap(),NumVectors);
2193  }
2194  Y2= cachedMV_;
2195  }
2196  else {
2197  Y2 = rcpFromRef (Y);
2198  }
2199 
2200  // Diagonal
2201  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2202  ArrayView<const scalar_type> d_ptr = d_rcp();
2203 
2204  // Constant stride check
2205  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2206 
2207  if(constant_stride) {
2208  // extract 1D RCPs
2209  size_t x_stride = X.getStride();
2210  size_t y2_stride = Y2->getStride();
2211  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2212  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2213  ArrayView<scalar_type> y2_ptr = y2_rcp();
2214  ArrayView<const scalar_type> x_ptr = x_rcp();
2215  Array<scalar_type> dtemp(NumVectors,STS::zero());
2216 
2217  for (int j = 0; j < NumSweeps_; j++) {
2218  // data exchange is here, once per sweep
2219  if (IsParallel_) {
2220  if (Importer_.is_null ()) {
2221  // just copy, since domain and column Maps are the same
2222  Tpetra::deep_copy (*Y2, Y);
2223  } else {
2224  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2225  }
2226  }
2227  for (size_t ii = 0; ii < numActive; ++ii) {
2228  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2229  size_t NumEntries;
2230  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2231  dtemp.assign(NumVectors,STS::zero());
2232 
2233  for (size_t k = 0; k < NumEntries; ++k) {
2234  const local_ordinal_type col = Indices[k];
2235  for (size_t m = 0; m < NumVectors; ++m) {
2236  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2237  }
2238  }
2239 
2240  for (size_t m = 0; m < NumVectors; ++m) {
2241  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2242  }
2243  }
2244 
2245  // ptrdiff_t is the same size as size_t, but is signed. Being
2246  // signed is important so that i >= 0 is not trivially true.
2247  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2248  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2249  size_t NumEntries;
2250  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2251  dtemp.assign(NumVectors,STS::zero());
2252 
2253  for (size_t k = 0; k < NumEntries; ++k) {
2254  const local_ordinal_type col = Indices[k];
2255  for (size_t m = 0; m < NumVectors; ++m) {
2256  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2257  }
2258  }
2259 
2260  for (size_t m = 0; m < NumVectors; ++m) {
2261  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2262  }
2263  }
2264 
2265  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2266  if (IsParallel_) {
2267  Tpetra::deep_copy (Y, *Y2);
2268  }
2269  }
2270  }
2271  else {
2272  // extract 2D RCPs
2273  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2274  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2275 
2276  for (int iter = 0; iter < NumSweeps_; ++iter) {
2277  // only one data exchange per sweep
2278  if (IsParallel_) {
2279  if (Importer_.is_null ()) {
2280  // just copy, since domain and column Maps are the same
2281  Tpetra::deep_copy (*Y2, Y);
2282  } else {
2283  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2284  }
2285  }
2286 
2287  for (size_t ii = 0; ii < numActive; ++ii) {
2288  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2289  const scalar_type diag = d_ptr[i];
2290  size_t NumEntries;
2291  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2292 
2293  for (size_t m = 0; m < NumVectors; ++m) {
2294  scalar_type dtemp = STS::zero ();
2295  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2296  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2297 
2298  for (size_t k = 0; k < NumEntries; ++k) {
2299  const local_ordinal_type col = Indices[k];
2300  dtemp += Values[k] * y2_local[col];
2301  }
2302  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2303  }
2304  }
2305 
2306  // ptrdiff_t is the same size as size_t, but is signed. Being
2307  // signed is important so that i >= 0 is not trivially true.
2308  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2309  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2310  const scalar_type diag = d_ptr[i];
2311  size_t NumEntries;
2312  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2313 
2314  for (size_t m = 0; m < NumVectors; ++m) {
2315  scalar_type dtemp = STS::zero ();
2316  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2317  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2318 
2319  for (size_t k = 0; k < NumEntries; ++k) {
2320  const local_ordinal_type col = Indices[k];
2321  dtemp += Values[k] * y2_local[col];
2322  }
2323  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2324  }
2325  }
2326 
2327  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2328  if (IsParallel_) {
2329  Tpetra::deep_copy (Y, *Y2);
2330  }
2331  }
2332  }
2333 
2334  // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
2335  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2336  const double numVectors = as<double> (X.getNumVectors ());
2337  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2338  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2339  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2340  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2341 }
2342 
2343 
2344 template<class MatrixType>
2345 void
2346 Relaxation<MatrixType>::
2347 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
2348  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2349  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2350 {
2351  using Teuchos::as;
2352  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2353  if (localSmoothingIndices_.is_null ()) {
2354  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2355  NumSweeps_, ZeroStartingSolution_);
2356  }
2357  else {
2358  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2359  DampingFactor_, direction,
2360  NumSweeps_, ZeroStartingSolution_);
2361  }
2362 
2363  // For each column of output, for each sweep over the matrix:
2364  //
2365  // - One + and one * for each matrix entry
2366  // - One / and one + for each row of the matrix
2367  // - If the damping factor is not one: one * for each row of the
2368  // matrix. (It's not fair to count this if the damping factor is
2369  // one, since the implementation could skip it. Whether it does
2370  // or not is the implementation's choice.)
2371  //
2372  // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
2373  // one forward and one backward.
2374 
2375  // Floating-point operations due to the damping factor, per matrix
2376  // row, per direction, per columm of output.
2377  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2378  const double numVectors = as<double> (X.getNumVectors ());
2379  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2380  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2381  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2382  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2383 }
2384 
2385 template<class MatrixType>
2386 void
2387 Relaxation<MatrixType>::
2388 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
2389  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2390  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2391 {
2392  using Teuchos::RCP;
2393  using Teuchos::rcp;
2394  using Teuchos::rcpFromRef;
2395  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
2396  local_ordinal_type, global_ordinal_type, node_type> BMV;
2397  typedef Tpetra::MultiVector<scalar_type,
2398  local_ordinal_type, global_ordinal_type, node_type> MV;
2399 
2400  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
2401  //
2402  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
2403  // multiple right-hand sides, unless the input or output MultiVector
2404  // does not have constant stride. We should check for that case
2405  // here, in case it doesn't work in localGaussSeidel (which is
2406  // entirely possible).
2407  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2408  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2409 
2410  bool performImport = false;
2411  RCP<BMV> yBlockCol;
2412  if (Importer_.is_null ()) {
2413  yBlockCol = Teuchos::rcpFromRef (yBlock);
2414  }
2415  else {
2416  if (yBlockColumnPointMap_.is_null () ||
2417  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2418  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2419  yBlockColumnPointMap_ =
2420  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
2421  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
2422  }
2423  yBlockCol = yBlockColumnPointMap_;
2424  performImport = true;
2425  }
2426 
2427  if (ZeroStartingSolution_) {
2428  yBlockCol->putScalar (STS::zero ());
2429  }
2430  else if (performImport) {
2431  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2432  }
2433 
2434  // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter?
2435  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2436 
2437  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2438  if (performImport && sweep > 0) {
2439  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2440  }
2441  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2442  DampingFactor_, direction);
2443  if (performImport) {
2444  RCP<const MV> yBlockColPointDomain =
2445  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2446  MV yBlockView = yBlock.getMultiVectorView ();
2447  Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2448  }
2449  }
2450 }
2451 
2452 
2453 template<class MatrixType>
2455 {
2456  std::ostringstream os;
2457 
2458  // Output is a valid YAML dictionary in flow style. If you don't
2459  // like everything on a single line, you should call describe()
2460  // instead.
2461  os << "\"Ifpack2::Relaxation\": {";
2462 
2463  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2464  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2465 
2466  // It's useful to print this instance's relaxation method (Jacobi,
2467  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2468  // than that, call describe() instead.
2469  os << "Type: ";
2470  if (PrecType_ == Ifpack2::Details::JACOBI) {
2471  os << "Jacobi";
2472  } else if (PrecType_ == Ifpack2::Details::GS) {
2473  os << "Gauss-Seidel";
2474  } else if (PrecType_ == Ifpack2::Details::SGS) {
2475  os << "Symmetric Gauss-Seidel";
2476  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2477  os << "MT Gauss-Seidel";
2478  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2479  os << "MT Symmetric Gauss-Seidel";
2480  }
2481  else {
2482  os << "INVALID";
2483  }
2484 
2485  os << ", " << "sweeps: " << NumSweeps_ << ", "
2486  << "damping factor: " << DampingFactor_ << ", ";
2487  if (DoL1Method_) {
2488  os << "use l1: " << DoL1Method_ << ", "
2489  << "l1 eta: " << L1Eta_ << ", ";
2490  }
2491 
2492  if (A_.is_null ()) {
2493  os << "Matrix: null";
2494  }
2495  else {
2496  os << "Global matrix dimensions: ["
2497  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2498  << ", Global nnz: " << A_->getGlobalNumEntries();
2499  }
2500 
2501  os << "}";
2502  return os.str ();
2503 }
2504 
2505 
2506 template<class MatrixType>
2507 void
2509 describe (Teuchos::FancyOStream &out,
2510  const Teuchos::EVerbosityLevel verbLevel) const
2511 {
2512  using Teuchos::OSTab;
2513  using Teuchos::TypeNameTraits;
2514  using Teuchos::VERB_DEFAULT;
2515  using Teuchos::VERB_NONE;
2516  using Teuchos::VERB_LOW;
2517  using Teuchos::VERB_MEDIUM;
2518  using Teuchos::VERB_HIGH;
2519  using Teuchos::VERB_EXTREME;
2520  using std::endl;
2521 
2522  const Teuchos::EVerbosityLevel vl =
2523  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2524 
2525  const int myRank = this->getComm ()->getRank ();
2526 
2527  // none: print nothing
2528  // low: print O(1) info from Proc 0
2529  // medium:
2530  // high:
2531  // extreme:
2532 
2533  if (vl != VERB_NONE && myRank == 0) {
2534  // Describable interface asks each implementation to start with a tab.
2535  OSTab tab1 (out);
2536  // Output is valid YAML; hence the quotes, to protect the colons.
2537  out << "\"Ifpack2::Relaxation\":" << endl;
2538  OSTab tab2 (out);
2539  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2540  << endl;
2541  if (this->getObjectLabel () != "") {
2542  out << "Label: " << this->getObjectLabel () << endl;
2543  }
2544  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2545  << "Computed: " << (isComputed () ? "true" : "false") << endl
2546  << "Parameters: " << endl;
2547  {
2548  OSTab tab3 (out);
2549  out << "\"relaxation: type\": ";
2550  if (PrecType_ == Ifpack2::Details::JACOBI) {
2551  out << "Jacobi";
2552  } else if (PrecType_ == Ifpack2::Details::GS) {
2553  out << "Gauss-Seidel";
2554  } else if (PrecType_ == Ifpack2::Details::SGS) {
2555  out << "Symmetric Gauss-Seidel";
2556  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2557  out << "MT Gauss-Seidel";
2558  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2559  out << "MT Symmetric Gauss-Seidel";
2560  } else {
2561  out << "INVALID";
2562  }
2563  // We quote these parameter names because they contain colons.
2564  // YAML uses the colon to distinguish key from value.
2565  out << endl
2566  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2567  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2568  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2569  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2570  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2571  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2572  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2573  }
2574  out << "Computed quantities:" << endl;
2575  {
2576  OSTab tab3 (out);
2577  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2578  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2579  }
2580  if (checkDiagEntries_ && isComputed ()) {
2581  out << "Properties of input diagonal entries:" << endl;
2582  {
2583  OSTab tab3 (out);
2584  out << "Magnitude of minimum-magnitude diagonal entry: "
2585  << globalMinMagDiagEntryMag_ << endl
2586  << "Magnitude of maximum-magnitude diagonal entry: "
2587  << globalMaxMagDiagEntryMag_ << endl
2588  << "Number of diagonal entries with small magnitude: "
2589  << globalNumSmallDiagEntries_ << endl
2590  << "Number of zero diagonal entries: "
2591  << globalNumZeroDiagEntries_ << endl
2592  << "Number of diagonal entries with negative real part: "
2593  << globalNumNegDiagEntries_ << endl
2594  << "Abs 2-norm diff between computed and actual inverse "
2595  << "diagonal: " << globalDiagNormDiff_ << endl;
2596  }
2597  }
2598  if (isComputed ()) {
2599  out << "Saved diagonal offsets: "
2600  << (savedDiagOffsets_ ? "true" : "false") << endl;
2601  }
2602  out << "Call counts and total times (in seconds): " << endl;
2603  {
2604  OSTab tab3 (out);
2605  out << "initialize: " << endl;
2606  {
2607  OSTab tab4 (out);
2608  out << "Call count: " << NumInitialize_ << endl;
2609  out << "Total time: " << InitializeTime_ << endl;
2610  }
2611  out << "compute: " << endl;
2612  {
2613  OSTab tab4 (out);
2614  out << "Call count: " << NumCompute_ << endl;
2615  out << "Total time: " << ComputeTime_ << endl;
2616  }
2617  out << "apply: " << endl;
2618  {
2619  OSTab tab4 (out);
2620  out << "Call count: " << NumApply_ << endl;
2621  out << "Total time: " << ApplyTime_ << endl;
2622  }
2623  }
2624  }
2625 }
2626 
2627 } // namespace Ifpack2
2628 
2629 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2630  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2631 
2632 #endif // IFPACK2_RELAXATION_DEF_HPP
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2454
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:406
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:422
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:452
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:184
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:882
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:484
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:416
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:434
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:440
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:164
Ifpack2 implementation details.
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:393
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:384
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:363
File for utility functions.
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:471
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:464
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:230
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:458
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:446
Definition: Ifpack2_Container.hpp:774
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:428
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:373
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:238
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:225
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:586
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:223
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2509
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:607
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:250