53 #ifndef AMESOS2_SUPERLU_DEF_HPP 54 #define AMESOS2_SUPERLU_DEF_HPP 56 #include <Teuchos_Tuple.hpp> 57 #include <Teuchos_ParameterList.hpp> 58 #include <Teuchos_StandardParameterEntryValidators.hpp> 63 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 70 #include "KokkosSparse_sptrsv_superlu.hpp" 76 template <
class Matrix,
class Vector>
78 Teuchos::RCP<const Matrix> A,
79 Teuchos::RCP<Vector> X,
80 Teuchos::RCP<const Vector> B )
82 , is_contiguous_(true)
83 , use_triangular_solves_(false)
85 , symmetrize_metis_(true)
86 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU)
87 , sptrsv_invert_diag_(true)
88 , sptrsv_invert_offdiag_ (false)
89 , sptrsv_u_in_csr_ (true)
90 , sptrsv_merge_supernodes_ (false)
91 , sptrsv_use_spmv_ (false)
99 SLU::set_default_options(&(data_.options));
101 data_.options.PrintStat = SLU::NO;
103 SLU::StatInit(&(data_.stat));
105 Kokkos::resize(data_.perm_r, this->globalNumRows_);
106 Kokkos::resize(data_.perm_c, this->globalNumCols_);
107 Kokkos::resize(data_.etree, this->globalNumCols_);
108 Kokkos::resize(data_.R, this->globalNumRows_);
109 Kokkos::resize(data_.C, this->globalNumCols_);
111 data_.relax = SLU::sp_ienv(2);
112 data_.panel_size = SLU::sp_ienv(1);
115 data_.rowequ =
false;
116 data_.colequ =
false;
117 data_.A.Store = NULL;
118 data_.L.Store = NULL;
119 data_.U.Store = NULL;
120 data_.X.Store = NULL;
121 data_.B.Store = NULL;
127 template <
class Matrix,
class Vector>
135 SLU::StatFree( &(data_.stat) ) ;
138 if ( data_.A.Store != NULL ){
139 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
143 if ( data_.L.Store != NULL ){
144 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
145 SLU::Destroy_CompCol_Matrix( &(data_.U) );
149 template <
class Matrix,
class Vector>
153 std::ostringstream oss;
154 oss <<
"SuperLU solver interface";
156 oss <<
", \"ILUTP\" : {";
157 oss <<
"drop tol = " << data_.options.ILU_DropTol;
158 oss <<
", fill factor = " << data_.options.ILU_FillFactor;
159 oss <<
", fill tol = " << data_.options.ILU_FillTol;
160 switch(data_.options.ILU_MILU) {
172 oss <<
", regular ILU";
174 switch(data_.options.ILU_Norm) {
183 oss <<
", infinity-norm";
187 oss <<
", direct solve";
203 template<
class Matrix,
class Vector>
215 int permc_spec = data_.options.ColPerm;
216 if ( permc_spec != SLU::MY_PERMC && this->root_ ){
217 #ifdef HAVE_AMESOS2_TIMERS 218 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
221 SLU::get_perm_c(permc_spec, &(data_.A), data_.perm_c.data());
228 template <
class Matrix,
class Vector>
243 same_symbolic_ =
false;
244 data_.options.Fact = SLU::DOFACT;
247 #ifdef HAVE_AMESOS2_TIMERS 248 Teuchos::RCP< Teuchos::Time > MetisTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for METIS");
249 Teuchos::TimeMonitor numFactTimer(*MetisTimer_);
251 size_t n = this->globalNumRows_;
252 size_t nz = this->globalNumNonZeros_;
253 host_size_type_array host_rowind_view_ = host_rows_view_;
254 host_ordinal_type_array host_colptr_view_ = host_col_ptr_view_;
257 if (symmetrize_metis_) {
264 SLU::at_plus_a(n, nz, host_col_ptr_view_.data(), host_rows_view_.data(),
265 &new_nz, &new_col_ptr, &new_row_ind);
270 Kokkos::resize (host_rowind_view_, nz);
271 Kokkos::realloc(host_colptr_view_, n+1);
272 for (
size_t i = 0; i <= n; i++) {
273 host_colptr_view_(i) = new_col_ptr[i];
275 for (
size_t i = 0; i < nz; i++) {
276 host_rowind_view_(i) = new_row_ind[i];
280 SLU::SUPERLU_FREE(new_col_ptr);
281 SLU::SUPERLU_FREE(new_row_ind);
285 using metis_int_array_type = host_ordinal_type_array;
286 metis_int_array_type metis_perm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_perm"), n);
287 metis_int_array_type metis_iperm = metis_int_array_type(Kokkos::ViewAllocateWithoutInitializing(
"metis_iperm"), n);
291 Amesos2::Util::reorder(
292 host_colptr_view_, host_rowind_view_,
293 metis_perm, metis_iperm, new_nnz,
false);
295 for (
size_t i = 0; i < n; i++) {
296 data_.perm_r(i) = metis_iperm(i);
297 data_.perm_c(i) = metis_iperm(i);
301 data_.options.ColPerm = SLU::MY_PERMC;
302 data_.options.RowPerm = SLU::MY_PERMR;
311 template <
class Matrix,
class Vector>
320 if ( !same_symbolic_ && data_.L.Store != NULL ){
321 SLU::Destroy_SuperNode_Matrix( &(data_.L) );
322 SLU::Destroy_CompCol_Matrix( &(data_.U) );
323 data_.L.Store = NULL;
324 data_.U.Store = NULL;
327 if( same_symbolic_ ) data_.options.Fact = SLU::SamePattern_SameRowPerm;
332 #ifdef HAVE_AMESOS2_DEBUG 333 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.ncol != as<int>(this->globalNumCols_),
335 "Error in converting to SuperLU SuperMatrix: wrong number of global columns." );
336 TEUCHOS_TEST_FOR_EXCEPTION( data_.A.nrow != as<int>(this->globalNumRows_),
338 "Error in converting to SuperLU SuperMatrix: wrong number of global rows." );
341 if( data_.options.Equil == SLU::YES ){
342 magnitude_type rowcnd, colcnd, amax;
346 function_map::gsequ(&(data_.A), data_.R.data(),
347 data_.C.data(), &rowcnd, &colcnd,
349 TEUCHOS_TEST_FOR_EXCEPTION
350 (info2 < 0, std::runtime_error,
351 "SuperLU's gsequ function returned with status " << info2 <<
" < 0." 352 " This means that argument " << (-info2) <<
" given to the function" 353 " had an illegal value.");
355 if (info2 <= data_.A.nrow) {
356 TEUCHOS_TEST_FOR_EXCEPTION
357 (
true, std::runtime_error,
"SuperLU's gsequ function returned with " 358 "info = " << info2 <<
" > 0, and info <= A.nrow = " << data_.A.nrow
359 <<
". This means that row " << info2 <<
" of A is exactly zero.");
361 else if (info2 > data_.A.ncol) {
362 TEUCHOS_TEST_FOR_EXCEPTION
363 (
true, std::runtime_error,
"SuperLU's gsequ function returned with " 364 "info = " << info2 <<
" > 0, and info > A.ncol = " << data_.A.ncol
365 <<
". This means that column " << (info2 - data_.A.nrow) <<
" of " 366 "A is exactly zero.");
369 TEUCHOS_TEST_FOR_EXCEPTION
370 (
true, std::runtime_error,
"SuperLU's gsequ function returned " 371 "with info = " << info2 <<
" > 0, but its value is not in the " 372 "range permitted by the documentation. This should never happen " 373 "(it appears to be SuperLU's fault).");
378 function_map::laqgs(&(data_.A), data_.R.data(),
379 data_.C.data(), rowcnd, colcnd,
380 amax, &(data_.equed));
383 data_.rowequ = (data_.equed ==
'R') || (data_.equed ==
'B');
384 data_.colequ = (data_.equed ==
'C') || (data_.equed ==
'B');
389 SLU::sp_preorder(&(data_.options), &(data_.A), data_.perm_c.data(),
390 data_.etree.data(), &(data_.AC));
393 #ifdef HAVE_AMESOS2_TIMERS 394 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
397 #ifdef HAVE_AMESOS2_VERBOSE_DEBUG 398 std::cout <<
"Superlu:: Before numeric factorization" << std::endl;
399 std::cout <<
"nzvals_ : " << nzvals_.toString() << std::endl;
400 std::cout <<
"rowind_ : " << rowind_.toString() << std::endl;
401 std::cout <<
"colptr_ : " << colptr_.toString() << std::endl;
404 if(ILU_Flag_==
false) {
405 function_map::gstrf(&(data_.options), &(data_.AC),
406 data_.relax, data_.panel_size, data_.etree.data(),
407 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
408 &(data_.L), &(data_.U),
409 #ifdef HAVE_AMESOS2_SUPERLU5_API 412 &(data_.stat), &info);
415 function_map::gsitrf(&(data_.options), &(data_.AC),
416 data_.relax, data_.panel_size, data_.etree.data(),
417 NULL, 0, data_.perm_c.data(), data_.perm_r.data(),
418 &(data_.L), &(data_.U),
419 #ifdef HAVE_AMESOS2_SUPERLU5_API 422 &(data_.stat), &info);
427 SLU::Destroy_CompCol_Permuted( &(data_.AC) );
430 this->setNnzLU( as<size_t>(((SLU::SCformat*)data_.L.Store)->nnz +
431 ((SLU::NCformat*)data_.U.Store)->nnz) );
435 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
437 global_size_type info_st = as<global_size_type>(info);
438 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st <= this->globalNumCols_),
440 "Factorization complete, but matrix is singular. Division by zero eminent");
441 TEUCHOS_TEST_FOR_EXCEPTION( (info_st > 0) && (info_st > this->globalNumCols_),
443 "Memory allocation failure in Superlu factorization");
445 data_.options.Fact = SLU::FACTORED;
446 same_symbolic_ =
true;
448 if(use_triangular_solves_) {
449 #ifdef HAVE_AMESOS2_TIMERS 450 Teuchos::RCP< Teuchos::Time > SpTrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv setup");
451 Teuchos::TimeMonitor numFactTimer(*SpTrsvTimer_);
453 triangular_solve_factor();
460 template <
class Matrix,
class Vector>
466 #ifdef HAVE_AMESOS2_TIMERS 467 Teuchos::RCP< Teuchos::Time > Amesos2SolveTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for Amesos2");
468 Teuchos::TimeMonitor solveTimer(*Amesos2SolveTimer_);
471 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
472 const size_t nrhs = X->getGlobalNumVectors();
474 bool bDidAssignX =
false;
475 bool bDidAssignB =
false;
477 #ifdef HAVE_AMESOS2_TIMERS 478 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
479 Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ );
484 const bool initialize_data =
true;
485 const bool do_not_initialize_data =
false;
486 if(use_triangular_solves_) {
487 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 488 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
489 device_solve_array_t>::do_get(do_not_initialize_data, X, device_xValues_,
492 this->rowIndexBase_);
493 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
494 device_solve_array_t>::do_get(initialize_data, B, device_bValues_,
497 this->rowIndexBase_);
501 bDidAssignX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
502 host_solve_array_t>::do_get(do_not_initialize_data, X, host_xValues_,
505 this->rowIndexBase_);
506 bDidAssignB = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
507 host_solve_array_t>::do_get(initialize_data, B, host_bValues_,
510 this->rowIndexBase_);
518 if(bDidAssignB && data_.equed !=
'N') {
519 if(use_triangular_solves_) {
520 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 521 device_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
522 device_bValues_.extent(0), device_bValues_.extent(1));
523 Kokkos::deep_copy(copyB, device_bValues_);
524 device_bValues_ = copyB;
528 host_solve_array_t copyB(Kokkos::ViewAllocateWithoutInitializing(
"copyB"),
529 host_bValues_.extent(0), host_bValues_.extent(1));
530 Kokkos::deep_copy(copyB, host_bValues_);
531 host_bValues_ = copyB;
537 magnitude_type rpg, rcond;
542 #ifdef HAVE_AMESOS2_TIMERS 543 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
546 if(use_triangular_solves_) {
550 Kokkos::resize(data_.ferr, nrhs);
551 Kokkos::resize(data_.berr, nrhs);
554 #ifdef HAVE_AMESOS2_TIMERS 555 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
557 SLU::Dtype_t dtype = type_map::dtype;
558 int i_ld_rhs = as<int>(ld_rhs);
559 function_map::create_Dense_Matrix(&(data_.B), i_ld_rhs, as<int>(nrhs),
560 convert_bValues_, host_bValues_, i_ld_rhs,
561 SLU::SLU_DN, dtype, SLU::SLU_GE);
562 function_map::create_Dense_Matrix(&(data_.X), i_ld_rhs, as<int>(nrhs),
563 convert_xValues_, host_xValues_, i_ld_rhs,
564 SLU::SLU_DN, dtype, SLU::SLU_GE);
567 if(ILU_Flag_==
false) {
568 function_map::gssvx(&(data_.options), &(data_.A),
569 data_.perm_c.data(), data_.perm_r.data(),
570 data_.etree.data(), &(data_.equed), data_.R.data(),
571 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
572 &(data_.X), &rpg, &rcond, data_.ferr.data(),
574 #ifdef HAVE_AMESOS2_SUPERLU5_API 577 &(data_.mem_usage), &(data_.stat), &ierr);
580 function_map::gsisx(&(data_.options), &(data_.A),
581 data_.perm_c.data(), data_.perm_r.data(),
582 data_.etree.data(), &(data_.equed), data_.R.data(),
583 data_.C.data(), &(data_.L), &(data_.U), NULL, 0, &(data_.B),
584 &(data_.X), &rpg, &rcond,
585 #ifdef HAVE_AMESOS2_SUPERLU5_API 588 &(data_.mem_usage), &(data_.stat), &ierr);
592 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
593 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
594 data_.X.Store = NULL;
595 data_.B.Store = NULL;
602 #ifdef HAVE_AMESOS2_TIMERS 603 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
605 function_map::convert_back_Dense_Matrix(convert_bValues_, host_bValues_);
606 function_map::convert_back_Dense_Matrix(convert_xValues_, host_xValues_);
610 SLU::Destroy_SuperMatrix_Store( &(data_.X) );
611 SLU::Destroy_SuperMatrix_Store( &(data_.B) );
612 data_.X.Store = NULL;
613 data_.B.Store = NULL;
617 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
619 global_size_type ierr_st = as<global_size_type>(ierr);
620 TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
621 std::invalid_argument,
622 "Argument " << -ierr <<
" to SuperLU xgssvx had illegal value" );
623 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st <= this->globalNumCols_,
625 "Factorization complete, but U is exactly singular" );
626 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0 && ierr_st > this->globalNumCols_ + 1,
628 "SuperLU allocated " << ierr - this->globalNumCols_ <<
" bytes of " 629 "memory before allocation failure occured." );
636 #ifdef HAVE_AMESOS2_TIMERS 637 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
640 if(use_triangular_solves_) {
641 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 642 Util::put_1d_data_helper_kokkos_view<
646 this->rowIndexBase_);
650 Util::put_1d_data_helper_kokkos_view<
654 this->rowIndexBase_);
662 template <
class Matrix,
class Vector>
669 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
673 template <
class Matrix,
class Vector>
678 using Teuchos::getIntegralValue;
679 using Teuchos::ParameterEntryValidator;
681 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
683 ILU_Flag_ = parameterList->get<
bool>(
"ILU_Flag",
false);
685 SLU::ilu_set_default_options(&(data_.options));
687 data_.options.PrintStat = SLU::NO;
690 data_.options.Trans = this->control_.useTranspose_ ? SLU::TRANS : SLU::NOTRANS;
692 if( parameterList->isParameter(
"Trans") ){
693 RCP<const ParameterEntryValidator> trans_validator = valid_params->getEntry(
"Trans").validator();
694 parameterList->getEntry(
"Trans").setValidator(trans_validator);
696 data_.options.Trans = getIntegralValue<SLU::trans_t>(*parameterList,
"Trans");
699 if( parameterList->isParameter(
"IterRefine") ){
700 RCP<const ParameterEntryValidator> refine_validator = valid_params->getEntry(
"IterRefine").validator();
701 parameterList->getEntry(
"IterRefine").setValidator(refine_validator);
703 data_.options.IterRefine = getIntegralValue<SLU::IterRefine_t>(*parameterList,
"IterRefine");
706 if( parameterList->isParameter(
"ColPerm") ){
707 RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry(
"ColPerm").validator();
708 parameterList->getEntry(
"ColPerm").setValidator(colperm_validator);
710 data_.options.ColPerm = getIntegralValue<SLU::colperm_t>(*parameterList,
"ColPerm");
713 data_.options.DiagPivotThresh = parameterList->get<
double>(
"DiagPivotThresh", 1.0);
715 bool equil = parameterList->get<
bool>(
"Equil",
true);
716 data_.options.Equil = equil ? SLU::YES : SLU::NO;
718 bool symmetric_mode = parameterList->get<
bool>(
"SymmetricMode",
false);
719 data_.options.SymmetricMode = symmetric_mode ? SLU::YES : SLU::NO;
722 if( parameterList->isParameter(
"RowPerm") ){
723 RCP<const ParameterEntryValidator> rowperm_validator = valid_params->getEntry(
"RowPerm").validator();
724 parameterList->getEntry(
"RowPerm").setValidator(rowperm_validator);
725 data_.options.RowPerm = getIntegralValue<SLU::rowperm_t>(*parameterList,
"RowPerm");
734 data_.options.ILU_DropTol = parameterList->get<
double>(
"ILU_DropTol", 0.0001);
736 data_.options.ILU_FillFactor = parameterList->get<
double>(
"ILU_FillFactor", 10.0);
738 if( parameterList->isParameter(
"ILU_Norm") ) {
739 RCP<const ParameterEntryValidator> norm_validator = valid_params->getEntry(
"ILU_Norm").validator();
740 parameterList->getEntry(
"ILU_Norm").setValidator(norm_validator);
741 data_.options.ILU_Norm = getIntegralValue<SLU::norm_t>(*parameterList,
"ILU_Norm");
744 if( parameterList->isParameter(
"ILU_MILU") ) {
745 RCP<const ParameterEntryValidator> milu_validator = valid_params->getEntry(
"ILU_MILU").validator();
746 parameterList->getEntry(
"ILU_MILU").setValidator(milu_validator);
747 data_.options.ILU_MILU = getIntegralValue<SLU::milu_t>(*parameterList,
"ILU_MILU");
750 data_.options.ILU_FillTol = parameterList->get<
double>(
"ILU_FillTol", 0.01);
752 is_contiguous_ = parameterList->get<
bool>(
"IsContiguous",
true);
754 use_triangular_solves_ = parameterList->get<
bool>(
"Enable_KokkosKernels_TriangularSolves",
false);
756 use_metis_ = parameterList->get<
bool>(
"UseMetis",
false);
757 symmetrize_metis_ = parameterList->get<
bool>(
"SymmetrizeMetis",
true);
758 if(use_triangular_solves_) {
759 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 761 sptrsv_invert_diag_ = parameterList->get<
bool>(
"SpTRSV_Invert_Diag",
true);
763 sptrsv_invert_offdiag_ = parameterList->get<
bool>(
"SpTRSV_Invert_OffDiag",
false);
765 sptrsv_u_in_csr_ = parameterList->get<
bool>(
"SpTRSV_U_CSR",
true);
767 sptrsv_merge_supernodes_ = parameterList->get<
bool>(
"SpTRSV_Merge_Supernodes",
false);
769 sptrsv_use_spmv_ = parameterList->get<
bool>(
"SpTRSV_Use_SpMV",
false);
771 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
772 "Calling for triangular solves but KokkosKernels_ENABLE_SUPERNODAL_SPTRSV and KokkosKernels_ENABLE_TPL_SUPERLU were not configured." );
778 template <
class Matrix,
class Vector>
779 Teuchos::RCP<const Teuchos::ParameterList>
783 using Teuchos::tuple;
784 using Teuchos::ParameterList;
785 using Teuchos::EnhancedNumberValidator;
786 using Teuchos::setStringToIntegralParameter;
787 using Teuchos::stringToIntegralParameterEntryValidator;
789 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
791 if( is_null(valid_params) ){
792 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
794 setStringToIntegralParameter<SLU::trans_t>(
"Trans",
"NOTRANS",
795 "Solve for the transpose system or not",
796 tuple<string>(
"TRANS",
"NOTRANS",
"CONJ"),
797 tuple<string>(
"Solve with transpose",
798 "Do not solve with transpose",
799 "Solve with the conjugate transpose"),
800 tuple<SLU::trans_t>(SLU::TRANS,
805 setStringToIntegralParameter<SLU::IterRefine_t>(
"IterRefine",
"NOREFINE",
806 "Type of iterative refinement to use",
807 tuple<string>(
"NOREFINE",
"SLU_SINGLE",
"SLU_DOUBLE"),
808 tuple<string>(
"Do not use iterative refinement",
809 "Do single iterative refinement",
810 "Do double iterative refinement"),
811 tuple<SLU::IterRefine_t>(SLU::NOREFINE,
817 setStringToIntegralParameter<SLU::colperm_t>(
"ColPerm",
"COLAMD",
818 "Specifies how to permute the columns of the " 819 "matrix for sparsity preservation",
820 tuple<string>(
"NATURAL",
"MMD_AT_PLUS_A",
821 "MMD_ATA",
"COLAMD"),
822 tuple<string>(
"Natural ordering",
823 "Minimum degree ordering on A^T + A",
824 "Minimum degree ordering on A^T A",
825 "Approximate minimum degree column ordering"),
826 tuple<SLU::colperm_t>(SLU::NATURAL,
832 Teuchos::RCP<EnhancedNumberValidator<double> > diag_pivot_thresh_validator
833 = Teuchos::rcp(
new EnhancedNumberValidator<double>(0.0, 1.0) );
834 pl->set(
"DiagPivotThresh", 1.0,
835 "Specifies the threshold used for a diagonal entry to be an acceptable pivot",
836 diag_pivot_thresh_validator);
838 pl->set(
"Equil",
true,
"Whether to equilibrate the system before solve");
840 pl->set(
"SymmetricMode",
false,
841 "Specifies whether to use the symmetric mode. " 842 "Gives preference to diagonal pivots and uses " 843 "an (A^T + A)-based column permutation.");
847 setStringToIntegralParameter<SLU::rowperm_t>(
"RowPerm",
"LargeDiag",
848 "Type of row permutation strategy to use",
849 tuple<string>(
"NOROWPERM",
"LargeDiag",
"MY_PERMR"),
850 tuple<string>(
"Use natural ordering",
851 "Use weighted bipartite matching algorithm",
852 "Use the ordering given in perm_r input"),
853 tuple<SLU::rowperm_t>(SLU::NOROWPERM,
854 #if SUPERLU_MAJOR_VERSION > 5 || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION > 2) || (SUPERLU_MAJOR_VERSION == 5 && SUPERLU_MINOR_VERSION == 2 && SUPERLU_PATCH_VERSION >= 2) 876 pl->set(
"ILU_DropTol", 0.0001,
"ILUT drop tolerance");
878 pl->set(
"ILU_FillFactor", 10.0,
"ILUT fill factor");
880 setStringToIntegralParameter<SLU::norm_t>(
"ILU_Norm",
"INF_NORM",
881 "Type of norm to use",
882 tuple<string>(
"ONE_NORM",
"TWO_NORM",
"INF_NORM"),
883 tuple<string>(
"1-norm",
"2-norm",
"inf-norm"),
884 tuple<SLU::norm_t>(SLU::ONE_NORM,SLU::TWO_NORM,SLU::INF_NORM),
887 setStringToIntegralParameter<SLU::milu_t>(
"ILU_MILU",
"SILU",
888 "Type of modified ILU to use",
889 tuple<string>(
"SILU",
"SMILU_1",
"SMILU_2",
"SMILU_3"),
890 tuple<string>(
"Regular ILU",
"MILU 1",
"MILU 2",
"MILU 3"),
891 tuple<SLU::milu_t>(SLU::SILU,SLU::SMILU_1,SLU::SMILU_2,
895 pl->set(
"ILU_FillTol", 0.01,
"ILUT fill tolerance");
897 pl->set(
"ILU_Flag",
false,
"ILU flag: if true, run ILU routines");
899 pl->set(
"Enable_KokkosKernels_TriangularSolves",
false,
"Whether to use triangular solves.");
901 pl->set(
"IsContiguous",
true,
"Whether GIDs contiguous");
904 pl->set(
"UseMetis",
false,
"Whether to call METIS before SuperLU");
905 pl->set(
"SymmetrizeMetis",
true,
"Whether to symmetrize matrix before METIS");
907 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 908 pl->set(
"SpTRSV_Invert_Diag",
true,
"specify whether to invert diagonal blocks for supernodal sparse-trianguular solve");
909 pl->set(
"SpTRSV_Invert_OffDiag",
false,
"specify whether to apply diagonal-inversion to off-diagonal blocks for supernodal sparse-trianguular solve");
910 pl->set(
"SpTRSV_U_CSR",
true,
"specify whether to store U in CSR forma for supernodal sparse-trianguular solve");
911 pl->set(
"SpTRSV_Merge_Supernodes",
false,
"specify whether to merge supernodes with similar sparsity structures for supernodal sparse-trianguular solve");
912 pl->set(
"SpTRSV_Use_SpMV",
false,
"specify whether to use spmv for supernodal sparse-trianguular solve");
922 template <
class Matrix,
class Vector>
928 #ifdef HAVE_AMESOS2_TIMERS 929 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
933 if( current_phase == SYMBFACT )
return false;
936 if( data_.A.Store != NULL ){
937 SLU::Destroy_SuperMatrix_Store( &(data_.A) );
938 data_.A.Store = NULL;
943 Kokkos::resize(host_nzvals_view_, this->globalNumNonZeros_);
944 Kokkos::resize(host_rows_view_, this->globalNumNonZeros_);
945 Kokkos::resize(host_col_ptr_view_, this->globalNumRows_ + 1);
950 #ifdef HAVE_AMESOS2_TIMERS 951 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
954 TEUCHOS_TEST_FOR_EXCEPTION( this->rowIndexBase_ != this->columnIndexBase_,
956 "Row and column maps have different indexbase ");
958 if ( is_contiguous_ ==
true ) {
961 host_size_type_array>::do_get(this->matrixA_.ptr(),
962 host_nzvals_view_, host_rows_view_,
963 host_col_ptr_view_, nnz_ret,
ROOTED,
965 this->rowIndexBase_);
970 host_size_type_array>::do_get(this->matrixA_.ptr(),
971 host_nzvals_view_, host_rows_view_,
974 this->rowIndexBase_);
979 SLU::Dtype_t dtype = type_map::dtype;
982 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<int>(this->globalNumNonZeros_),
984 "Did not get the expected number of non-zero vals");
986 function_map::template create_CompCol_Matrix<host_value_type_array>( &(data_.A),
987 this->globalNumRows_, this->globalNumCols_,
989 convert_nzvals_, host_nzvals_view_,
990 host_rows_view_.data(),
991 host_col_ptr_view_.data(),
992 SLU::SLU_NC, dtype, SLU::SLU_GE);
998 template <
class Matrix,
class Vector>
1002 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 1003 size_t ld_rhs = this->matrixA_->getGlobalNumRows();
1006 SLU::SCformat * Lstore = (SLU::SCformat*)(data_.L.Store);
1007 int nsuper = 1 + Lstore->nsuper;
1008 Kokkos::resize(data_.parents, nsuper);
1009 for (
int s = 0; s < nsuper; s++) {
1010 int j = Lstore->sup_to_col[s+1]-1;
1011 if (data_.etree[j] == static_cast<int>(ld_rhs)) {
1012 data_.parents(s) = -1;
1014 data_.parents(s) = Lstore->col_to_sup[data_.etree[j]];
1018 deep_copy_or_assign_view(device_trsv_perm_r_, data_.perm_r);
1019 deep_copy_or_assign_view(device_trsv_perm_c_, data_.perm_c);
1020 if (data_.options.Equil == SLU::YES) {
1021 deep_copy_or_assign_view(device_trsv_R_, data_.R);
1022 deep_copy_or_assign_view(device_trsv_C_, data_.C);
1026 if (sptrsv_use_spmv_) {
1027 device_khL_.create_sptrsv_handle(
1028 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
true);
1029 device_khU_.create_sptrsv_handle(
1030 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_SPMV_DAG, ld_rhs,
false);
1032 device_khL_.create_sptrsv_handle(
1033 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
true);
1034 device_khU_.create_sptrsv_handle(
1035 KokkosSparse::Experimental::SPTRSVAlgorithm::SUPERNODAL_DAG, ld_rhs,
false);
1039 device_khU_.set_sptrsv_column_major (!sptrsv_u_in_csr_);
1042 device_khL_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1043 device_khU_.set_sptrsv_merge_supernodes (sptrsv_merge_supernodes_);
1046 device_khL_.set_sptrsv_invert_diagonal (sptrsv_invert_diag_);
1047 device_khU_.set_sptrsv_invert_diagonal (sptrsv_invert_diag_);
1050 device_khL_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag_);
1051 device_khU_.set_sptrsv_invert_offdiagonal (sptrsv_invert_offdiag_);
1054 device_khL_.set_sptrsv_etree(data_.parents.data());
1055 device_khU_.set_sptrsv_etree(data_.parents.data());
1058 device_khL_.set_sptrsv_perm(data_.perm_r.data());
1059 device_khU_.set_sptrsv_perm(data_.perm_c.data());
1061 int block_size = -1;
1062 if (block_size >= 0) {
1063 std::cout <<
" Block Size : " << block_size << std::endl;
1064 device_khL_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1065 device_khU_.set_sptrsv_diag_supernode_sizes (block_size, block_size);
1070 #ifdef HAVE_AMESOS2_TIMERS 1071 Teuchos::RCP< Teuchos::Time > SymboTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv symbolic");
1072 Teuchos::TimeMonitor numFactTimer(*SymboTimer_);
1074 KokkosSparse::Experimental::sptrsv_symbolic
1075 (&device_khL_, &device_khU_, data_.L, data_.U);
1080 #ifdef HAVE_AMESOS2_TIMERS 1081 Teuchos::RCP< Teuchos::Time > NumerTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for SpTrsv numeric");
1082 Teuchos::TimeMonitor numFactTimer(*NumerTimer_);
1084 KokkosSparse::Experimental::sptrsv_compute
1085 (&device_khL_, &device_khU_, data_.L, data_.U);
1087 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE 1090 template <
class Matrix,
class Vector>
1092 Superlu<Matrix,Vector>::triangular_solve()
const 1094 #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) && defined(KOKKOSKERNELS_ENABLE_TPL_SUPERLU) 1095 size_t ld_rhs = device_xValues_.extent(0);
1096 size_t nrhs = device_xValues_.extent(1);
1098 Kokkos::resize(device_trsv_rhs_, ld_rhs, nrhs);
1099 Kokkos::resize(device_trsv_sol_, ld_rhs, nrhs);
1102 auto local_device_bValues = device_bValues_;
1103 auto local_device_trsv_perm_r = device_trsv_perm_r_;
1104 auto local_device_trsv_rhs = device_trsv_rhs_;
1107 auto local_device_trsv_R = device_trsv_R_;
1108 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1109 KOKKOS_LAMBDA(
size_t j) {
1110 for(
size_t k = 0; k < nrhs; ++k) {
1111 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_trsv_R(j) * local_device_bValues(j,k);
1115 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1116 KOKKOS_LAMBDA(
size_t j) {
1117 for(
size_t k = 0; k < nrhs; ++k) {
1118 local_device_trsv_rhs(local_device_trsv_perm_r(j),k) = local_device_bValues(j,k);
1123 for(
size_t k = 0; k < nrhs; ++k) {
1124 auto sub_sol = Kokkos::subview(device_trsv_sol_, Kokkos::ALL, k);
1125 auto sub_rhs = Kokkos::subview(device_trsv_rhs_, Kokkos::ALL, k);
1129 #ifdef HAVE_AMESOS2_TIMERS 1130 Teuchos::RCP< Teuchos::Time > SpLtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for L solve");
1131 Teuchos::TimeMonitor solveTimer(*SpLtrsvTimer_);
1133 KokkosSparse::Experimental::sptrsv_solve(&device_khL_, sub_sol, sub_rhs);
1137 #ifdef HAVE_AMESOS2_TIMERS 1138 Teuchos::RCP< Teuchos::Time > SpUtrsvTimer_ = Teuchos::TimeMonitor::getNewCounter (
"Time for U solve");
1139 Teuchos::TimeMonitor solveTimer(*SpUtrsvTimer_);
1141 KokkosSparse::Experimental::sptrsv_solve(&device_khU_, sub_rhs, sub_sol);
1146 auto local_device_xValues = device_xValues_;
1147 auto local_device_trsv_perm_c = device_trsv_perm_c_;
1149 auto local_device_trsv_C = device_trsv_C_;
1150 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1151 KOKKOS_LAMBDA(
size_t j) {
1152 for(
size_t k = 0; k < nrhs; ++k) {
1153 local_device_xValues(j,k) = local_device_trsv_C(j) * local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1157 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceExecSpaceType>(0, ld_rhs),
1158 KOKKOS_LAMBDA(
size_t j) {
1159 for(
size_t k = 0; k < nrhs; ++k) {
1160 local_device_xValues(j,k) = local_device_trsv_rhs(local_device_trsv_perm_c(j),k);
1164 #endif // HAVE_AMESOS2_TRIANGULAR_SOLVE 1167 template<
class Matrix,
class Vector>
1173 #endif // AMESOS2_SUPERLU_DEF_HPP Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
Superlu specific solve.
Definition: Amesos2_Superlu_def.hpp:462
A generic helper class for getting a CCS representation of a Matrix.
Definition: Amesos2_Util.hpp:953
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition: Amesos2_Superlu_def.hpp:924
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
int numericFactorization_impl()
Superlu specific numeric factorization.
Definition: Amesos2_Superlu_def.hpp:313
~Superlu()
Destructor.
Definition: Amesos2_Superlu_def.hpp:128
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superlu_def.hpp:780
Amesos2 Superlu declarations.
Definition: Amesos2_TypeDecl.hpp:143
Definition: Amesos2_AbstractConcreteMatrixAdapter.hpp:48
Definition: Amesos2_Superlu_FunctionMap.hpp:74
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
std::string description() const
Returns a short description of this Solver.
Definition: Amesos2_Superlu_def.hpp:151
std::string name() const
Return the name of this solver.
Definition: Amesos2_SolverCore_def.hpp:509
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superlu_def.hpp:664
Definition: Amesos2_TypeDecl.hpp:127
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > ¶meterList)
Definition: Amesos2_Superlu_def.hpp:675
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superlu_def.hpp:205
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:176
Definition: Amesos2_TypeDecl.hpp:128
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using Superlu.
Definition: Amesos2_Superlu_def.hpp:230
Amesos2 interface to the SuperLU package.
Definition: Amesos2_Superlu_decl.hpp:76