46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ 47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 51 #include <Xpetra_Map.hpp> 52 #include <Xpetra_Vector.hpp> 53 #include <Xpetra_MultiVectorFactory.hpp> 54 #include <Xpetra_MapFactory.hpp> 55 #include <Xpetra_VectorFactory.hpp> 57 #include "KokkosKernels_Handle.hpp" 58 #include "KokkosSparse_spgemm.hpp" 59 #include "KokkosSparse_spmv.hpp" 63 #include "MueLu_Aggregates.hpp" 69 #include "MueLu_Utilities.hpp" 71 #include "MueLu_Utilities_kokkos.hpp" 75 namespace NotayUtils {
76 template <
class LocalOrdinal>
78 return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
81 template <
class LocalOrdinal>
82 void RandomReorder(Teuchos::Array<LocalOrdinal> & list) {
84 LO n = Teuchos::as<LO>(list.size());
85 for(LO i = 0; i < n-1; i++)
86 std::swap(list[i], list[RandomOrdinal(i,n-1)]);
90 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
91 RCP<const ParameterList> NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList()
const {
92 RCP<ParameterList> validParamList = rcp(
new ParameterList());
95 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 101 #undef SET_VALID_ENTRY 104 validParamList->set< RCP<const FactoryBase> >(
"A", null,
"Generating factory of the matrix");
105 validParamList->set< RCP<const FactoryBase> >(
"Graph", null,
"Generating factory of the graph");
106 validParamList->set< RCP<const FactoryBase> >(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
107 validParamList->set< RCP<const FactoryBase> >(
"AggregateQualities", null,
"Generating factory for variable \'AggregateQualities\'");
110 return validParamList;
113 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
114 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel)
const {
115 const ParameterList& pL = GetParameterList();
117 Input(currentLevel,
"A");
118 Input(currentLevel,
"Graph");
119 Input(currentLevel,
"DofsPerNode");
120 if (pL.get<
bool>(
"aggregation: compute aggregate qualities")) {
121 Input(currentLevel,
"AggregateQualities");
128 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
129 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel)
const {
130 FactoryMonitor m(*
this,
"Build", currentLevel);
131 using STS = Teuchos::ScalarTraits<Scalar>;
132 using MT =
typename STS::magnitudeType;
134 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
136 RCP<Teuchos::FancyOStream> out;
137 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
138 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
139 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
141 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
144 const ParameterList& pL = GetParameterList();
146 const MT kappa =
static_cast<MT
>(pL.get<
double>(
"aggregation: Dirichlet threshold"));
147 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
148 Exceptions::RuntimeError,
149 "Pairwise requires kappa > 2" 150 " otherwise all rows are considered as Dirichlet rows.");
154 if (pL.isParameter(
"aggregation: pairwise: size"))
155 maxNumIter = pL.get<
int>(
"aggregation: pairwise: size");
156 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
157 Exceptions::RuntimeError,
158 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" 159 " must be a strictly positive integer");
162 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel,
"Graph");
163 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel,
"A");
166 RCP<Aggregates> aggregates = rcp(
new Aggregates(*graph));
167 aggregates->setObjectLabel(
"PW");
169 const LO numRows = graph->GetNodeNumVertices();
172 std::vector<unsigned> aggStat(numRows,
READY);
175 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
176 TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError,
177 "Pairwise only supports one dof per node");
184 std::string orderingStr = pL.get<std::string>(
"aggregation: ordering");
191 ordering = O_NATURAL;
192 if (orderingStr ==
"random" ) ordering = O_RANDOM;
193 else if(orderingStr ==
"natural") {}
194 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
196 TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,
"Invalid ordering type");
203 Array<LO> orderingVector(numRows);
204 for (LO i = 0; i < numRows; i++)
205 orderingVector[i] = i;
206 if (ordering == O_RANDOM)
207 MueLu::NotayUtils::RandomReorder(orderingVector);
208 else if (ordering == O_CUTHILL_MCKEE) {
210 auto localVector = rcmVector->getData(0);
211 for (LO i = 0; i < numRows; i++)
212 orderingVector[i] = localVector[i];
216 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
217 BuildInitialAggregates(pL, A, orderingVector(), kappa,
218 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
219 TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
220 "Initial pairwise aggregation failed to aggregate all nodes");
221 LO numLocalAggregates = aggregates->GetNumAggregates();
222 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - " 223 << A->getNodeNumRows() / numLocalAggregates << std::endl;
226 local_matrix_type intermediateP;
227 local_matrix_type coarseLocalA;
232 LO numLocalDirichletNodes = numDirichletNodes;
233 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
234 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
235 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
237 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
238 localVertex2AggId(), intermediateP);
241 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
244 row_sum_type rowSum(
"rowSum", numLocalAggregates);
246 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
247 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
248 for(LO i=0; i<(LO)numRows; i++) {
251 LO agg=vertex2AggId[i];
252 agg2vertex[agg].push_back(i);
255 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
256 for(LO i = 0; i < numRows; i++) {
260 int agg = vertex2AggId[i];
261 std::vector<LO> & myagg = agg2vertex[agg];
263 size_t nnz = A->getNumEntriesInLocalRow(i);
264 ArrayView<const LO> indices;
265 ArrayView<const SC> vals;
266 A->getLocalRowView(i, indices, vals);
268 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
269 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
271 if(indices[colidx] < numRows) {
272 for(LO j=0; j<(LO)myagg.size(); j++)
273 if (vertex2AggId[indices[colidx]] == agg)
277 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
278 mysum += vals[colidx];
281 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
285 rowSum_h[agg] = mysum;
287 Kokkos::deep_copy(rowSum, rowSum_h);
291 Array<LO> localOrderingVector(numRows);
292 for (LO i = 0; i < numRows; i++)
293 localOrderingVector[i] = i;
294 if (ordering == O_RANDOM)
295 MueLu::NotayUtils::RandomReorder(localOrderingVector);
296 else if (ordering == O_CUTHILL_MCKEE) {
298 auto localVector = rcmVector->getData(0);
299 for (LO i = 0; i < numRows; i++)
300 localOrderingVector[i] = localVector[i];
304 numLocalAggregates = 0;
305 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
306 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
307 localVertex2AggId.resize(numNonAggregatedNodes, -1);
308 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
309 localAggStat, localVertex2AggId,
310 numLocalAggregates, numNonAggregatedNodes);
314 numLocalDirichletNodes = 0;
317 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
318 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
319 for(
size_t vertexIdx = 0; vertexIdx < A->getNodeNumRows(); ++vertexIdx) {
320 LO oldAggIdx = vertex2AggId[vertexIdx];
321 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
322 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
327 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - " 328 << A->getNodeNumRows() / numLocalAggregates << std::endl;
330 aggregates->SetNumAggregates(numLocalAggregates);
331 aggregates->AggregatesCrossProcessors(
false);
332 aggregates->ComputeAggregateSizes(
true);
335 Set(currentLevel,
"Aggregates", aggregates);
336 GetOStream(
Statistics0) << aggregates->description() << std::endl;
340 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
342 BuildInitialAggregates(
const Teuchos::ParameterList& params,
343 const RCP<const Matrix>& A,
344 const Teuchos::ArrayView<const LO> & orderingVector,
345 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
346 Aggregates& aggregates,
347 std::vector<unsigned>& aggStat,
348 LO& numNonAggregatedNodes,
349 LO& numDirichletNodes)
const {
351 Monitor m(*
this,
"BuildInitialAggregates");
352 using STS = Teuchos::ScalarTraits<Scalar>;
353 using MT =
typename STS::magnitudeType;
354 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
356 RCP<Teuchos::FancyOStream> out;
357 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
358 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
359 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
361 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
365 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
366 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
367 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
368 const MT MT_TWO = MT_ONE + MT_ONE;
369 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
370 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
372 const MT kappa_init = kappa / (kappa - MT_TWO);
373 const LO numRows = aggStat.size();
374 const int myRank = A->getMap()->getComm()->getRank();
378 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
379 double tie_less = 1.0 - tie_criterion;
380 double tie_more = 1.0 + tie_criterion;
391 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
392 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
393 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
396 ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
397 ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
398 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
399 ArrayView<LO> procWinner = procWinner_rcp();
405 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
406 MT aii = STS::magnitude(D[row]);
407 MT rowsum = AbsRs[row];
409 if(aii >= kappa_init * rowsum) {
410 *out <<
"Flagging index " << row <<
" as dirichlet " 411 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
413 --numNonAggregatedNodes;
420 LO aggIndex = LO_ZERO;
421 for(LO i = 0; i < numRows; i++) {
422 LO current_idx = orderingVector[i];
424 if(aggStat[current_idx] !=
READY)
427 MT best_mu = MT_ZERO;
428 LO best_idx = LO_INVALID;
430 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
431 ArrayView<const LO> indices;
432 ArrayView<const SC> vals;
433 A->getLocalRowView(current_idx, indices, vals);
435 MT aii = STS::real(D[current_idx]);
436 MT si = STS::real(S[current_idx]);
437 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
439 LO col = indices[colidx];
440 SC val = vals[colidx];
441 if(current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
444 MT aij = STS::real(val);
445 MT ajj = STS::real(D[col]);
446 MT sj = - STS::real(S[col]);
447 if(aii - si + ajj - sj >= MT_ZERO) {
449 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
450 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
451 MT mu = mu_top / mu_bottom;
454 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
455 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
458 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": " 459 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
460 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
463 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": " 464 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
465 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
469 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": " 470 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
471 <<
" = " << aii - si + ajj - sj << std::endl;
475 if(best_idx == LO_INVALID) {
476 *out <<
"No node buddy found for index " << current_idx
477 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
482 aggStat[current_idx] =
ONEPT;
483 vertex2AggId[current_idx] = aggIndex;
484 procWinner[current_idx] = myRank;
485 numNonAggregatedNodes--;
490 if(best_mu <= kappa) {
491 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
495 vertex2AggId[current_idx] = aggIndex;
496 vertex2AggId[best_idx] = aggIndex;
497 procWinner[current_idx] = myRank;
498 procWinner[best_idx] = myRank;
499 numNonAggregatedNodes-=2;
503 *out <<
"No buddy found for index " << current_idx <<
"," 504 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
506 aggStat[current_idx] =
ONEPT;
507 vertex2AggId[current_idx] = aggIndex;
508 procWinner[current_idx] = myRank;
509 numNonAggregatedNodes--;
515 *out <<
"vertex2aggid :";
516 for(
int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
517 *out << i <<
"(" << vertex2AggId[i] <<
")";
522 aggregates.SetNumAggregates(aggIndex);
525 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
526 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
527 BuildFurtherAggregates(
const Teuchos::ParameterList& params,
528 const RCP<const Matrix>& A,
529 const Teuchos::ArrayView<const LO> & orderingVector,
530 const typename Matrix::local_matrix_type& coarseA,
531 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
532 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
534 typename Matrix::local_matrix_type::device_type>& rowSum,
535 std::vector<LocalOrdinal>& localAggStat,
536 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
537 LO& numLocalAggregates,
538 LO& numNonAggregatedNodes)
const {
539 Monitor m(*
this,
"BuildFurtherAggregates");
542 RCP<Teuchos::FancyOStream> out;
543 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
544 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
545 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
547 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
550 using value_type =
typename local_matrix_type::value_type;
551 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
552 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
553 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
554 const magnitude_type MT_two = MT_one + MT_one;
555 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
559 double tie_criterion = params.get<
double>(
"aggregation: pairwise: tie threshold");
560 double tie_less = 1.0 - tie_criterion;
561 double tie_more = 1.0 + tie_criterion;
563 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
564 Kokkos::deep_copy(rowSum_h, rowSum);
569 const LO numRows =
static_cast<LO
>(coarseA.numRows());
570 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
571 typename local_matrix_type::row_map_type::HostMirror row_map_h
572 = Kokkos::create_mirror_view(coarseA.graph.row_map);
573 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
574 typename local_matrix_type::index_type::HostMirror entries_h
575 = Kokkos::create_mirror_view(coarseA.graph.entries);
576 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
577 typename local_matrix_type::values_type::HostMirror values_h
578 = Kokkos::create_mirror_view(coarseA.values);
579 Kokkos::deep_copy(values_h, coarseA.values);
580 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
581 for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
582 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
584 if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
585 diagA_h(rowIdx) = values_h(entryIdx);
590 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
591 if(localAggStat[currentIdx] !=
READY) {
595 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
596 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
597 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
598 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
599 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
600 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
601 if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
605 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
606 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
607 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx));
608 if(aii - si + ajj - sj >= MT_zero) {
609 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
610 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
611 const magnitude_type mu = mu_top / mu_bottom;
614 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
615 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
618 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": " 619 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
620 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
623 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": " 624 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
625 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
628 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": " 629 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
630 <<
" = " << aii - si + ajj - sj << std::endl;
634 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
635 localAggStat[currentIdx] =
ONEPT;
636 localVertex2AggID[currentIdx] = numLocalAggregates;
637 --numNonAggregatedNodes;
638 ++numLocalAggregates;
640 if(best_mu <= kappa) {
641 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
644 localVertex2AggID[currentIdx] = numLocalAggregates;
645 --numNonAggregatedNodes;
648 localVertex2AggID[bestIdx] = numLocalAggregates;
649 --numNonAggregatedNodes;
651 ++numLocalAggregates;
653 *out <<
"No buddy found for index " << currentIdx <<
"," 654 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
656 localAggStat[currentIdx] =
ONEPT;
657 localVertex2AggID[currentIdx] = numLocalAggregates;
658 --numNonAggregatedNodes;
659 ++numLocalAggregates;
666 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
667 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
668 BuildOnRankLocalMatrix(
const typename Matrix::local_matrix_type& localA,
669 typename Matrix::local_matrix_type& onrankA)
const {
670 Monitor m(*
this,
"BuildOnRankLocalMatrix");
673 RCP<Teuchos::FancyOStream> out;
674 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
675 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
676 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
678 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
681 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
682 using values_type =
typename local_matrix_type::values_type;
683 using size_type =
typename local_graph_type::size_type;
684 using col_index_type =
typename local_graph_type::data_type;
685 using array_layout =
typename local_graph_type::array_layout;
686 using memory_traits =
typename local_graph_type::memory_traits;
693 const int numRows =
static_cast<int>(localA.numRows());
694 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
695 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
696 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
697 = Kokkos::create_mirror_view(localA.graph.row_map);
698 typename local_graph_type::entries_type::HostMirror origColind_h
699 = Kokkos::create_mirror_view(localA.graph.entries);
700 typename values_type::HostMirror origValues_h
701 = Kokkos::create_mirror_view(localA.values);
702 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
703 Kokkos::deep_copy(origColind_h, localA.graph.entries);
704 Kokkos::deep_copy(origValues_h, localA.values);
708 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
709 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
710 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
712 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
714 Kokkos::deep_copy(rowPtr, rowPtr_h);
716 const LO nnzOnrankA = rowPtr_h(numRows);
719 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
720 values_type values(
"onrankA values", rowPtr_h(numRows));
721 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
722 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
724 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
726 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
727 if(origColind_h(entryIdx) < numRows) {
728 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
729 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
734 Kokkos::deep_copy(colInd, colInd_h);
735 Kokkos::deep_copy(values, values_h);
737 onrankA = local_matrix_type(
"onrankA", numRows, numRows,
738 nnzOnrankA, values, rowPtr, colInd);
741 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
742 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
743 BuildIntermediateProlongator(
const LocalOrdinal numRows,
746 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
747 typename Matrix::local_matrix_type& intermediateP)
const {
748 Monitor m(*
this,
"BuildIntermediateProlongator");
751 RCP<Teuchos::FancyOStream> out;
752 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
753 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
754 out->setShowAllFrontMatter(
false).setShowProcRank(
true);
756 out = Teuchos::getFancyOStream(rcp(
new Teuchos::oblackholestream()));
759 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
760 using values_type =
typename local_matrix_type::values_type;
761 using size_type =
typename local_graph_type::size_type;
762 using col_index_type =
typename local_graph_type::data_type;
763 using array_layout =
typename local_graph_type::array_layout;
764 using memory_traits =
typename local_graph_type::memory_traits;
768 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
770 const int intermediatePnnz = numRows - numDirichletNodes;
771 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
772 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
773 values_type values(
"intermediateP values", intermediatePnnz);
774 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
775 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
778 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
780 if(localVertex2AggID[rowIdx] == LO_INVALID) {
781 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
783 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
784 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
788 Kokkos::deep_copy(rowPtr, rowPtr_h);
789 Kokkos::deep_copy(colInd, colInd_h);
790 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
792 intermediateP = local_matrix_type(
"intermediateP",
793 numRows, numLocalAggregates, intermediatePnnz,
794 values, rowPtr, colInd);
797 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
798 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
799 BuildCoarseLocalMatrix(
const typename Matrix::local_matrix_type& intermediateP,
800 typename Matrix::local_matrix_type& coarseA)
const {
801 Monitor m(*
this,
"BuildCoarseLocalMatrix");
803 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
804 using values_type =
typename local_matrix_type::values_type;
805 using size_type =
typename local_graph_type::size_type;
806 using col_index_type =
typename local_graph_type::data_type;
807 using array_layout =
typename local_graph_type::array_layout;
808 using memory_traits =
typename local_graph_type::memory_traits;
812 local_matrix_type AP;
813 localSpGEMM(coarseA, intermediateP,
"AP", AP);
826 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
827 intermediateP.numCols() + 1);
828 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
829 intermediateP.nnz());
830 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
831 intermediateP.nnz());
833 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
834 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
835 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
836 Kokkos::deep_copy(rowPtrPt_h, 0);
837 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
838 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
840 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
841 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
843 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
845 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
846 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
847 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
848 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
849 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
850 Kokkos::deep_copy(valuesP_h, intermediateP.values);
851 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
852 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
853 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
854 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
856 col_index_type colIdx = 0;
857 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
858 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
859 colIdx = entries_h(entryIdxP);
860 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
861 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
862 colIndPt_h(entryIdxPt) = rowIdx;
863 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
870 Kokkos::deep_copy(colIndPt, colIndPt_h);
871 Kokkos::deep_copy(valuesPt, valuesPt_h);
874 local_matrix_type intermediatePt(
"intermediatePt",
875 intermediateP.numCols(),
876 intermediateP.numRows(),
878 valuesPt, rowPtrPt, colIndPt);
881 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
884 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
885 void NotayAggregationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
886 localSpGEMM(
const typename Matrix::local_matrix_type& A,
887 const typename Matrix::local_matrix_type& B,
888 const std::string matrixLabel,
889 typename Matrix::local_matrix_type& C)
const {
891 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
892 using values_type =
typename local_matrix_type::values_type;
893 using size_type =
typename local_graph_type::size_type;
894 using col_index_type =
typename local_graph_type::data_type;
895 using array_layout =
typename local_graph_type::array_layout;
896 using memory_space =
typename device_type::memory_space;
897 using memory_traits =
typename local_graph_type::memory_traits;
902 int team_work_size = 16;
903 std::string myalg(
"SPGEMM_KK_MEMORY");
904 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
905 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
906 typename col_indices_type::const_value_type,
907 typename values_type::const_value_type,
911 kh.create_spgemm_handle(alg_enum);
912 kh.set_team_work_size(team_work_size);
915 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
917 col_indices_type colIndC;
921 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
922 B.numRows(), B.numCols(),
923 A.graph.row_map, A.graph.entries,
false,
924 B.graph.row_map, B.graph.entries,
false,
928 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
930 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
931 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
935 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
936 B.numRows(), B.numCols(),
937 A.graph.row_map, A.graph.entries, A.values,
false,
938 B.graph.row_map, B.graph.entries, B.values,
false,
939 rowPtrC, colIndC, valuesC);
940 kh.destroy_spgemm_handle();
942 C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
948 #endif //ifdef HAVE_MUELU_KOKKOS_REFACTOR
MueLu::DefaultLocalOrdinal LocalOrdinal
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
Namespace for MueLu classes and methods.
Print statistics that do not involve significant additional computation.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
#define SET_VALID_ENTRY(name)