// NOTE(review): this listing carries the original source-line numbers as a
// leading token on each statement, and the numbering has gaps, so some lines
// (braces, `else`, and at least one parameter) are not visible here.
//
// BuildAggregates: entry point of aggregation phase 1.
//   params                - must provide int "aggregation: min agg size",
//                           int "aggregation: max agg size" and
//                           bool "aggregation: deterministic".
//   graph, aggregates     - forwarded unchanged to the selected variant.
//   numNonAggregatedNodes - in/out counter, forwarded to the selected variant.
// `aggStat` is forwarded as well, but its declaration is not visible in this
// listing (presumably the parameter on the skipped line 76 - TODO confirm).
// Raises Exceptions::RuntimeError when the max aggregate size is smaller than
// the min aggregate size.
46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP 47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 54 #include <Teuchos_Comm.hpp> 55 #include <Teuchos_CommHelpers.hpp> 57 #include <Xpetra_Vector.hpp> 61 #include "MueLu_Aggregates_kokkos.hpp" 63 #include "MueLu_LWGraph_kokkos.hpp" 66 #include "Kokkos_Sort.hpp" 71 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
73 BuildAggregates(
const Teuchos::ParameterList& params,
74 const LWGraph_kokkos& graph,
75 Aggregates_kokkos& aggregates,
77 LO& numNonAggregatedNodes)
const {
// The minimum size is only used for the sanity check below; only the maximum
// size is passed on to the aggregation variants.
79 int minNodesPerAggregate = params.get<
int> (
"aggregation: min agg size");
80 int maxNodesPerAggregate = params.get<
int> (
"aggregation: max agg size");
// Reject an inconsistent size range before doing any work.
// NOTE(review): the message names UncoupledAggregationAlgorithm, not this
// class - looks like a copy/paste leftover; confirm before relying on it in logs.
82 TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
83 Exceptions::RuntimeError,
84 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
// Select the variant from the "aggregation: deterministic" flag. Each call is
// preceded by a Monitor labelled with the variant's name.
91 if(params.get<
bool>(
"aggregation: deterministic"))
93 Monitor m(*
this,
"BuildAggregatesDeterministic");
94 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
95 aggregates, aggStat, numNonAggregatedNodes);
// Presumably the `else` branch of the flag test (the `else` line itself is
// not visible in this listing).
97 Monitor m(*
this,
"BuildAggregatesRandom");
98 BuildAggregatesRandom(maxNodesPerAggregate, graph,
99 aggregates, aggStat, numNonAggregatedNodes);
// BuildAggregatesRandom: phase-1 aggregation in which new aggregate ids are
// handed out through an atomic counter.
//   maxAggSize            - upper bound tested before a node joins an aggregate.
//   graph                 - supplies the vertex count, the communicator rank and
//                           per-vertex neighbor lists.
//   aggregates            - vertex-to-aggregate ids, proc winners and the
//                           aggregate count are read and updated.
//   numNonAggregatedNodes - in/out; reduced by the number of nodes aggregated here.
// NOTE(review): `aggStat`, `aggCount` and `aggSizesView` are used below but
// their declarations are not visible in this listing (line numbers skip);
// the loop range policies are likewise not visible.
104 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
105 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
106 BuildAggregatesRandom(
const LO maxAggSize,
107 const LWGraph_kokkos& graph,
108 Aggregates_kokkos& aggregates,
110 LO& numNonAggregatedNodes)
const 112 const LO numRows = graph.GetNodeNumVertices();
113 const int myRank = graph.GetComm()->getRank();
// Device-side views of the aggregate bookkeeping, opened for read/write.
116 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
117 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
118 auto colors = aggregates.GetGraphColors();
120 LO numAggregatedNodes = 0;
121 LO numLocalAggregates = aggregates.GetNumAggregates();
// Start the counter at the current aggregate count so that new ids continue
// after the existing ones.
123 Kokkos::deep_copy(aggCount, numLocalAggregates);
// Step 1: every node with color 1 that is still READY becomes the root of a
// new aggregate. The id is the pre-increment value of the atomic counter, so
// which root gets which id depends on execution order.
124 Kokkos::parallel_for(
"Aggregation Phase 1: initial reduction over color == 1",
126 KOKKOS_LAMBDA (
const LO nodeIdx) {
127 if(colors(nodeIdx) == 1 && aggStat(nodeIdx) ==
READY) {
128 const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1);
129 vertex2AggId(nodeIdx, 0) = aggIdx;
131 procWinner(nodeIdx, 0) = myRank;
// Subtract the old aggregate count, read back the new one and add it: the net
// effect is to add the number of aggregates (roots) created in step 1.
137 numAggregatedNodes -= numLocalAggregates;
138 Kokkos::deep_copy(numLocalAggregates, aggCount);
139 numAggregatedNodes += numLocalAggregates;
// Step 2: count how many nodes currently belong to each aggregate. Nodes whose
// aggregate id is negative are skipped. Per-node increments go through a
// scatter view and are merged into aggSizesView by contribute().
149 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
150 Kokkos::parallel_for(
"Aggregation Phase 1: compute initial aggregates size",
152 KOKKOS_LAMBDA (
const LO nodeIdx) {
153 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
154 if(vertex2AggId(nodeIdx, 0) >= 0)
155 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
157 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
// Step 3: each node that is not color 1 and is READY or NOTSEL scans its
// neighbors for a local color-1 vertex and tries to join that vertex's
// aggregate. The reduction counts the nodes that joined.
160 LO tmpNumAggregatedNodes = 0;
161 Kokkos::parallel_reduce(
"Aggregation Phase 1: main parallel_reduce over aggSizes",
163 KOKKOS_LAMBDA (
const size_t nodeIdx, LO & lNumAggregatedNodes) {
164 if(colors(nodeIdx) != 1
165 && (aggStat(nodeIdx) ==
READY || aggStat(nodeIdx) ==
NOTSEL)) {
168 auto neighbors = graph.getNeighborVertices(nodeIdx);
169 for(LO j = 0; j < neighbors.length; ++j) {
170 auto nei = neighbors.colidx(j);
// NOTE(review): the remainder of this condition is not visible in the listing.
171 if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1
176 LO agg = vertex2AggId(nei, 0);
// Atomically update the aggregate's size and keep the value it had before the
// update (the increment argument is on a line not visible here).
177 const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
// Join only if the aggregate was below the cap before this node's update.
179 if(aggSize < maxAggSize) {
181 vertex2AggId(nodeIdx, 0) = agg;
182 procWinner(nodeIdx, 0) = myRank;
184 ++lNumAggregatedNodes;
// Presumably the over-capacity branch: undo the size update made above
// (the enclosing `else` is not visible - TODO confirm).
188 Kokkos::atomic_decrement(&aggSizesView(agg));
// A NOTSEL node is put back to READY.
195 if(aggStat(nodeIdx) ==
NOTSEL) { aggStat(nodeIdx) =
READY; }
197 }, tmpNumAggregatedNodes);
// Roots from step 1 plus joined nodes from step 3 leave the non-aggregated pool.
198 numAggregatedNodes += tmpNumAggregatedNodes;
199 numNonAggregatedNodes -= numAggregatedNodes;
// Publish the updated aggregate count.
202 aggregates.SetNumAggregates(numLocalAggregates);
// BuildAggregatesDeterministic: phase-1 aggregation in which aggregate ids are
// derived from the position of each root in a sorted root list rather than
// from the order in which an atomic counter was hit.
//   maxAggSize            - aggregate size compared against while a root
//                           collects neighbors.
//   graph                 - supplies the vertex count, the communicator rank and
//                           per-vertex neighbor lists.
//   aggregates            - vertex-to-aggregate ids, proc winners and the
//                           aggregate count are read and updated.
//   numNonAggregatedNodes - in/out; reduced by the number of nodes aggregated here.
// NOTE(review): `aggStat`, `numLocalAggregatesView`, `numNewRoots`, `newRoots`
// and `aggSize` are used below but their declarations are not visible in this
// listing (line numbers skip); the loop range policies are likewise not visible.
205 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
206 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
207 BuildAggregatesDeterministic(
const LO maxAggSize,
208 const LWGraph_kokkos& graph,
209 Aggregates_kokkos& aggregates,
211 LO& numNonAggregatedNodes)
const 213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
// Device-side views of the aggregate bookkeeping, opened for read/write.
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
220 LO numLocalAggregates = aggregates.GetNumAggregates();
// Copy the current aggregate count into numLocalAggregatesView through a
// mirror view, so the kernel below can read it as the base for new ids.
223 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
224 h_nla() = numLocalAggregates;
225 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
230 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
// Step 1: append every color-1 READY vertex to newRoots. The slot comes from
// an atomic counter, so the order of entries at this point depends on
// execution order.
233 Kokkos::parallel_for(
"Aggregation Phase 1: building list of new roots",
235 KOKKOS_LAMBDA(
const LO i)
237 if(colors(i) == 1 && aggStat(i) ==
READY)
240 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
// Read the number of roots into the mirror, then sort the filled range
// [0, h_numNewRoots()) so a root's position no longer depends on the order in
// which the atomic appends landed.
243 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
245 Kokkos::sort(newRoots, 0, h_numNewRoots());
246 LO numAggregated = 0;
// Step 2: each root forms one aggregate; the reduction accumulates the size of
// every aggregate built.
247 Kokkos::parallel_reduce(
"Aggregation Phase 1: aggregating nodes",
249 KOKKOS_LAMBDA(
const LO rootIndex, LO& lnumAggregated)
251 LO root = newRoots(rootIndex);
// New ids continue after the existing aggregates, offset by sorted position.
252 LO aggID = numLocalAggregatesView() + rootIndex;
254 vertex2AggId(root, 0) = aggID;
255 procWinner(root, 0) = myRank;
// Claim local neighbors of the root that are still READY.
// NOTE(review): these writes to the neighbor's entries are not atomic in the
// visible lines; presumably the coloring ensures two roots never share a
// neighbor - confirm against the coloring used.
257 auto neighOfRoot = graph.getNeighborVertices(root);
258 for(LO n = 0; n < neighOfRoot.length; n++)
260 LO neigh = neighOfRoot(n);
261 if (graph.isLocalNeighborVertex(neigh) && aggStat(neigh) ==
READY)
264 vertex2AggId(neigh, 0) = aggID;
265 procWinner(neigh, 0) = myRank;
// Presumably stops collecting once the cap is reached (the body of this test
// is not visible in the listing).
268 if(aggSize == maxAggSize)
275 lnumAggregated += aggSize;
// Remove the nodes aggregated in this pass from the non-aggregated pool.
277 numNonAggregatedNodes -= numAggregated;
// One new aggregate was created per root.
279 aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
284 #endif // HAVE_MUELU_KOKKOS_REFACTOR 285 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
// Namespace for MueLu classes and methods.