MueLu  Version of the Day
MueLu_RepartitionHeuristicFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 #ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
48 #define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
49 
50 #include <algorithm>
51 #include <iostream>
52 #include <sstream>
53 
54 #ifdef HAVE_MPI
55 #include <Teuchos_DefaultMpiComm.hpp>
56 #include <Teuchos_CommHelpers.hpp>
57 
58 //#include <Xpetra_Map.hpp>
59 #include <Xpetra_Matrix.hpp>
60 
61 #include "MueLu_Utilities.hpp"
62 
63 #include "MueLu_RAPFactory.hpp"
64 #include "MueLu_BlockedRAPFactory.hpp"
65 #include "MueLu_SubBlockAFactory.hpp"
66 #include "MueLu_Level.hpp"
67 #include "MueLu_MasterList.hpp"
68 #include "MueLu_Monitor.hpp"
69 
70 #include "MueLu_RepartitionHeuristicFactory.hpp"
71 
72 namespace MueLu {
73 
74  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
76  RCP<ParameterList> validParamList = rcp(new ParameterList());
77 
78 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
79  SET_VALID_ENTRY("repartition: start level");
80  SET_VALID_ENTRY("repartition: min rows per proc");
81  SET_VALID_ENTRY("repartition: target rows per proc");
82  SET_VALID_ENTRY("repartition: max imbalance");
83 #undef SET_VALID_ENTRY
84 
85  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Factory of the matrix A");
86 
87  return validParamList;
88  }
89 
90  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
92  Input(currentLevel, "A");
93  }
94 
95  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
97  FactoryMonitor m(*this, "Build", currentLevel);
98 
99  const Teuchos::ParameterList & pL = GetParameterList();
100  // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation.
101  // TODO (JG): I don't really know if we want to do this.
102  const int startLevel = pL.get<int> ("repartition: start level");
103  const LO minRowsPerProcess = pL.get<LO> ("repartition: min rows per proc");
104  LO targetRowsPerProcess = pL.get<LO> ("repartition: target rows per proc");
105  const double nonzeroImbalance = pL.get<double>("repartition: max imbalance");
106 
107  if (targetRowsPerProcess == 0)
108  targetRowsPerProcess = minRowsPerProcess;
109 
110  RCP<const FactoryBase> Afact = GetFactory("A");
111  if(!Afact.is_null() && Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) == Teuchos::null &&
112  Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
113  Teuchos::rcp_dynamic_cast<const SubBlockAFactory>(Afact) == Teuchos::null) {
114  GetOStream(Warnings) <<
115  "MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must " \
116  "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! " \
117  "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. " \
118  "Please check the input. Make also sure that \"number of partitions\" is provided to " \
119  "the Interface class and the RepartitionFactory instance. Instead, we have a "<<Afact->description() << std::endl;
120  }
121  // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types.
122  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
123 
124  // ======================================================================================================
125  // Determine whether partitioning is needed
126  // ======================================================================================================
127  // NOTE: most tests include some global communication, which is why we currently only do tests until we make
128  // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to
129  // rebalance an operator. So, it would probably be beneficial to do and report *all* tests.
130 
131  // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning
132  if (currentLevel.GetLevelID() < startLevel) {
133  GetOStream(Statistics1) << "Repartitioning? NO:" <<
134  "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) <<
135  ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;
136 
137  // a negative number of processors means: no repartitioning
138  Set(currentLevel, "number of partitions", -1);
139 
140  return;
141  }
142 
143  RCP<const Map> rowMap = A->getRowMap();
144 
145  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
146  RCP<const Teuchos::Comm<int> > comm = origComm;
147 
148  // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A
149  // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created)
150  // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size
151  {
152  int numActiveProcesses = 0;
153  MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);
154 
155  if (numActiveProcesses == 1) {
156  GetOStream(Statistics1) << "Repartitioning? NO:" <<
157  "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;
158 
159  Set(currentLevel, "number of partitions", 1);
160  return;
161  }
162  }
163 
164  bool test3 = false, test4 = false;
165  std::string msg3, msg4;
166 
167  // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement
168  // NOTE: Test2 ensures that repartitionning is not done when there is only one processor (it may or may not satisfy Test3)
169  if (minRowsPerProcess > 0) {
170  LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
171  LO haveFewRows = (numMyRows < minRowsPerProcess ? 1 : 0), numWithFewRows = 0;
172  MueLu_sumAll(comm, haveFewRows, numWithFewRows);
173  MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
174 
175  // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some
176  // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements.
177  // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement.
178  if (numWithFewRows > 0)
179  test3 = true;
180 
181  msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcess);
182  }
183 
184  // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold
185  if (!test3) {
186  GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
187  MueLu_maxAll(comm, numMyNnz, maxNnz);
188  MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors
189  double imbalance = Teuchos::as<double>(maxNnz)/minNnz;
190 
191  if (imbalance > nonzeroImbalance)
192  test4 = true;
193 
194  msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance);
195  }
196 
197  if (!test3 && !test4) {
198  GetOStream(Statistics1) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;
199 
200  // A negative number of partitions means: no repartitioning
201  Set(currentLevel, "number of partitions", -1);
202  return;
203  }
204 
205  GetOStream(Statistics1) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;
206 
207  // ======================================================================================================
208  // Calculate number of partitions
209  // ======================================================================================================
210  // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML)
211  // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold.
212 
213  // The number of partitions is calculated by the RepartitionFactory and stored in "number of partitions" variable on
214  // the current level. If this variable is already set (e.g., by another instance of RepartitionFactory) then this number
215  // is used. The "number of partitions" variable serves as basic communication between the RepartitionFactory (which
216  // requests a certain number of partitions) and the *Interface classes which call the underlying partitioning algorithms
217  // and produce the "Partition" array with the requested number of partitions.
218  const auto globalNumRows = Teuchos::as<GO>(A->getGlobalNumRows());
219  int numPartitions = 1;
220  if (globalNumRows >= targetRowsPerProcess) {
221  // Make sure that each CPU thread has approximately targetRowsPerProcess
222 
223  int thread_per_mpi_rank = 1;
224 #if defined(HAVE_MUELU_KOKKOSCORE) && defined(KOKKOS_ENABLE_OPENMP)
225  using execution_space = typename Node::device_type::execution_space;
226  if (std::is_same<execution_space, Kokkos::OpenMP>::value)
227  thread_per_mpi_rank = execution_space::concurrency();
228 #endif
229 
230  numPartitions = std::max(Teuchos::as<int>(globalNumRows / (targetRowsPerProcess * thread_per_mpi_rank)), 1);
231  }
232  numPartitions = std::min(numPartitions, comm->getSize());
233 
234  Set(currentLevel, "number of partitions", numPartitions);
235 
236  GetOStream(Statistics1) << "Number of partitions to use = " << numPartitions << std::endl;
237  } // Build
238 } // namespace MueLu
239 
240 #endif //ifdef HAVE_MPI
241 #endif /* PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
#define MueLu_maxAll(rcpComm, in, out)
void DeclareInput(Level &currentLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
void Build(Level &currentLevel) const
Build an object with this factory.
#define SET_VALID_ENTRY(name)
Namespace for MueLu classes and methods.
#define MueLu_minAll(rcpComm, in, out)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Factory for building a thresholded operator.
Print all warning messages.
Factory for building coarse matrices.