MueLu  Version of the Day
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_AlgebraicPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 20, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <queue>
12 
13 #include <Xpetra_MultiVector.hpp>
14 #include <Xpetra_Matrix.hpp>
15 #include <Xpetra_CrsGraph.hpp>
16 #include <Xpetra_Vector.hpp>
17 #include <Xpetra_MultiVectorFactory.hpp>
18 #include <Xpetra_VectorFactory.hpp>
19 #include <Xpetra_CrsMatrixWrap.hpp>
20 #include <Xpetra_Export.hpp>
21 #include <Xpetra_ExportFactory.hpp>
22 #include <Xpetra_Import.hpp>
23 #include <Xpetra_ImportFactory.hpp>
24 #include <Xpetra_MatrixMatrix.hpp>
25 
26 #include "MueLu_Utilities.hpp"
28 
29 namespace MueLu {
30 
31  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
32  void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const {
33 
34  const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();
35  int numProcs = comm->getSize();
36  int myRank = comm->getRank();
37 
38  size_t nDofsPerNode = 1;
39  if (A->IsView("stridedMaps")) {
40  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
41  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
42  }
43 
44  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
45  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
46  std::vector<Scalar> Weights;
47 
48  // loop over all local rows in matrix A and keep diagonal entries if corresponding
49  // matrix rows are not contained in permRowMap
50  for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) {
51  GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
52 
53  if(permRowMap->isNodeGlobalElement(grow) == true) continue;
54 
55  size_t nnz = A->getNumEntriesInLocalRow(row);
56 
57  // extract local row information from matrix
58  Teuchos::ArrayView<const LocalOrdinal> indices;
59  Teuchos::ArrayView<const Scalar> vals;
60  A->getLocalRowView(row, indices, vals);
61 
62  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
63 
64  // find column entry with max absolute value
65  GlobalOrdinal gMaxValIdx = 0;
66  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
67  MT norm1 = Teuchos::ScalarTraits<MT>::zero ();
68  MT maxVal = Teuchos::ScalarTraits<MT>::zero ();
69  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
70  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
71  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
72  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
73  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
74  }
75  }
76 
77  if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
78  keepDiagonalEntries.push_back(std::make_pair(grow,grow));
79  }
80 
82  // handle rows that are marked to be relevant for permutations
83  for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) {
84  GlobalOrdinal grow = permRowMap->getGlobalElement(row);
85  LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
86  size_t nnz = A->getNumEntriesInLocalRow(lArow);
87 
88  // extract local row information from matrix
89  Teuchos::ArrayView<const LocalOrdinal> indices;
90  Teuchos::ArrayView<const Scalar> vals;
91  A->getLocalRowView(lArow, indices, vals);
92 
93  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
94 
95  // find column entry with max absolute value
96  GlobalOrdinal gMaxValIdx = 0;
97  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
98  MT norm1 = Teuchos::ScalarTraits<MT>::zero ();
99  MT maxVal = Teuchos::ScalarTraits<MT>::zero ();
100  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
101  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
102  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
103  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
104  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
105  }
106  }
107 
108  if(maxVal > Teuchos::ScalarTraits<MT>::zero ()) { // keep only max Entries \neq 0.0
109  permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
110  Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz)));
111  } else {
112  std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
113  }
114 
115  }
116 
117  // sort Weights in descending order
118  std::vector<int> permutation;
119  sortingPermutation(Weights,permutation);
120 
121  // create new vector with exactly one possible entry for each column
122 
123  // each processor which requests the global column id gcid adds 1 to gColVec
124  // gColVec will be summed up over all processors and communicated to gDomVec
125  // which is based on the non-overlapping domain map of A.
126 
127  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
128  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
129  gColVec->putScalar(0.0);
130  gDomVec->putScalar(0.0);
131 
132  // put in all keep diagonal entries
133  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
134  gColVec->sumIntoGlobalValue((*p).second,1.0);
135  }
136 
137  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
138  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs
139  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
140 
141  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
142  std::map<GlobalOrdinal, Scalar> gColId2Weight;
143 
144  Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
145  for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
146  // loop over all candidates
147  std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
148  GlobalOrdinal grow = pp.first;
149  GlobalOrdinal gcol = pp.second;
150 
151  LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
152  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
153  if(Teuchos::ScalarTraits<Scalar>::real (ddata[lcol]) > Teuchos::ScalarTraits<MT>::zero ()){
154  continue; // skip lcol: column already handled by another row
155  }
156 
157  // mark column as already taken
158  ddata[lcol] += Teuchos::ScalarTraits<Scalar>::one ();
159 
160  permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
161  gColId2Weight[gcol] = Weights[permutation[i]];
162  }
163 
164  // communicate how often each column index is requested by the different procs
165  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
166  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me
167 
168  //*****************************************************************************************
169  // first communicate ALL global ids of column indices which are requested by more
170  // than one proc to all other procs
171  // detect which global col indices are requested by more than one proc
172  // and store them in the multipleColRequests vector
173  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
174  // requested by another processor. This is possible, since they are stored
175  // in gDomVec which is based on the nonoverlapping domain map. That is, each
176  // global col id is handled by exactly one proc.
177  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
178 
179  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
180  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
181  //
182  // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
183  // real part, because this is a count. (Shouldn't MueLu use
184  // mag_type instead of Scalar here to save space?)
185  //
186  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
187  if(Teuchos::ScalarTraits<Scalar>::real (arrDomVec[sz]) > Teuchos::ScalarTraits<MT>::one ()) {
188  multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
189  } else if(Teuchos::ScalarTraits<Scalar>::real (arrDomVec[sz]) ==
190  Teuchos::ScalarTraits<MT>::zero ()) {
191  unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
192  }
193  }
194 
195  // communicate the global number of column indices which are requested by more than one proc
196  LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
197  LocalOrdinal globalMultColRequests = 0;
198 
199  // sum up all entries in multipleColRequests over all processors
200  MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
201 
202  if(globalMultColRequests > 0) {
203  // special handling: two processors request the same global column id.
204  // decide which processor gets it
205 
206  // distribute number of multipleColRequests to all processors
207  // each processor stores how many column ids for exchange are handled by the cur proc
208  std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0);
209  std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0);
210  numMyMultColRequests[myRank] = localMultColRequests;
211  Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]);
212 
213  // communicate multipleColRequests entries to all processors
214  int nMyOffset = 0;
215  for (int i=0; i<myRank-1; i++)
216  nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
217 
218  GlobalOrdinal zero=0;
219  std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero);
220  std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero);
221 
222  // loop over all local column GIDs that are also requested by other procs
223  for(size_t i = 0; i < multipleColRequests.size(); i++) {
224  procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
225  }
226 
227  // template ordinal, package (double)
228  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]);
229 
230  // loop over global_procOverlappingWeights and eliminate wrong entries...
231  for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
232  GlobalOrdinal globColId = global_procMultRequestedColIds[k];
233 
234  std::vector<Scalar> MyWeightForColId(numProcs,0);
235  std::vector<Scalar> GlobalWeightForColId(numProcs,0);
236 
237  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
238  MyWeightForColId[myRank] = gColId2Weight[globColId];
239  } else {
240  MyWeightForColId[myRank] = 0.0;
241  }
242 
243  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]);
244 
245  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
246  // note: 2 procs could have the same weight for a column index.
247  // pick the first one.
248  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
249  MT winnerValue = 0.0;
250  int winnerProcRank = 0;
251  for (int proc = 0; proc < numProcs; proc++) {
252  if(Teuchos::ScalarTraits<Scalar>::real (GlobalWeightForColId[proc]) > winnerValue) {
253  winnerValue = Teuchos::ScalarTraits<Scalar>::real (GlobalWeightForColId[proc]);
254  winnerProcRank = proc;
255  }
256  }
257 
258  // winnerProcRank is the winner for handling globColId.
259  // winnerProcRank is unique (even if two procs have the same weight for a column index)
260 
261  if(myRank != winnerProcRank) {
262  // remove corresponding entry from permutedDiagCandidatesFiltered
263  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
264  while(p != permutedDiagCandidatesFiltered.end() )
265  {
266  if((*p).second == globColId)
267  p = permutedDiagCandidatesFiltered.erase(p);
268  else
269  p++;
270  }
271  }
272 
273  } // end if isNodeGlobalElement
274  } // end loop over global_procOverlappingWeights and eliminate wrong entries...
275  } // end if globalMultColRequests > 0
276 
277  // put together all pairs:
278  //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
279  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
280  RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
281  RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
282 
283 #ifdef DEBUG_OUTPUT
284  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
285  // plausibility check
286  gColVec->putScalar(0.0);
287  gDomVec->putScalar(0.0);
288  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
289  while(pl != RowColPairs.end() )
290  {
291  //GlobalOrdinal ik = (*pl).first;
292  GlobalOrdinal jk = (*pl).second;
293 
294  gColVec->sumIntoGlobalValue(jk,1.0);
295  pl++;
296  }
297  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
298  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
299  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
300  if(arrDomVec[sz] > 1.0) {
301  GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
302  } else if(arrDomVec[sz] == 0.0) {
303  GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
304  }
305  }
306  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
307 #endif
308 
310  // assumption: on each processor RowColPairs now contains
311  // a valid set of (row,column) pairs, where the row entries
312  // are a subset of the processor's rows and the column entries
313  // are unique throughout all processors.
314  // Note: the RowColPairs are only defined for a subset of all rows,
315  // so there might be rows without an entry in RowColPairs.
316  // It can be, that some rows seem to be missing in RowColPairs, since
317  // the entry in that row with maximum absolute value has been reserved
318  // by another row already (e.g. as already diagonal dominant row outside
319  // of perRowMap).
320  // In fact, the RowColPairs vector only defines the (row,column) pairs
321  // that will be definitely moved to the diagonal after permutation.
322 
323 #ifdef DEBUG_OUTPUT
324  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
325  // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
326  // }
327  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
328  // {
330  // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
331  // }
332  // std::cout << "\n";
333 #endif
334 
335  // vectors to store permutation information
336  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
337  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
338  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
339 
340  Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0);
341  Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0);
342 
343  Pperm->putScalar(0.0);
344  Qperm->putScalar(0.0);
345  lQperm->putScalar(0.0);
346 
347  // setup exporter for Qperm
348  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
349 
350  Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
351  Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
352  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
353  Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
354  Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0);
355  Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
356  Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0);
357  Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
358  RowIdStatus->putScalar(0.0);
359  ColIdStatus->putScalar(0.0);
360  lColIdStatus->putScalar(0.0);
361  ColIdUsed->putScalar(0.0); // no column ids are used
362 
363  // count wide-range permutations
364  // a wide-range permutation is defined as a permutation of rows/columns which do not
365  // belong to the same node
366  LocalOrdinal lWideRangeRowPermutations = 0;
367  GlobalOrdinal gWideRangeRowPermutations = 0;
368  LocalOrdinal lWideRangeColPermutations = 0;
369  GlobalOrdinal gWideRangeColPermutations = 0;
370 
371  // run 1: mark all "identity" permutations
372  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
373  while(p != RowColPairs.end() )
374  {
375  GlobalOrdinal ik = (*p).first;
376  GlobalOrdinal jk = (*p).second;
377 
378  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
379  LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
380 
381  if(RowIdStatusArray[lik] == 0.0) {
382  RowIdStatusArray[lik] = 1.0; // use this row id
383  lColIdStatusArray[ljk] = 1.0; // use this column id
384  Pperm->replaceLocalValue(lik, ik);
385  lQperm->replaceLocalValue(ljk, ik); // use column map
386  ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used
387  p = RowColPairs.erase(p);
388 
389  // detect wide range permutations
390  if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
391  lWideRangeColPermutations++;
392  }
393  }
394  else
395  p++;
396  }
397 
398  // communicate column map -> domain map
399  Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX);
400  ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
401 
402  // plausibility check
403  if(RowColPairs.size()>0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
404 
405  // close Pperm
406 
407  // count, how many row permutations are missing on current proc
408  size_t cntFreeRowIdx = 0;
409  std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
410  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
411  if(RowIdStatusArray[lik] == 0.0) {
412  cntFreeRowIdx++;
413  qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
414  }
415  }
416 
417  // fix Pperm
418  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
419  if(RowIdStatusArray[lik] == 0.0) {
420  RowIdStatusArray[lik] = 1.0; // use this row id
421  Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
422  // detect wide range permutations
423  if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
424  lWideRangeRowPermutations++;
425  }
426  qFreeGRowIdx.pop();
427  }
428  }
429 
430  // close Qperm (free permutation entries in Qperm)
431  size_t cntFreeColIdx = 0;
432  std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
433  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
434  if(ColIdStatusArray[ljk] == 0.0) {
435  cntFreeColIdx++;
436  qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
437  }
438  }
439 
440  size_t cntUnusedColIdx = 0;
441  std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
442  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
443  if(ColIdUsedArray[ljk] == 0.0) {
444  cntUnusedColIdx++;
445  qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
446  }
447  }
448 
449  // fix Qperm with local entries
450  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
451  // stop if no (local) unused column idx are left
452  if(cntUnusedColIdx == 0) break;
453 
454  if(ColIdStatusArray[ljk] == 0.0) {
455  ColIdStatusArray[ljk] = 1.0; // use this row id
456  Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
457  ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too
458  // detect wide range permutations
459  if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
460  lWideRangeColPermutations++;
461  }
462  qUnusedGColIdx.pop();
463  cntUnusedColIdx--;
464  cntFreeColIdx--;
465  }
466  }
467 
468  //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
469  //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
470 
471  // count, how many unused column idx are needed on current processor
472  // to complete Qperm
473  cntFreeColIdx = 0;
474  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
475  if(ColIdStatusArray[ljk] == 0.0) {
476  cntFreeColIdx++;
477  }
478  }
479 
480  GlobalOrdinal global_cntFreeColIdx = 0;
481  LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
482  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
483 #ifdef DEBUG_OUTPUT
484  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
485 #endif
486 
487  // avoid global communication if possible
488  if(global_cntFreeColIdx > 0) {
489 
490  // 1) count how many unused column ids are left
491  GlobalOrdinal global_cntUnusedColIdx = 0;
492  LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
493  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
494 #ifdef DEBUG_OUTPUT
495  std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
496 #endif
497 
498  // 2) communicate how many unused column ids are available on procs
499  std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
500  std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
501  local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
502  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]);
503 
504 #ifdef DEBUG_OUTPUT
505  std::cout << "PROC " << myRank << " global num unused indices per proc: ";
506  for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
507  std::cout << " " << global_UnusedColIdxOnProc[ljk];
508  }
509  std::cout << std::endl;
510 #endif
511 
512  // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
513  std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
514  std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
515  GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
516  for(int proc=0; proc<myRank; proc++) {
517  global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
518  }
519  for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
520  local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
521  qUnusedGColIdx.pop();
522  }
523  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]);
524 #ifdef DEBUG_OUTPUT
525  std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
526  for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
527  std::cout << " " << global_UnusedColIdxVector[ljk];
528  }
529  std::cout << std::endl;
530 #endif
531 
532 
533 
534  // 4) communicate, how many column idx are needed on each processor
535  // to complete Qperm
536  std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
537  std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
538  local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
539  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]);
540 
541 #ifdef DEBUG_OUTPUT
542  std::cout << "PROC " << myRank << " global num of needed column indices: ";
543  for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
544  std::cout << " " << global_EmptyColIdxOnProc[ljk];
545  }
546  std::cout << std::endl;
547 #endif
548 
549  // 5) determine first index in global_UnusedColIdxVector for unused column indices,
550  // that are marked to be used by this processor
551  GlobalOrdinal global_UnusedColStartIdx = 0;
552  for(int proc=0; proc<myRank; proc++) {
553  global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
554  }
555 
556 #ifdef DEBUG_OUTPUT
557  GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
558  for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
559  GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
560  }
561  GetOStream(Statistics0) << std::endl;
562 #endif
563 
564  // 6.) fix Qperm with global entries
565  GlobalOrdinal array_iter = 0;
566  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
567 
568  if(ColIdStatusArray[ljk] == 0.0) {
569  ColIdStatusArray[ljk] = 1.0; // use this row id
570  Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
571  ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0);
572  // detect wide range permutations
573  if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
574  lWideRangeColPermutations++;
575  }
576  array_iter++;
577  //cntUnusedColIdx--; // check me
578  }
579  }
580  } // end if global_cntFreeColIdx > 0
582 
583 
584  // create new empty Matrix
585  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
586  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
587 
588  for(size_t row=0; row<A->getNodeNumRows(); row++) {
589  // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
590  // convert from complex Scalar to GO, so we have to take the real
591  // part first. I think that's the right thing to do in this case.
592  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
593  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
594  Teuchos::ArrayRCP<Scalar> valout(1,Teuchos::ScalarTraits<Scalar>::one());
595  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
596  permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
597  }
598 
599  permPTmatrix->fillComplete();
600  permQTmatrix->fillComplete();
601 
602  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
603 
604  for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) {
605  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
606  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
607  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
608  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
609  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
610  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
611  }
612 
613  // build permP * A * permQT
614  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2));
615  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
616 
617  /*
618  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
619  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
620  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
621  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
622  */
623  // build scaling matrix
624  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
625  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
626  Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
627  Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);
628 
629  permPApermQt->getLocalDiagCopy(*diagVec);
630  for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) {
631  if(diagVecData[i] != 0.0)
632  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one () / diagVecData[i];
633  else {
634  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one ();
635  GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
636  }
637  }
638 
639  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile));
640 
641  for(size_t row=0; row<A->getNodeNumRows(); row++) {
642  Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
643  Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
644  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
645  }
646  diagScalingOp->fillComplete();
647 
648  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
649  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/);
650 
651  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!!
652  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/);
653  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/);
654  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/);
655 
657  // count zeros on diagonal in P -> number of row permutations
658  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
659  permPmatrix->getLocalDiagCopy(*diagPVec);
660  Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
661  LocalOrdinal lNumRowPermutations = 0;
662  GlobalOrdinal gNumRowPermutations = 0;
663  for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) {
664  if(diagPVecData[i] == 0.0) {
665  lNumRowPermutations++;
666  }
667  }
668 
669  // sum up all entries in multipleColRequests over all processors
670  MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
671 
673  // count zeros on diagonal in Q^T -> number of column permutations
674  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
675  permQTmatrix->getLocalDiagCopy(*diagQTVec);
676  Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
677  LocalOrdinal lNumColPermutations = 0;
678  GlobalOrdinal gNumColPermutations = 0;
679  for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) {
680  if(diagQTVecData[i] == 0.0) {
681  lNumColPermutations++;
682  }
683  }
684 
685  // sum up all entries in multipleColRequests over all processors
686  MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
687 
688  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
689  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
690  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/);
691  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/);
692 
693  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
694  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
695  GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
696 }
697 
698 } // namespace MueLu
699 
700 #endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)