Interface Documentation
dcrs_utils.hh
1 /*
2    @@@@@@@@  @@           @@@@@@   @@@@@@@@ @@
3   /@@/////  /@@          @@////@@ @@////// /@@
4   /@@       /@@  @@@@@  @@    // /@@       /@@
5   /@@@@@@@  /@@ @@///@@/@@       /@@@@@@@@@/@@
6   /@@////   /@@/@@@@@@@/@@       ////////@@/@@
7   /@@       /@@/@@//// //@@    @@       /@@/@@
8   /@@       @@@//@@@@@@ //@@@@@@  @@@@@@@@ /@@
9   //       ///  //////   //////  ////////  //
10 
11  Copyright (c) 2016, Los Alamos National Security, LLC
12  All rights reserved.
13  */
14 #pragma once
15 
18 #include <flecsi-config.h>
19 
25 #include "flecsi/util/crs.hh"
27 
28 #if !defined(FLECSI_ENABLE_MPI)
29 #error FLECSI_ENABLE_MPI not defined! This file depends on MPI!
30 #endif
31 
32 #include <mpi.h>
33 
34 #include <map>
35 
36 namespace flecsi {
37 
38 inline log::devel_tag dcrs_utils_tag("dcrs_utils");
39 
40 namespace topo {
41 namespace unstructured_impl {
42 
52 template<size_t DIMENSION, size_t MESH_DIMENSION>
53 inline std::set<size_t>
55  std::set<size_t> indices;
56 
57  {
58  log::devel_guard guard(dcrs_utils_tag);
59 
60  int size;
61  int rank;
62 
63  MPI_Comm_size(MPI_COMM_WORLD, &size);
64  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
65 
66  //------------------------------------------------------------------------//
67  // Create a naive initial distribution of the indices
68  //------------------------------------------------------------------------//
69 
70  size_t quot = md.num_entities(DIMENSION) / size;
71  size_t rem = md.num_entities(DIMENSION) % size;
72 
73  flog(info) << "quot: " << quot << " rem: " << rem << std::endl;
74 
75  // Each rank gets the average number of indices, with higher ranks
76  // getting an additional index for non-zero remainders.
77  size_t init_indices = quot + ((size_t(rank) >= (size - rem)) ? 1 : 0);
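  // Worked example of the split above (illustrative numbers): with
  // md.num_entities(DIMENSION) == 10 and size == 4, quot == 2 and rem == 2,
  // so ranks 0 and 1 (rank < size - rem) get 2 indices each, while ranks 2
  // and 3 get 3 each: 2 + 2 + 3 + 3 == 10.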
78 
79  size_t offset(0);
80  for(int r(0); r < rank; ++r) {
81  offset += quot + ((size_t(r) >= (size - rem)) ? 1 : 0);
82  } // for
83 
84  flog(info) << "offset: " << offset << std::endl;
85 
86  for(size_t i(0); i < init_indices; ++i) {
87  indices.insert(offset + i);
88  flog(info) << "inserting: " << offset + i << std::endl;
89  } // for
90  } // guard
91 
92  return indices;
93 } // naive_coloring
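
 // Usage sketch (illustrative, not part of this header): a caller could ask
 // for its naive share of the cells of a 2D mesh roughly as follows, where
 // `md` stands for the mesh-definition argument whose declaration was elided
 // from this listing.
 #if 0
 auto owned = naive_coloring<2, 2>(md);
 for(auto gid : owned)
   flog(info) << "naively owned global id: " << gid << std::endl;
 #endif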
94 
112 template<std::size_t DIMENSION,
113  std::size_t FROM_DIMENSION = DIMENSION,
114  std::size_t TO_DIMENSION = DIMENSION,
115  std::size_t THRU_DIMENSION = DIMENSION - 1>
116 inline util::dcrs
118  int size;
119  int rank;
120 
121  MPI_Comm_size(MPI_COMM_WORLD, &size);
122  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
123 
124  //--------------------------------------------------------------------------//
125  // Create a naive initial distribution of the indices
126  //--------------------------------------------------------------------------//
127 
128  size_t quot = md.num_entities(FROM_DIMENSION) / size;
129  size_t rem = md.num_entities(FROM_DIMENSION) % size;
130 
131  // Each rank gets the average number of indices, with higher ranks
132  // getting an additional index for non-zero remainders.
133  size_t init_indices = quot + ((size_t(rank) >= (size - rem)) ? 1 : 0);
134 
135  // Start to initialize the return object.
136  util::dcrs dcrs;
137  dcrs.distribution.push_back(0);
138 
139  // Set the distributions for each rank. This happens on all ranks.
140  for(int r(0); r < size; ++r) {
141  const size_t indices = quot + ((size_t(r) >= (size - rem)) ? 1 : 0);
142  dcrs.distribution.push_back(dcrs.distribution[r] + indices);
143  } // for
144 
145  //--------------------------------------------------------------------------//
146  // Create the cell-to-cell graph.
147  //--------------------------------------------------------------------------//
148 
149  // Set the first offset (always zero).
150  dcrs.offsets.push_back(0);
151 
152  // Add the graph adjacencies by getting the neighbors of each
153  // cell index
154  using cellid = size_t;
155  using vertexid = size_t;
156 
157  // essentially vertex-to-cell and cell-to-cell (through shared vertices)
158  // connectivity.
159  std::map<vertexid, std::vector<cellid>> vertex2cells;
160  std::map<cellid, std::vector<cellid>> cell2cells;
161 
162  // build the global cell-to-cell connectivity based on the number of shared vertices
163  for(size_t cell(0); cell < md.num_entities(FROM_DIMENSION); ++cell) {
164  std::map<cellid, size_t> cell_counts;
165 
166  for(auto vertex : md.entities(FROM_DIMENSION, 0, cell)) {
167  // build vertex to cell connectivity, O(n_cells * n_polygon_sides)
168  // of insert().
169  vertex2cells[vertex].push_back(cell);
170 
171  // Count the number of times this cell shares a common vertex with
172  // some other cell. O(n_cells * n_polygon_sides * vertex to cells degrees)
173  for(auto other : vertex2cells[vertex]) {
174  if(other != cell)
175  cell_counts[other] += 1;
176  }
177  }
178 
179  for(auto count : cell_counts) {
180  if(count.second > THRU_DIMENSION) {
181  // append cell-to-cell (through "dimension") connectivity; we need to
182  // add both directions
183  cell2cells[cell].push_back(count.first);
184  cell2cells[count.first].push_back(cell);
185  }
186  }
187  }
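
  // Example of the threshold above (illustrative): in 2D with the default
  // THRU_DIMENSION == DIMENSION - 1 == 1, two quads sharing an edge count 2
  // common vertices, pass `count.second > THRU_DIMENSION`, and become graph
  // neighbors; two quads touching only at a corner count 1 and are skipped.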
188 
189  // turn the local subset of the cell-to-cell connectivity into dcrs form
190  for(size_t i(0); i < init_indices; ++i) {
191  auto cell = dcrs.distribution[rank] + i;
192 
193  for(auto n : cell2cells[cell]) {
194  dcrs.indices.push_back(n);
195  } // for
196 
197  dcrs.offsets.push_back(dcrs.offsets[i] + cell2cells[cell].size());
198  }
199 
200  return dcrs;
201 } // make_dcrs
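
 // A minimal sketch of the resulting distributed CRS layout (illustrative
 // values): for 4 cells in a 1D chain split across 2 ranks, rank 0 holds
 //
 //   distribution = {0, 2, 4}  // global ownership ranges for all ranks
 //   offsets      = {0, 1, 3}  // per-local-cell ranges into `indices`
 //   indices      = {1, 0, 2}  // global ids of neighboring cells
 //
 // i.e. local cell 0 (global 0) neighbors cell 1, and local cell 1
 // (global 1) neighbors cells 0 and 2.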
202 
206 template<std::size_t DIMENSION,
207  std::size_t FROM_DIMENSION = DIMENSION,
208  std::size_t TO_DIMENSION = DIMENSION,
209  std::size_t THRU_DIMENSION = DIMENSION - 1>
210 void
212  util::dcrs & dcrs) {
213  int size;
214  int rank;
215 
216  MPI_Comm_size(MPI_COMM_WORLD, &size);
217  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
218 
219  //--------------------------------------------------------------------------//
220  // Create a naive initial distribution of the indices
221  //--------------------------------------------------------------------------//
222 
223  size_t quot = md.num_entities(FROM_DIMENSION) / size;
224  size_t rem = md.num_entities(FROM_DIMENSION) % size;
225 
226  // Each rank gets the average number of indices, with higher ranks
227  // getting an additional index for non-zero remainders.
228  size_t init_indices = quot + ((size_t(rank) >= (size - rem)) ? 1 : 0);
229 
230  // Start to initialize the return object.
231  dcrs.distribution.push_back(0);
232 
233  // Set the distributions for each rank. This happens on all ranks.
234  for(int r(0); r < size; ++r) {
235  const size_t indices = quot + ((size_t(r) >= (size - rem)) ? 1 : 0);
236  dcrs.distribution.push_back(dcrs.distribution[r] + indices);
237  } // for
238 
239  //--------------------------------------------------------------------------//
240  // Create the cell-to-cell graph.
241  //--------------------------------------------------------------------------//
242 
243  // Set the first offset (always zero).
244  dcrs.offsets.push_back(0);
245 
246  // Add the graph adjacencies by getting the neighbors of each
247  // cell index
248  using cellid = size_t;
249 
250  // cell-to-other connectivity, gathered by walking through the
251  // THRU_DIMENSION entities.
252  std::map<cellid, std::vector<cellid>> cell2other;
253 
254  // Travel from the FROM_DIMENSION (cell) to the TO_DIMENSION (other)
255  // via the THRU_DIMENSION entities
256  for(size_t cell(dcrs.distribution[rank]); cell < dcrs.distribution[rank + 1];
257  ++cell) {
258  auto & this_cell2other = cell2other[cell];
259 
260  for(auto vertex : md.entities(FROM_DIMENSION, THRU_DIMENSION, cell)) {
261 
262  for(auto other : md.entities(THRU_DIMENSION, TO_DIMENSION, vertex)) {
263 
264  if(FROM_DIMENSION == TO_DIMENSION) {
265  if(cell != other)
266  this_cell2other.push_back(other);
267  }
268  else {
269  this_cell2other.push_back(other);
270  }
271  }
272  }
273  }
274 
275  // turn the local subset of the cell-to-cell connectivity into dcrs form
276  for(size_t i(0); i < init_indices; ++i) {
277  auto cell = dcrs.distribution[rank] + i;
278  auto & this_cell2other = cell2other.at(cell);
279 
280  std::sort(this_cell2other.begin(), this_cell2other.end());
281  auto last = std::unique(this_cell2other.begin(), this_cell2other.end());
282  auto first = this_cell2other.begin();
283  auto size = std::distance(first, last);
284 
285  for(auto it = first; it != last; ++it) {
286  dcrs.indices.push_back(*it);
287  } // for
288 
289  dcrs.offsets.push_back(dcrs.offsets[i] + size);
290  }
291 
292 } // make_dcrs
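
 // Illustrative instantiations of the walk above: in 2D, make_dcrs<2, 2, 2, 1>
 // connects cells that share an edge, while make_dcrs<2, 2, 2, 0> connects
 // cells that share at least one vertex; the FROM/TO comparison only filters
 // out self-adjacency when both dimensions are the same.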
293 
294 template<typename SEND_TYPE, typename ID_TYPE, typename RECV_TYPE>
295 auto
296 alltoallv(const SEND_TYPE & sendbuf,
297  const ID_TYPE & sendcounts,
298  const ID_TYPE & senddispls,
299  RECV_TYPE & recvbuf,
300  const ID_TYPE & recvcounts,
301  const ID_TYPE & recvdispls,
302  decltype(MPI_COMM_WORLD) comm) {
303 
304  const auto mpi_send_t =
305  util::mpi_typetraits<typename SEND_TYPE::value_type>::type();
306  const auto mpi_recv_t =
307  util::mpi_typetraits<typename RECV_TYPE::value_type>::type();
308 
309  auto num_ranks = sendcounts.size();
310 
311  // create storage for the requests
312  std::vector<MPI_Request> requests;
313  requests.reserve(2 * num_ranks);
314 
315  // post receives
316  auto tag = 0;
317 
318  for(size_t rank = 0; rank < num_ranks; ++rank) {
319  auto count = recvcounts[rank];
320  if(count > 0) {
321  auto buf = recvbuf.data() + recvdispls[rank];
322  requests.resize(requests.size() + 1);
323  auto & my_request = requests.back();
324  auto ret =
325  MPI_Irecv(buf, count, mpi_recv_t, rank, tag, comm, &my_request);
326  if(ret != MPI_SUCCESS)
327  return ret;
328  }
329  }
330 
331  // send the data
332  for(size_t rank = 0; rank < num_ranks; ++rank) {
333  auto count = sendcounts[rank];
334  if(count > 0) {
335  auto buf = sendbuf.data() + senddispls[rank];
336  requests.resize(requests.size() + 1);
337  auto & my_request = requests.back();
338  auto ret =
339  MPI_Isend(buf, count, mpi_send_t, rank, tag, comm, &my_request);
340  if(ret != MPI_SUCCESS)
341  return ret;
342  }
343  }
344 
345  // wait for everything to complete
346  std::vector<MPI_Status> status(requests.size());
347  auto ret = MPI_Waitall(requests.size(), requests.data(), status.data());
348 
349  return ret;
350 }
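
 // Usage sketch for the helper above (illustrative only): the counts and
 // displacements mirror MPI_Alltoallv, expressed with the std::vector<size_t>
 // containers used throughout this file, and exchanged via the point-to-point
 // messages posted above.
 #if 0
 // Two ranks, each sending one size_t to every rank (including itself).
 std::vector<size_t> sendbuf = {0, 1};
 std::vector<size_t> sendcounts = {1, 1}, senddispls = {0, 1, 2};
 std::vector<size_t> recvcounts = {1, 1}, recvdispls = {0, 1, 2};
 std::vector<size_t> recvbuf(recvdispls.back());
 auto err = alltoallv(
   sendbuf, sendcounts, senddispls, recvbuf, recvcounts, recvdispls,
   MPI_COMM_WORLD);
 if(err != MPI_SUCCESS)
   flog_error("alltoallv failed");
 #endif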
351 
355 template<typename T, typename Vector>
356 T
357 rank_owner(const Vector & distribution, T i) {
358  auto it = std::upper_bound(distribution.begin(), distribution.end(), i);
359  flog_assert(it != distribution.end(), "invalid iterator");
360  return std::distance(distribution.begin(), it) - 1;
361 }
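
 // Worked example (illustrative): with distribution = {0, 4, 8, 12},
 // rank_owner(distribution, 5) finds the first entry greater than 5 (the
 // value 8 at index 2) and returns 2 - 1 == 1, i.e. global id 5 is owned by
 // rank 1.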
362 
366 template<typename Vector>
367 void
368 subdivide(size_t nelem, size_t npart, Vector & dist) {
369 
370  size_t quot = nelem / npart;
371  size_t rem = nelem % npart;
372 
373  dist.reserve(npart + 1);
374  dist.push_back(0);
375 
376  // Set the distributions for each rank. This happens on all ranks.
377  // Each rank gets the average number of indices, with higher ranks
378  // getting an additional index for non-zero remainders.
379  for(size_t r(0); r < npart; ++r) {
380  const size_t indices = quot + ((r >= (npart - rem)) ? 1 : 0);
381  dist.push_back(dist[r] + indices);
382  } // for
383 };
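
 // Worked example (illustrative): subdivide(10, 3, dist) produces
 // dist == {0, 3, 6, 10}; quot == 3, rem == 1, and only the last partition
 // (r >= npart - rem) picks up the extra index.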
384 
388 template<std::size_t MESH_DIMENSION>
389 void
391  size_t from_dimension,
392  size_t to_dimension,
393  size_t min_connections,
394  util::dcrs & dcrs) {
395  int size;
396  int rank;
397 
398  MPI_Comm_size(MPI_COMM_WORLD, &size);
399  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
400 
401  // the mpi data type for size_t
402  const auto mpi_size_t = util::mpi_typetraits<size_t>::type();
403 
404  //----------------------------------------------------------------------------
405  // Get cell partitioning
406  //
407  // Note: We create a global index space, but this may be different
408  // from the global numbering of the original mesh.
409  //----------------------------------------------------------------------------
410 
411  auto num_cells = md.num_entities(from_dimension);
412 
413  // recompute the partitioning
414  dcrs.clear();
415  dcrs.distribution.resize(size + 1);
416  dcrs.distribution[0] = 0;
417 
418  MPI_Allgather(&num_cells,
419  1,
420  mpi_size_t,
421  dcrs.distribution.data() + 1,
422  1,
423  mpi_size_t,
424  MPI_COMM_WORLD);
425 
426  for(int i = 0; i < size; ++i)
427  dcrs.distribution[i + 1] += dcrs.distribution[i];
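  // For example (illustrative numbers): if ranks 0-2 report 4, 3 and 5 local
  // cells, the allgather fills {0, 4, 3, 5} and the running sum above turns
  // it into the ownership ranges {0, 4, 7, 12}.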
428 
429  // starting cell index for this rank
430  auto cells_start = dcrs.distribution[rank];
431 
432  //----------------------------------------------------------------------------
433  // Create a naive initial distribution of the vertices
434  //----------------------------------------------------------------------------
435 
436  // first figure out the maximum global id on this rank
437  const auto & vertex_local_to_global = md.local_to_global(to_dimension);
438 
439  size_t max_global_vert_id{0};
440  for(auto v : vertex_local_to_global)
441  max_global_vert_id = std::max(max_global_vert_id, v);
442 
443  // now the global max id
444  size_t tot_verts{0};
445  MPI_Allreduce(
446  &max_global_vert_id, &tot_verts, 1, mpi_size_t, MPI_MAX, MPI_COMM_WORLD);
447  tot_verts++;
448 
449  decltype(dcrs.distribution) vert_dist;
450  subdivide(tot_verts, size, vert_dist);
451 
452  //----------------------------------------------------------------------------
453  // Create the Local vertex-to-cell graph.
454  //----------------------------------------------------------------------------
455 
456  // essentially vertex to cell connectivity
457  std::map<size_t, std::vector<size_t>> vertex2cell;
458  const auto & cells2vertex = md.entities_crs(from_dimension, to_dimension);
459 
460  // Travel from the FROM_DIMENSION (cell) to the TO_DIMENSION (vertex)
461  for(size_t ic = 0; ic < num_cells; ++ic) {
462  auto cell = cells_start + ic;
463  for(auto i = cells2vertex.offsets[ic]; i < cells2vertex.offsets[ic + 1];
464  ++i) {
465  auto iv = cells2vertex.indices[i];
466  auto vertex = vertex_local_to_global[iv];
467  vertex2cell[vertex].emplace_back(cell);
468  }
469  }
470 
471  // remove duplicates
472  auto remove_duplicates = [](auto & vertex2cell) {
473  for(auto & vertex_cells : vertex2cell) {
474  auto v = vertex_cells.first;
475  auto & cells = vertex_cells.second;
476  std::sort(cells.begin(), cells.end());
477  auto first = cells.begin();
478  auto last = std::unique(first, cells.end());
479  cells.erase(last, cells.end());
480  }
481  };
482 
483  remove_duplicates(vertex2cell);
484 
485  //----------------------------------------------------------------------------
486  // Send vertex information to the deemed vertex-owner
487  //----------------------------------------------------------------------------
488 
489  // We send the results for each vertex to their owner rank, which was
490  // defined using the vertex subdivision above
491  std::vector<size_t> sendcounts(size, 0);
492 
493  // count
494  for(const auto & vs_pair : vertex2cell) {
495  size_t global_id = vs_pair.first;
496  auto r = rank_owner(vert_dist, global_id);
497  // we will be sending vertex id, number of cells, plus cell ids
498  if(r != size_t(rank)) {
499  auto n = 2 + vs_pair.second.size();
500  sendcounts[r] += n;
501  }
502  }
503 
504  // finish displacements
505  std::vector<size_t> senddispls(size + 1);
506  senddispls[0] = 0;
507  for(int r = 0; r < size; ++r) {
508  senddispls[r + 1] = senddispls[r] + sendcounts[r];
509  sendcounts[r] = 0;
510  }
511 
512  // fill buffers
513  std::vector<size_t> sendbuf(senddispls[size]);
514 
515  for(const auto & vs_pair : vertex2cell) {
516  size_t global_id = vs_pair.first;
517  auto r = rank_owner(vert_dist, global_id);
518  // we will be sending vertex id, number of cells, plus cell ids
519  if(r != size_t(rank)) {
520  // get offset
521  auto offset = senddispls[r] + sendcounts[r];
522  // populate data
523  sendbuf[offset++] = global_id;
524  sendbuf[offset++] = vs_pair.second.size();
525  for(auto v : vs_pair.second)
526  sendbuf[offset++] = v;
527  // bump counters
528  auto n = 2 + vs_pair.second.size();
529  sendcounts[r] += n;
530  }
531  }
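
  // The buffer filled above is a flat stream of per-vertex records
  // (illustrative layout): {vertex_gid, n_cells, cell_0, ..., cell_{n-1},
  // vertex_gid, n_cells, ...}; the unpacking loop below consumes exactly
  // this sequence.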
532 
533  std::vector<size_t> recvcounts(size, 0);
534  auto ret = MPI_Alltoall(sendcounts.data(),
535  1,
536  mpi_size_t,
537  recvcounts.data(),
538  1,
539  mpi_size_t,
540  MPI_COMM_WORLD);
541 
542  if(ret != MPI_SUCCESS)
543  flog_error("Error communicating vertex counts");
544 
545  // how much info will we be receiving
546  std::vector<size_t> recvdispls(size + 1);
547  recvdispls[0] = 0;
548  for(int r = 0; r < size; ++r)
549  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
550  std::vector<size_t> recvbuf(recvdispls[size]);
551 
552  // now send the actual vertex info
553  ret = alltoallv(sendbuf,
554  sendcounts,
555  senddispls,
556  recvbuf,
557  recvcounts,
558  recvdispls,
559  MPI_COMM_WORLD);
560 
561  if(ret != MPI_SUCCESS)
562  flog_error("Error communicating vertices");
563 
564  //----------------------------------------------------------------------------
565  // Append received vertex information to the local vertex-to-cell graph
566  //----------------------------------------------------------------------------
567 
568  // and add the results to our local list
569  std::map<size_t, std::vector<size_t>> vertex2rank;
570 
571  for(int r = 0; r < size; ++r) {
572  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
573  // get vertex
574  auto vertex = recvbuf[i];
575  ++i;
576  // keep track of ranks that share this vertex
577  vertex2rank[vertex].emplace_back(r);
578  flog_assert(i < recvdispls[r + 1], "invalid index");
579  // unpack cell neighbors
580  auto n = recvbuf[i];
581  ++i;
582  for(size_t j = 0; j < n; ++j) {
583  flog_assert(i < recvdispls[r + 1], "invalid index");
584  auto cell = recvbuf[i];
585  ++i;
586  // might not already be there
587  vertex2cell[vertex].emplace_back(cell);
588  }
589  }
590  }
591 
592  // remove duplicates
593  remove_duplicates(vertex2cell);
594 
595  //----------------------------------------------------------------------------
596  // Send back the final results for shared vertices
597  //----------------------------------------------------------------------------
598 
599  // now prepare to send the results back
600 
601  // count send buffer size
602  std::fill(sendcounts.begin(), sendcounts.end(), 0);
603  for(const auto & vertex_pair : vertex2rank) {
604  auto vertex = vertex_pair.first;
605  for(auto r : vertex_pair.second) {
606  if(r != size_t(rank)) { // should always enter anyway!
607  // better already be there
608  const auto & cells = vertex2cell.at(vertex);
609  // we will be sending vertex id, number of cells, plus cell ids
610  auto n = 2 + cells.size();
611  sendcounts[r] += n;
612  }
613  }
614  }
615 
616  // finish displacements
617  senddispls[0] = 0;
618  for(int r = 0; r < size; ++r)
619  senddispls[r + 1] = senddispls[r] + sendcounts[r];
620 
621  // resize send buffer
622  sendbuf.clear();
623  sendbuf.resize(senddispls[size]);
624 
625  // now fill buffer
626  std::fill(sendcounts.begin(), sendcounts.end(), 0);
627  for(const auto & vertex_pair : vertex2rank) {
628  auto vertex = vertex_pair.first;
629  for(auto r : vertex_pair.second) {
630  if(r != size_t(rank)) { // should always enter anyway!
631  auto j = senddispls[r] + sendcounts[r];
632  // better already be there
633  const auto & cells = vertex2cell.at(vertex);
634  // we will be sending vertex id, number of cells, plus cell ids
635  sendbuf[j++] = vertex;
636  sendbuf[j++] = cells.size();
637  for(auto c : cells)
638  sendbuf[j++] = c;
639  // increment send counter
640  sendcounts[r] = j - senddispls[r];
641  }
642  }
643  }
644 
645  // send counts
646  std::fill(recvcounts.begin(), recvcounts.end(), 0);
647  ret = MPI_Alltoall(sendcounts.data(),
648  1,
649  mpi_size_t,
650  recvcounts.data(),
651  1,
652  mpi_size_t,
653  MPI_COMM_WORLD);
654 
655  if(ret != MPI_SUCCESS)
656  flog_error("Error communicating back vertex counts");
657 
658  // how much info will we be receiving
659  recvdispls[0] = 0;
660  for(int r = 0; r < size; ++r)
661  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
662 
663  // resize the receive buffer
664  recvbuf.clear();
665  recvbuf.resize(recvdispls.at(size));
666 
667  // now send the final vertex info back
668  ret = alltoallv(sendbuf,
669  sendcounts,
670  senddispls,
671  recvbuf,
672  recvcounts,
673  recvdispls,
674  MPI_COMM_WORLD);
675 
676  if(ret != MPI_SUCCESS)
677  flog_error("Error communicating new vertices");
678 
679  //----------------------------------------------------------------------------
680  // Append received vertex information to the local vertex-to-cell graph
681  //----------------------------------------------------------------------------
682 
683  for(int r = 0; r < size; ++r) {
684  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
685  // get vertex
686  auto vertex = recvbuf[i];
687  ++i;
688 
689  // keep track of ranks that share this vertex
690  vertex2rank[vertex].emplace_back(r);
691  flog_assert(i < recvdispls[r + 1], "invalid index");
692 
693  // unpack cell neighbors
694  auto n = recvbuf[i];
695  ++i;
696  for(size_t j = 0; j < n; ++j) {
697  flog_assert(i < recvdispls[r + 1], "invalid index");
698  auto cell = recvbuf[i];
699  ++i;
700  // might not already be there
701  vertex2cell[vertex].emplace_back(cell);
702  }
703  }
704  }
705 
706  // remove duplicates
707  remove_duplicates(vertex2cell);
708 
709  //----------------------------------------------------------------------------
710  // Invert for final cell-to-cell graph
711  //----------------------------------------------------------------------------
712 
713  // Set the first offset (always zero).
714  dcrs.offsets.reserve(num_cells);
715  dcrs.indices.reserve(from_dimension * from_dimension * num_cells); // guess
716  dcrs.offsets.push_back(0);
717 
718  std::map<size_t, size_t> cell_counts;
719 
720  for(size_t ic = 0; ic < num_cells; ++ic) {
721  auto cell = cells_start + ic;
722 
723  cell_counts.clear();
724 
725  // iterate over vertices
726  auto start = cells2vertex.offsets[ic];
727  auto end = cells2vertex.offsets[ic + 1];
728  for(auto i = start; i < end; ++i) {
729  auto iv = cells2vertex.indices[i];
730  auto vertex = vertex_local_to_global[iv];
731  // now count attached cells
732  for(auto other : vertex2cell.at(vertex)) {
733  // new entries are initialized to zero
734  if(other != cell)
735  cell_counts[other] += 1;
736  }
737  }
738 
739  // now add results
740  int num_connections{0};
741 
742  for(auto count : cell_counts) {
743  if(count.second >= min_connections) {
744  dcrs.indices.emplace_back(count.first);
745  num_connections++;
746  }
747  }
748 
749  dcrs.offsets.emplace_back(dcrs.offsets.back() + num_connections);
750  }
751 
752 } // make_dcrs
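
 // Note on min_connections (illustrative): for a 2D quad mesh, passing
 // min_connections == 2 keeps only edge (face) neighbors, since corner
 // neighbors share a single vertex and fail `count.second >= min_connections`,
 // whereas min_connections == 1 also keeps the corner neighbors.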
753 
757 template<std::size_t DIMENSION>
758 void
759 migrate(size_t dimension,
760  const std::vector<size_t> & partitioning,
761  util::dcrs & dcrs,
763 
764  int comm_size, comm_rank;
765 
766  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
767  MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
768 
769  //----------------------------------------------------------------------------
770  // Pack information together.
771  //
772  // We need to query the mesh definition to see if there is extra info we
773  // want to send.
774  //----------------------------------------------------------------------------
775 
776  using byte_t = typename parallel_definition<DIMENSION>::byte_t;
777 
778  std::vector<size_t> sendcounts(comm_size, 0);
779  std::vector<size_t> senddispls(comm_size + 1);
780  std::vector<byte_t> sendbuf;
781  std::vector<size_t> erase_local_ids;
782 
783  senddispls[0] = 0;
784  auto num_elements = dcrs.size();
785 
786  // Pack up the entities that need to be sent to other ranks.
787  for(int rank(0); rank < comm_size; ++rank) {
788 
789  // keep track of the buffer beginning
790  auto start_buf = sendbuf.size();
791 
792  // loop over the entities we partitioned
793  for(size_t local_id(0); local_id < num_elements && rank != comm_rank;
794  ++local_id) {
795 
796  // the global id
797  auto global_id = dcrs.distribution[comm_rank] + local_id;
798 
799  //------------------------------------------------------------------------
800  // No migration necessary
801  if(partitioning[local_id] == size_t(comm_rank)) {
802  // just keep track of the global ids
803  }
804  //------------------------------------------------------------------------
805  // Entity to be migrated
806  else if(partitioning[local_id] == size_t(rank)) {
807  // mark for deletion
808  erase_local_ids.emplace_back(local_id);
809  // global id
810  cast_insert(&global_id, 1, sendbuf);
811  // dcrs info
812  auto start = dcrs.offsets[local_id];
813  auto num_offsets = dcrs.offsets[local_id + 1] - start;
814  cast_insert(&num_offsets, 1, sendbuf);
815  cast_insert(&dcrs.indices[start], num_offsets, sendbuf);
816  // now specific info related to mesh
817  md.pack(dimension, local_id, sendbuf);
818  }
819  } // for entities
820 
821  // keep track of offsets
822  sendcounts[rank] = sendbuf.size() - start_buf;
823  senddispls[rank + 1] = senddispls[rank] + sendcounts[rank];
824 
825  } // for ranks
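
  // Each migrated entity is packed above as a flat record (illustrative
  // layout): {global_id, num_offsets, index_0, ..., index_{num_offsets-1},
  // <mesh payload written by md.pack()>}; the unpacking loop below reads
  // records back in the same order.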
826 
827  //----------------------------------------------------------------------------
828  // Erase entities to be migrated
829  //----------------------------------------------------------------------------
830 
831  if(erase_local_ids.size()) {
832 
833  // sort them first
834  std::sort(erase_local_ids.begin(), erase_local_ids.end());
835  auto last = std::unique(erase_local_ids.begin(), erase_local_ids.end());
836  flog_assert(last == erase_local_ids.end(), "duplicate ids to delete");
837 
838  // erase dcrs elements
839  dcrs.erase(erase_local_ids);
840 
841  // erase mesh elements
842  md.erase(dimension, erase_local_ids);
843 
844  // decrement counter
845  num_elements -= erase_local_ids.size();
846  }
847 
848  //----------------------------------------------------------------------------
849  // Send information.
850  //----------------------------------------------------------------------------
851 
852  // the mpi data type for size_t
853  const auto mpi_size_t = util::mpi_typetraits<size_t>::type();
854 
855  std::vector<size_t> recvcounts(comm_size, 0);
856 
857  auto ret = MPI_Alltoall(sendcounts.data(),
858  1,
859  mpi_size_t,
860  recvcounts.data(),
861  1,
862  mpi_size_t,
863  MPI_COMM_WORLD);
864 
865  if(ret != MPI_SUCCESS)
866  flog_error("Error communicating vertex counts");
867 
868  // how much info will we be receiving
869  std::vector<size_t> recvdispls(comm_size + 1);
870  recvdispls[0] = 0;
871  for(int r = 0; r < comm_size; ++r)
872  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
873  std::vector<byte_t> recvbuf(recvdispls[comm_size]);
874 
875  // now send the actual info
876  ret = alltoallv(sendbuf,
877  sendcounts,
878  senddispls,
879  recvbuf,
880  recvcounts,
881  recvdispls,
882  MPI_COMM_WORLD);
883 
884  if(ret != MPI_SUCCESS)
885  flog_error("Error communicating vertices");
886 
887  //----------------------------------------------------------------------------
888  // Unpack information.
889  //----------------------------------------------------------------------------
890 
891  // Add indices to primary
892  for(int rank(0); rank < comm_size; ++rank) {
893 
894  auto start = recvdispls[rank];
895  auto end = recvdispls[rank + 1];
896  const auto * buffer = &recvbuf[start];
897  const auto * buffer_end = &recvbuf[end];
898 
899  // Typically, one would check for equality between an iterator address
900  // and the ending pointer address. This might cause an infinite loop
901  // if there is an error in unpacking, so testing on the byte
902  // index is safer since there is no danger of iterating past the end.
903  for(size_t i = start; i < end;) {
904 
905  // capture start of buffer
906  const auto * buffer_start = buffer;
907 
908  // the local_id
909  auto local_id = num_elements++;
910 
911  // global id
912  size_t global_id;
913  uncast(buffer, 1, &global_id);
914 
915  // dcrs info
916  auto last = dcrs.offsets[local_id];
917 
918  size_t num_offsets;
919  uncast(buffer, 1, &num_offsets);
920  dcrs.offsets.emplace_back(last + num_offsets);
921 
922  dcrs.indices.resize(dcrs.indices.size() + num_offsets);
923  uncast(buffer, num_offsets, &dcrs.indices[last]);
924 
925  // now specific info related to mesh
926  md.unpack(dimension, local_id, buffer);
927 
928  // increment pointer
929  i += std::distance(buffer_start, buffer);
930 
931  } // for
932 
933  // make sure we are at the right spot in the buffer
934  flog_assert(buffer == buffer_end, "Unpacking mismatch");
935 
936  } // for
937 
938  flog_assert(num_elements == dcrs.size(),
939  "Mismatch in the final number of elements after migration");
940 
941  flog_assert(num_elements > 0,
942  "At least one rank has an empty primary coloring. Please either "
943  "increase the problem size or use fewer ranks");
944 
945  // recompute the partitioning
946  dcrs.distribution.clear();
947  dcrs.distribution.resize(comm_size + 1);
948  dcrs.distribution[0] = 0;
949 
950  MPI_Allgather(&num_elements,
951  1,
952  mpi_size_t,
953  dcrs.distribution.data() + 1,
954  1,
955  mpi_size_t,
956  MPI_COMM_WORLD);
957 
958  for(int i = 0; i < comm_size; ++i)
959  dcrs.distribution[i + 1] += dcrs.distribution[i];
960 }
961 
962 inline void
963 get_owner_info(const util::dcrs & dcrs,
964  index_coloring & entities,
965  coloring_info & color_info) {
966 
967  int comm_size, comm_rank;
968  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
969  MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
970 
971  //----------------------------------------------------------------------------
972  // Determine primary/ghost division
973  //----------------------------------------------------------------------------
974 
975  // starting and ending cell indices
976  auto start = dcrs.distribution[comm_rank];
977  auto end = dcrs.distribution[comm_rank + 1] - 1;
978  auto num_entities = end - start + 1;
979  // size_t num_ghost = 0;
980 
981  // keep track of shared ranks
982  std::set<size_t> shared_ranks;
983 
984  for(size_t local_id = 0; local_id < num_entities; ++local_id) {
985 
986  // determine global id
987  auto global_id = start + local_id;
988  // reset storage for shared rank ids
989  shared_ranks.clear();
990 
991  for(auto i = dcrs.offsets[local_id]; i < dcrs.offsets[local_id + 1]; ++i) {
992  // who owns the neighbor cell
993  auto neighbor = dcrs.indices[i];
994  auto rank = rank_owner(dcrs.distribution, neighbor);
995 
996  // neighbor is a ghost cell
997  if(rank != size_t(comm_rank)) {
998  // create possible new ghost
999  // auto ghost_id = num_entities+num_ghost;
1000  auto ghost_id = neighbor - dcrs.distribution[rank];
1001  auto e = entity_info(neighbor, rank, ghost_id, comm_rank);
1002  // search for it
1003  auto it = entities.ghost.find(e);
1004  // if it has not been added, then add it!
1005  if(it == entities.ghost.end()) {
1006  entities.ghost.emplace(e);
1007  color_info.ghost_owners.insert(rank);
1008  // num_ghost++;
1009  }
1010  // keep track of shared ranks for the parent cell
1011  shared_ranks.emplace(rank);
1012  }
1013  } // neighbors
1014 
1015  // the original cell is exclusive
1016  if(shared_ranks.empty()) {
1017  entities.exclusive.emplace(entity_info(global_id, comm_rank, local_id));
1018  }
1019  // otherwise it must be shared
1020  else {
1021  entities.shared.emplace(
1022  entity_info(global_id, comm_rank, local_id, shared_ranks));
1023  color_info.shared_users.insert(shared_ranks.begin(), shared_ranks.end());
1024  }
1025  }
1026 
1027  // store the sizes of each set
1028  color_info.exclusive = entities.exclusive.size();
1029  color_info.shared = entities.shared.size();
1030  color_info.ghost = entities.ghost.size();
1031 }
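
 // Illustrative 1D picture of the classification above: with cells {0,1,2,3}
 // on rank 0 and {4,5,6,7} on rank 1, cell 3 on rank 0 has neighbor 4 owned
 // by rank 1, so 3 becomes shared (with user rank 1) and 4 becomes a ghost
 // owned by rank 1; cells 0-2 have only on-rank neighbors and stay exclusive.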
1032 
1036 #if 0 // FIXME: DO WE NEED THIS?
1037 inline void
1038 color_entities(const util::crs & cells2entity,
1039  const std::vector<size_t> & local2global,
1040  const std::map<size_t, size_t> & global2local,
1041  const index_coloring & cells,
1042  index_coloring & entities,
1043  coloring_info & color_info,
1044  size_t & connectivity_counts) {
1045 
1046  int comm_size, comm_rank;
1047  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
1048  MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
1049 
1050  // the mpi data type for size_t
1051  const auto mpi_size_t = util::mpi_typetraits<size_t>::type();
1052 
1053  //----------------------------------------------------------------------------
1054  // Start marking all vertices as exclusive
1055  //----------------------------------------------------------------------------
1056 
1057  // marked as exclusive by default
1058  auto num_ents = local2global.size();
1059  std::vector<bool> exclusive(num_ents, true);
1060 
1061  // set of possible shared vertices
1062  std::vector<size_t> potential_shared;
1063 
1064  // now mark shared
1065  for(const auto & c : cells.shared) {
1066  auto start = cells2entity.offsets[c.offset];
1067  auto end = cells2entity.offsets[c.offset + 1];
1068  for(size_t i = start; i < end; ++i) {
1069  auto local_id = cells2entity.indices[i];
1070  exclusive[local_id] = false;
1071  potential_shared.emplace_back(local_id);
1072  }
1073  }
1074 
1075  // remove duplicates
1076  auto remove_unique = [](auto & list) {
1077  std::sort(list.begin(), list.end());
1078  auto last = std::unique(list.begin(), list.end());
1079  list.erase(last, list.end());
1080  };
1081  remove_unique(potential_shared);
1082 
1083  //----------------------------------------------------------------------------
1084  // Determine cell-to-vertex connectivity for my shared cells
1085  //----------------------------------------------------------------------------
1086 
1087  // first count for storage
1088  std::vector<size_t> sendcounts(comm_size, 0);
1089  std::vector<size_t> senddispls(comm_size + 1);
1090 
1091  for(const auto & c : cells.shared) {
1092  // get cell info
1093  auto start = cells2entity.offsets[c.offset];
1094  auto end = cells2entity.offsets[c.offset + 1];
1095  auto n = end - start;
1096  // loop over shared ranks
1097  for(auto r : c.shared) {
1098  // we will be sending the cell number of vertices, plus vertices
1099  if(r != comm_rank)
1100  sendcounts[r] += 1 + n;
1101  }
1102  }
1103 
1104  // finish displacements
1105  senddispls[0] = 0;
1106  for(size_t r = 0; r < comm_size; ++r)
1107  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1108 
1109  // now fill buffers
1110  std::vector<size_t> sendbuf(senddispls[comm_size]);
1111  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1112 
1113  for(const auto & c : cells.shared) {
1114  // get cell info
1115  auto start = cells2entity.offsets[c.offset];
1116  auto end = cells2entity.offsets[c.offset + 1];
1117  auto n = end - start;
1118  // loop over shared ranks
1119  for(auto r : c.shared) {
1120  // we will be sending number of vertices, plus vertices
1121  if(r != comm_rank) {
1122  // first the id and size
1123  auto j = senddispls[r] + sendcounts[r];
1124  sendbuf[j++] = n;
1125  for(auto i = start; i < end; ++i) {
1126  auto local_id = cells2entity.indices[i];
1127  auto global_id = local2global[local_id];
1128  sendbuf[j++] = global_id;
1129  }
1130  // increment send counter
1131  sendcounts[r] = j - senddispls[r];
1132  }
1133  } // shared ranks
1134  }
1135 
1136  //----------------------------------------------------------------------------
1137  // Send shared information
1138  //----------------------------------------------------------------------------
1139 
1140  // send the counts
1141  std::vector<size_t> recvcounts(comm_size);
1142  auto ret = MPI_Alltoall(sendcounts.data(), 1, mpi_size_t, recvcounts.data(),
1143  1, mpi_size_t, MPI_COMM_WORLD);
1144  if(ret != MPI_SUCCESS)
1145  flog_error("Error communicating vertex counts");
1146 
1147  // how much info will we be receiving
1148  std::vector<size_t> recvdispls(comm_size + 1);
1149  recvdispls[0] = 0;
1150  for(size_t r = 0; r < comm_size; ++r)
1151  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1152  std::vector<size_t> recvbuf(recvdispls[comm_size]);
1153 
1154  // figure out the size of the connectivity array
1155  connectivity_counts = cells2entity.indices.size();
1156 
1157  // now send the actual vertex info
1158  ret = alltoallv(sendbuf, sendcounts, senddispls, recvbuf, recvcounts,
1159  recvdispls, MPI_COMM_WORLD);
1160  if(ret != MPI_SUCCESS)
1161  flog_error("Error communicating vertices");
1162 
1163  //----------------------------------------------------------------------------
1164  // Unpack results
1165  //----------------------------------------------------------------------------
1166 
1167  // set of possible ghost vertices
1168  std::vector<size_t> potential_ghost;
1169 
1170  for(size_t r = 0; r < comm_size; ++r) {
1171  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
1172  // num of vertices
1173  auto n = recvbuf[i];
1174  i++;
1175  connectivity_counts += n;
1176  // now sift through ghost vertices
1177  for(auto j = 0; j < n; ++j, ++i) {
1178  // local and global ids of the vertex (relative to sender)
1179  auto ent_global_id = recvbuf[i];
1180  // Don't bother checking if i have this vertex yet, even if I already
1181  // have it, it might be a ghost
1182  potential_ghost.emplace_back(ent_global_id);
1183  } // vertices
1184  }
1185  }
1186 
1187  // remove duplicates
1188  remove_unique(potential_ghost);
1189 
1190  //----------------------------------------------------------------------------
1191  // Create a naive initial distribution of the vertices
1192  //----------------------------------------------------------------------------
1193 
1194  // first figure out the maximum global id on this rank
1195  size_t max_global_ent_id{0};
1196  for(auto v : local2global)
1197  max_global_ent_id = std::max(max_global_ent_id, v);
1198 
1199  // now the global max id
1200  size_t tot_ents{0};
1201  MPI_Allreduce(
1202  &max_global_ent_id, &tot_ents, 1, mpi_size_t, MPI_MAX, MPI_COMM_WORLD);
1203  tot_ents++;
1204 
1205  std::vector<size_t> ent_dist;
1206  subdivide(tot_ents, comm_size, ent_dist);
1207 
1208  //----------------------------------------------------------------------------
1209  // Send shared vertex information to the deemed vertex-owner
1210  //
1211  // After this, we know who is the sharer of a particular vertex
1212  //----------------------------------------------------------------------------
1213 
1214  struct owner_t {
1215  size_t rank;
1216  size_t local_id;
1217  std::vector<size_t> ghost_ranks;
1218  owner_t(size_t r, size_t id) : rank(r), local_id(id) {}
1219  };
1220  std::map<size_t, owner_t> entities2rank;
1221 
1222  // We send the results for each vertex to their owner rank, which was
1223  // defined using the vertex subdivision above
1224  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1225 
1226  for(auto local_id : potential_shared) {
1227  auto global_id = local2global[local_id];
1228  // who owns this vertex
1229  auto rank = rank_owner(ent_dist, global_id);
1230  // we will be sending global and local ids
1231  if(rank != comm_rank)
1232  sendcounts[rank] += 2;
1233  } // vert
1234 
1235  // finish displacements
1236  senddispls[0] = 0;
1237  for(size_t r = 0; r < comm_size; ++r)
1238  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1239 
1240  // now fill buffers
1241  sendbuf.clear();
1242  sendbuf.resize(senddispls[comm_size]);
1243  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1244 
1245  for(const auto & local_id : potential_shared) {
1246  auto global_id = local2global[local_id];
1247  // who owns this vertex
1248  auto rank = rank_owner(ent_dist, global_id);
1249  // we will be sending vertex id, number of cells, plus cell ids
1250  if(rank != comm_rank) {
1251  auto i = senddispls[rank] + sendcounts[rank];
1252  sendbuf[i] = global_id;
1253  sendbuf[i + 1] = local_id;
1254  sendcounts[rank] += 2;
1255  }
1256  // if its ours, just add it to the list
1257  else {
1258  entities2rank.emplace(global_id, owner_t{rank, local_id});
1259  }
1260  } // vert
1261 
1262  // send counts
1263  ret = MPI_Alltoall(sendcounts.data(), 1, mpi_size_t, recvcounts.data(), 1,
1264  mpi_size_t, MPI_COMM_WORLD);
1265  if(ret != MPI_SUCCESS)
1266  flog_error("Error communicating vertex counts");
1267 
1268  // how much info will we be receiving
1269  recvdispls[0] = 0;
1270  for(size_t r = 0; r < comm_size; ++r)
1271  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1272 
1273  recvbuf.clear();
1274  recvbuf.resize(recvdispls[comm_size]);
1275 
1276  // now send the actual vertex info
1277  ret = alltoallv(sendbuf, sendcounts, senddispls, recvbuf, recvcounts,
1278  recvdispls, MPI_COMM_WORLD);
1279  if(ret != MPI_SUCCESS)
1280  flog_error("Error communicating vertices");
1281 
1282  // unpack results
1283  for(size_t r = 0; r < comm_size; ++r) {
1284  for(size_t i = recvdispls[r]; i < recvdispls[r + 1]; i += 2) {
1285  auto global_id = recvbuf[i];
1286  auto local_id = recvbuf[i + 1];
1287  auto res = entities2rank.emplace(global_id, owner_t{r, local_id});
1288  if(!res.second && r < res.first->second.rank) {
1289  res.first->second = {r, local_id};
1290  }
1291  }
1292  }
1293 
1294  //----------------------------------------------------------------------------
1295  // Send ghost vertex information to the deemed vertex-owner
1296  //----------------------------------------------------------------------------
1297 
1298  // We send the results for each vertex to their owner rank, which was
1299  // defined using the vertex subdivision above
1300  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1301 
1302  for(auto global_id : potential_ghost) {
1303  // who owns this vertex
1304  auto rank = rank_owner(ent_dist, global_id);
1305  // we will be sending global ids
1306  if(rank != comm_rank)
1307  sendcounts[rank]++;
1308  } // vert
1309 
1310  // finish displacements
1311  senddispls[0] = 0;
1312  for(size_t r = 0; r < comm_size; ++r)
1313  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1314 
1315  // now fill buffers
1316  sendbuf.clear();
1317  sendbuf.resize(senddispls[comm_size]);
1318  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1319 
1320  for(const auto & global_id : potential_ghost) {
1321  // who owns this vertex
1322  auto rank = rank_owner(ent_dist, global_id);
1323  // we will be sending global ids
1324  if(rank != comm_rank) {
1325  auto i = senddispls[rank] + sendcounts[rank];
1326  sendbuf[i] = global_id;
1327  sendcounts[rank]++;
1328  }
1329  // otherwise, i am responsible for this ghost
1330  else {
1331  auto & owner = entities2rank.at(global_id);
1332  if(owner.rank != rank) {
1333  owner.ghost_ranks.emplace_back(rank);
1334  }
1335  }
1336  } // vert
1337 
1338  // send counts
1339  ret = MPI_Alltoall(sendcounts.data(), 1, mpi_size_t, recvcounts.data(), 1,
1340  mpi_size_t, MPI_COMM_WORLD);
1341  if(ret != MPI_SUCCESS)
1342  flog_error("Error communicating vertex counts");
1343 
1344  // how much info will we be receiving
1345  recvdispls[0] = 0;
1346  for(size_t r = 0; r < comm_size; ++r)
1347  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1348 
1349  recvbuf.clear();
1350  recvbuf.resize(recvdispls[comm_size]);
1351 
1352  // now send the actual vertex info
1353  ret = alltoallv(sendbuf, sendcounts, senddispls, recvbuf, recvcounts,
1354  recvdispls, MPI_COMM_WORLD);
1355  if(ret != MPI_SUCCESS)
1356  flog_error("Error communicating vertices");
1357 
1358  // unpack results
1359  for(size_t r = 0; r < comm_size; ++r) {
1360  for(size_t i = recvdispls[r]; i < recvdispls[r + 1]; i++) {
1361  auto global_id = recvbuf[i];
1362  // definitely a ghost!
1363  auto & owner = entities2rank.at(global_id);
1364  if(owner.rank != r) {
1365  owner.ghost_ranks.emplace_back(r);
1366  }
1367  }
1368  }
1369 
1370  //----------------------------------------------------------------------------
1371  // Send back the ghost/shared information to everyone that needs it
1372  //----------------------------------------------------------------------------
1373  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1374 
1375  // figure out send counts
1376  std::vector<size_t> ranks;
1377  for(const auto & pair : entities2rank) {
1378  const auto owner = pair.second;
1379  // who am i sending to
1380  ranks.assign(owner.ghost_ranks.begin(), owner.ghost_ranks.end());
1381  ranks.push_back(owner.rank);
1382  // we will be sending id+rank+offset+num_ghost+ghost_ranks
1383  auto n = 4 + owner.ghost_ranks.size();
1384  for(auto rank : ranks) {
1385  if(rank != comm_rank)
1386  sendcounts[rank] += n;
1387  }
1388  } // vert
1389 
1390  // finish displacements
1391  senddispls[0] = 0;
1392  for(size_t r = 0; r < comm_size; ++r)
1393  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1394 
1395  // now fill buffers
1396  sendbuf.clear();
1397  sendbuf.resize(senddispls[comm_size]);
1398  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1399 
1400  for(const auto & pair : entities2rank) {
1401  auto global_id = pair.first;
1402  const auto owner = pair.second;
1403  // who am i sending to
1404  ranks.assign(owner.ghost_ranks.begin(), owner.ghost_ranks.end());
1405  ranks.push_back(owner.rank);
1406  // we will be sending id+rank+offset+num_ghost+ghost_ranks
1407  auto n = 4 + owner.ghost_ranks.size();
1408  for(auto rank : ranks) {
1409  // we will be sending global ids
1410  if(rank != comm_rank) {
1411  auto i = senddispls[rank] + sendcounts[rank];
1412  sendbuf[i++] = global_id;
1413  sendbuf[i++] = owner.rank;
1414  sendbuf[i++] = owner.local_id;
1415  sendbuf[i++] = owner.ghost_ranks.size();
1416  for(auto r : owner.ghost_ranks)
1417  sendbuf[i++] = r;
1418  sendcounts[rank] += n;
1419  }
1420  }
1421  } // vert
1422 
1423  // send counts
1424  ret = MPI_Alltoall(sendcounts.data(), 1, mpi_size_t, recvcounts.data(), 1,
1425  mpi_size_t, MPI_COMM_WORLD);
1426  if(ret != MPI_SUCCESS)
1427  flog_error("Error communicating vertex counts");
1428 
1429  // how much info will we be receiving
1430  recvdispls[0] = 0;
1431  for(size_t r = 0; r < comm_size; ++r)
1432  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1433 
1434  recvbuf.clear();
1435  recvbuf.resize(recvdispls[comm_size]);
1436 
1437  // now send the actual vertex info
1438  ret = alltoallv(sendbuf, sendcounts, senddispls, recvbuf, recvcounts,
1439  recvdispls, MPI_COMM_WORLD);
1440  if(ret != MPI_SUCCESS)
1441  flog_error("Error communicating vertices");
1442 
1443  // unpack results
1444  for(size_t r = 0; r < comm_size; ++r) {
1445  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
1446  auto global_id = recvbuf[i];
1447  i++;
1448  auto rank = recvbuf[i];
1449  i++;
1450  auto local_id = recvbuf[i];
1451  i++;
1452  auto num_shared = recvbuf[i];
1453  i++;
1454  auto it = entities2rank.emplace(global_id, owner_t{rank, local_id});
1455  flog_assert(it.second,
1456  "did not insert vertex, which means multiple owners sent it");
1457  for(size_t j = 0; j < num_shared; ++j, ++i) {
1458  it.first->second.ghost_ranks.emplace_back(recvbuf[i]);
1459  }
1460  }
1461  }
1462 
1463  //----------------------------------------------------------------------------
1464  // Now we can set exclusive, shared and ghost
1465  //----------------------------------------------------------------------------
1466 
1467  // exclusive
1468  for(size_t local_id = 0; local_id < num_ents; ++local_id) {
1469  if(exclusive[local_id]) {
1470  auto global_id = local2global[local_id];
1471  entities.exclusive.emplace(global_id, comm_rank, local_id);
1472  }
1473  }
1474 
1475  // shared and ghost
1476  for(const auto pair : entities2rank) {
1477  auto global_id = pair.first;
1478  auto owner = pair.second;
1479  // if i am the owner, shared
1480  if(owner.rank == comm_rank) {
1481  flog_assert(owner.ghost_ranks.size(), "shared/ghost has no ghost ranks!");
1482  auto it = entities.shared.emplace(
1483  global_id, owner.rank, owner.local_id, owner.ghost_ranks);
1484  color_info.shared_users.insert(
1485  it.first->shared.begin(), it.first->shared.end());
1486  }
1487  // otherwise this "may" be a ghost
1488  else {
1489  auto is_ghost = std::binary_search(
1490  potential_ghost.begin(), potential_ghost.end(), global_id);
1491  if(is_ghost) {
1492  color_info.ghost_owners.insert(owner.rank);
1493  } // is ghost
1494  }
1495  }
1496 
1497  // entity summaries
1498  color_info.exclusive = entities.exclusive.size();
1499  color_info.shared = entities.shared.size();
1500  color_info.ghost = entities.ghost.size();
1501 }
1502 #endif
1503 
1507 template<std::size_t MESH_DIMENSION>
1508 void
1510  size_t dimension,
1511  std::vector<size_t> & local2global,
1512  std::map<size_t, size_t> & global2local) {
1513 
1514  int comm_size, comm_rank;
1515  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
1516  MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
1517 
1518  // the mpi data type for size_t
1519  const auto mpi_size_t = util::mpi_typetraits<size_t>::type();
1520 
1521  //----------------------------------------------------------------------------
1522  // Create a naive initial distribution of the vertices
1523  //----------------------------------------------------------------------------
1524 
1525  // first figure out the maximum global id on this rank
1526  const auto & vertex_local2global = md.local_to_global(0);
1527 
1528  size_t max_global_vert_id{0};
1529  for(auto v : vertex_local2global)
1530  max_global_vert_id = std::max(max_global_vert_id, v);
1531 
1532  // now the global max id
1533  size_t tot_verts{0};
1534  MPI_Allreduce(
1535  &max_global_vert_id, &tot_verts, 1, mpi_size_t, MPI_MAX, MPI_COMM_WORLD);
1536  tot_verts++;
1537 
1538  std::vector<size_t> vert_dist;
1539  subdivide(tot_verts, comm_size, vert_dist);
1540 
1541  //----------------------------------------------------------------------------
1542  // Send each edge to the edge owner
1543  //
1544  // The vertex with the lowest id owns the edge
1545  //----------------------------------------------------------------------------
1546 
1547  const auto & entities2vertex = md.entities_crs(dimension, 0);
1548 
1549  // map the vertices to an id
1550  std::map<std::vector<size_t>, size_t> entities;
1551 
1552  // storage for vertex sorting
1553  std::vector<size_t> sorted_vs;
1554 
1555  // a utility function for sorting and adding to the map
1556  auto add_to_map = [&](const auto & vs, auto & vertices2id, auto id) {
1557  sorted_vs.assign(vs.begin(), vs.end());
1558  std::sort(sorted_vs.begin(), sorted_vs.end());
1559  auto res = vertices2id.emplace(sorted_vs, id);
1560  return res.first;
1561  };
1562 
1563  // first count for storage
1564  std::vector<size_t> sendcounts(comm_size, 0);
1565  std::vector<size_t> senddispls(comm_size + 1);
1566 
1567  // storage for global ids
1568  std::vector<size_t> global_vs;
1569 
1570  for(const auto & vs : entities2vertex) {
1571  // convert to global ids
1572  global_vs.clear();
1573  global_vs.reserve(vs.size());
1574  for(auto v : vs)
1575  global_vs.emplace_back(vertex_local2global.at(v));
1576  // get the rank owner
1577  auto it = std::min_element(global_vs.begin(), global_vs.end());
1578  auto r = rank_owner(vert_dist, *it);
1579  // we will be sending the number of vertices, plus the vertices
1580  if(r != size_t(comm_rank))
1581  sendcounts[r] += 1 + vs.size();
1582  }
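
  // Ownership example (illustrative): an entity with global vertices {12, 7}
  // and vert_dist == {0, 10, 20} is keyed off its lowest vertex id, 7, so
  // rank_owner(vert_dist, 7) == 0 and rank 0 is responsible for numbering it.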
1583 
1584  // finish displacements
1585  senddispls[0] = 0;
1586  for(int r = 0; r < comm_size; ++r)
1587  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1588 
1589  // now fill buffers
1590  std::vector<size_t> sendbuf(senddispls[comm_size]);
1591  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1592 
1593  for(const auto & vs : entities2vertex) {
1594  // convert to global ids
1595  global_vs.clear();
1596  global_vs.reserve(vs.size());
1597  for(auto v : vs)
1598  global_vs.emplace_back(vertex_local2global.at(v));
1599  // get the rank owner
1600  auto it = std::min_element(global_vs.begin(), global_vs.end());
1601  auto r = rank_owner(vert_dist, *it);
1602  // we will be sending the number of vertices, plus the vertices
1603  if(r != size_t(comm_rank)) {
1604  // first the id and size
1605  auto j = senddispls[r] + sendcounts[r];
1606  sendbuf[j++] = vs.size();
1607  for(auto v : global_vs)
1608  sendbuf[j++] = v;
1609  // increment send counter
1610  sendcounts[r] = j - senddispls[r];
1611  }
1612  // otherwise, if its mine, add it
1613  else {
1614  add_to_map(global_vs, entities, entities.size());
1615  }
1616  }
1617 
1618  //----------------------------------------------------------------------------
1619  // Send information
1620  //----------------------------------------------------------------------------
1621 
1622  // send the counts
1623  std::vector<size_t> recvcounts(comm_size);
1624  auto ret = MPI_Alltoall(sendcounts.data(),
1625  1,
1626  mpi_size_t,
1627  recvcounts.data(),
1628  1,
1629  mpi_size_t,
1630  MPI_COMM_WORLD);
1631 
1632  if(ret != MPI_SUCCESS)
1633  flog_error("Error communicating vertex counts");
1634 
1635  // how much info will we be receiving
1636  std::vector<size_t> recvdispls(comm_size + 1);
1637  recvdispls[0] = 0;
1638  for(int r = 0; r < comm_size; ++r)
1639  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1640  std::vector<size_t> recvbuf(recvdispls[comm_size]);
1641 
1642  // now send the actual vertex info
1643  ret = alltoallv(sendbuf,
1644  sendcounts,
1645  senddispls,
1646  recvbuf,
1647  recvcounts,
1648  recvdispls,
1649  MPI_COMM_WORLD);
1650 
1651  if(ret != MPI_SUCCESS)
1652  flog_error("Error communicating vertices");
1653 
1654  //----------------------------------------------------------------------------
1655  // Unpack results
1656  //----------------------------------------------------------------------------
1657 
1658  using pointer_t = std::decay_t<decltype(entities.begin())>;
1659 
1660  // keep track of who needs what
1661  std::map<size_t, std::vector<pointer_t>> rank_edges;
1662 
1663  for(int r = 0; r < comm_size; ++r) {
1664  auto & rank_data = rank_edges[r];
1665  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
1666  auto n = recvbuf[i];
1667  auto vs = util::make_array_ref(&recvbuf[i + 1], n);
1668  auto it = add_to_map(vs, entities, entities.size());
1669  rank_data.emplace_back(it);
1670  i += n + 1;
1671  }
1672 
1673  // sort and remove duplicate entries
1674  std::sort(rank_data.begin(),
1675  rank_data.end(),
1676  [](const auto & a, const auto & b) { return a->second < b->second; });
1677  auto last = std::unique(rank_data.begin(), rank_data.end());
1678  rank_data.erase(last, rank_data.end());
1679  }
1680 
1681  //----------------------------------------------------------------------------
1682  // Come up with a global numbering
1683  //----------------------------------------------------------------------------
1684 
1685  size_t my_edges{entities.size()};
1686 
1687  std::vector<size_t> edge_dist(comm_size + 1);
1688  edge_dist[0] = 0;
1689 
1690  MPI_Allgather(
1691  &my_edges, 1, mpi_size_t, &edge_dist[1], 1, mpi_size_t, MPI_COMM_WORLD);
1692 
1693  for(int r = 0; r < comm_size; ++r)
1694  edge_dist[r + 1] += edge_dist[r];
1695 
1696  // bump all my local edge ids up by my global offset
1697  for(auto & pair : entities)
1698  pair.second += edge_dist[comm_rank];
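  // For example (illustrative numbers): if ranks 0-2 own 5, 3 and 4 entities
  // locally, edge_dist becomes {0, 5, 8, 12} and rank 1 bumps its local ids
  // 0, 1, 2 up to the global ids 5, 6, 7.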
1699 
1700  //----------------------------------------------------------------------------
1701  // Send back the finished edges to those that sent you the pairs
1702  //----------------------------------------------------------------------------
1703 
1704  // counts for storage
1705  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1706  // and fill buffers
1707  sendbuf.clear();
1708 
1709  for(int r = 0; r < comm_size; ++r) {
1710  if(r == comm_rank)
1711  continue;
1712  // we will be sending the edge id, number of vertices, plus the vertices
1713  for(auto edge : rank_edges.at(r)) {
1714  auto global_id = edge->second;
1715  auto n = edge->first.size();
1716  sendcounts[r] += 2 + n;
1717  sendbuf.emplace_back(global_id);
1718  sendbuf.emplace_back(n);
1719  for(auto v : edge->first)
1720  sendbuf.emplace_back(v);
1721  }
1722  }
1723 
1724  // finish displacements
1725  senddispls[0] = 0;
1726  for(int r = 0; r < comm_size; ++r)
1727  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1728 
1729  //----------------------------------------------------------------------------
1730  // Send information
1731  //----------------------------------------------------------------------------
1732 
1733  // send the counts
1734  ret = MPI_Alltoall(sendcounts.data(),
1735  1,
1736  mpi_size_t,
1737  recvcounts.data(),
1738  1,
1739  mpi_size_t,
1740  MPI_COMM_WORLD);
1741 
1742  if(ret != MPI_SUCCESS)
1743  flog_error("Error communicating vertex counts");
1744 
1745  // how much info will we be receiving
1746  recvdispls[0] = 0;
1747  for(int r = 0; r < comm_size; ++r)
1748  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1749  recvbuf.clear();
1750  recvbuf.resize(recvdispls[comm_size]);
1751 
1752  // now send the actual vertex info
1753  ret = alltoallv(sendbuf,
1754  sendcounts,
1755  senddispls,
1756  recvbuf,
1757  recvcounts,
1758  recvdispls,
1759  MPI_COMM_WORLD);
1760 
1761  if(ret != MPI_SUCCESS)
1762  flog_error("Error communicating vertices");
1763 
1764  //----------------------------------------------------------------------------
1765  // Unpack results
1766  //----------------------------------------------------------------------------
1767 
1768  for(int r = 0; r < comm_size; ++r) {
1769  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
1770  auto global_id = recvbuf[i];
1771  auto n = recvbuf[i + 1];
1772  auto vs = util::make_array_ref(&recvbuf[i + 2], n);
1773  i += n + 2;
1774  add_to_map(vs, entities, global_id);
1775  }
1776  }
1777 
1778  //----------------------------------------------------------------------------
1779  // Figure out local ids for the edges
1780  //----------------------------------------------------------------------------
1781 
1782  for(const auto & vs : entities2vertex) {
1783  // convert to global ids
1784  global_vs.clear();
1785  global_vs.reserve(vs.size());
1786 
1787  for(auto v : vs)
1788  global_vs.emplace_back(vertex_local2global.at(v));
1789 
1790  // sort the vertices and find the edge
1791  std::sort(global_vs.begin(), global_vs.end());
1792  auto it = entities.find(global_vs);
1793  flog_assert(it != entities.end(), "messed up setting entity ids");
1794 
1795  // figure out the edge's global id
1796  auto global_id = it->second;
1797  global2local.emplace(global_id, local2global.size());
1798  local2global.emplace_back(global_id);
1799  }
1800 }
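The global-numbering step above follows a common MPI pattern: gather each rank's count of locally assigned entities, prefix-sum those counts into a distribution, and shift each local id by the owning rank's starting offset. The sketch below shows only that pattern in isolation; the helper name global_offset and the fixed MPI_UINT64_T datatype are illustrative assumptions (the header itself maps size_t through util::mpi_typetraits).

// A minimal sketch of the gather-and-prefix-sum numbering pattern.
#include <mpi.h>
#include <cstdint>
#include <vector>

inline std::uint64_t
global_offset(std::uint64_t local_count, MPI_Comm comm) {
  int size, rank;
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  // dist[r + 1] receives rank r's local count; dist[0] stays zero.
  std::vector<std::uint64_t> dist(size + 1, 0);
  MPI_Allgather(
    &local_count, 1, MPI_UINT64_T, &dist[1], 1, MPI_UINT64_T, comm);

  // In-place prefix sum: dist[r] becomes the first global id owned by rank r.
  for(int r = 0; r < size; ++r)
    dist[r + 1] += dist[r];

  // Adding this offset to a locally assigned id makes it globally unique.
  return dist[rank];
}

In the listing, edge_dist plays the role of dist, and the ids stored in the entities map are the ones shifted by the rank's offset.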
1801 
1805 inline void
1806 ghost_connectivity(const util::crs & from2to,
1807  const std::vector<size_t> & local2global,
1808  const index_coloring & from_entities,
1809  std::vector<size_t> & from_ids,
1810  util::crs & connectivity) {
1811 
1812  int comm_size, comm_rank;
1813  MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
1814  MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
1815 
1816  // the mpi data type for size_t
1817  const auto mpi_size_t = util::mpi_typetraits<size_t>::type();
1818 
1819  //----------------------------------------------------------------------------
1820  // Determine cell-to-vertex connectivity for my shared cells
1821  //----------------------------------------------------------------------------
1822 
1823  // first count for storage
1824  std::vector<size_t> sendcounts(comm_size, 0);
1825  std::vector<size_t> senddispls(comm_size + 1);
1826 
1827  for(const auto & e : from_entities.shared) {
1828  // get cell info
1829  auto start = from2to.offsets[e.offset];
1830  auto end = from2to.offsets[e.offset + 1];
1831  auto n = end - start;
1832  // loop over shared ranks
1833  for(auto r : e.shared) {
1834  // we will be sending the cell global id, number of vertices, plus
1835  // vertices
1836  if(r != size_t(comm_rank))
1837  sendcounts[r] += 2 + n;
1838  }
1839  }
1840 
1841  // finish displacements
1842  senddispls[0] = 0;
1843  for(int r = 0; r < comm_size; ++r)
1844  senddispls[r + 1] = senddispls[r] + sendcounts[r];
1845 
1846  // now fill buffers
1847  std::vector<size_t> sendbuf(senddispls[comm_size]);
1848  std::fill(sendcounts.begin(), sendcounts.end(), 0);
1849 
1850  for(const auto & e : from_entities.shared) {
1851  // get cell info
1852  auto start = from2to.offsets[e.offset];
1853  auto end = from2to.offsets[e.offset + 1];
1854  auto n = end - start;
1855  // loop over shared ranks
1856  for(auto r : e.shared) {
1857  // we will be sending global id, number of vertices, plus vertices
1858  if(r != size_t(comm_rank)) {
1859  // first the id and size
1860  auto j = senddispls[r] + sendcounts[r];
1861  sendbuf[j++] = e.id;
1862  sendbuf[j++] = n;
1863  for(auto i = start; i < end; ++i) {
1864  auto local_id = from2to.indices[i];
1865  auto global_id = local2global[local_id];
1866  sendbuf[j++] = global_id;
1867  }
1868  // increment send counter
1869  sendcounts[r] = j - senddispls[r];
1870  }
1871  } // shared ranks
1872  }
1873 
1874  //----------------------------------------------------------------------------
1875  // Send shared information
1876  //----------------------------------------------------------------------------
1877 
1878  // send the counts
1879  std::vector<size_t> recvcounts(comm_size);
1880  auto ret = MPI_Alltoall(sendcounts.data(),
1881  1,
1882  mpi_size_t,
1883  recvcounts.data(),
1884  1,
1885  mpi_size_t,
1886  MPI_COMM_WORLD);
1887 
1888  if(ret != MPI_SUCCESS)
1889  flog_error("Error communicating vertex counts");
1890 
1891  // determine how much info we will be receiving
1892  std::vector<size_t> recvdispls(comm_size + 1);
1893  recvdispls[0] = 0;
1894  for(int r = 0; r < comm_size; ++r)
1895  recvdispls[r + 1] = recvdispls[r] + recvcounts[r];
1896  std::vector<size_t> recvbuf(recvdispls[comm_size]);
1897 
1898  // now send the actual vertex info
1899  ret = alltoallv(sendbuf,
1900  sendcounts,
1901  senddispls,
1902  recvbuf,
1903  recvcounts,
1904  recvdispls,
1905  MPI_COMM_WORLD);
1906 
1907  if(ret != MPI_SUCCESS)
1908  flog_error("Error communicating vertices");
1909 
1910  //----------------------------------------------------------------------------
1911  // Unpack results
1912  //----------------------------------------------------------------------------
1913 
1914  connectivity.clear();
1915  connectivity.offsets.emplace_back(0);
1916  from_ids.clear();
1917 
1918  for(int r = 0; r < comm_size; ++r) {
1919  for(size_t i = recvdispls[r]; i < recvdispls[r + 1];) {
1920  // global id
1921  auto global_id = recvbuf[i];
1922  from_ids.emplace_back(global_id);
1923  i++;
1924  // num of vertices
1925  size_t n = recvbuf[i];
1926  i++;
1927  // now sift through the ghost vertices
1928  for(size_t j = 0; j < n; ++j, ++i) {
1929  // global id of the vertex (as assigned by the sender)
1930  auto ent_global_id = recvbuf[i];
1931  // Don't bother checking whether I already have this vertex; even if
1932  // I do, it might be a ghost
1933  connectivity.indices.emplace_back(ent_global_id);
1934  } // vertices
1935  // now add final offset
1936  connectivity.offsets.emplace_back(connectivity.offsets.back() + n);
1937  }
1938  }
1939 }
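The unpack loop above rebuilds a compressed-row (CRS) structure from a packed stream of [global_id, n, v0 ... v(n-1)] records. The following sketch mirrors that bookkeeping with hypothetical stand-ins (crs_sketch and unpack_records are not part of the FleCSI API) and ignores the per-rank displacement ranges for brevity.

// Hypothetical stand-ins mirroring the unpack step: row r of the result
// spans indices[offsets[r], offsets[r + 1]).
#include <cstddef>
#include <vector>

struct crs_sketch {
  std::vector<std::size_t> offsets{0};
  std::vector<std::size_t> indices;
};

inline void
unpack_records(const std::vector<std::size_t> & buf,
  std::vector<std::size_t> & ids,
  crs_sketch & conn) {
  for(std::size_t i = 0; i < buf.size();) {
    ids.push_back(buf[i++]); // global id of the "from" entity
    const std::size_t n = buf[i++]; // number of attached entities
    for(std::size_t j = 0; j < n; ++j)
      conn.indices.push_back(buf[i++]); // their global ids
    conn.offsets.push_back(conn.offsets.back() + n); // close the row
  }
}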
1940 
1941 template<std::size_t MESH_DIMENSION>
1942 void
1943 ghost_connectivity(const parallel_definition<MESH_DIMENSION> & md,
1944  size_t from_dimension,
1945  size_t to_dimension,
1946  const index_coloring & from_entities,
1947  std::vector<size_t> & from_ids,
1948  util::crs & connectivity) {
1949 
1950  const auto & to_local2global = md.local_to_global(to_dimension);
1951  const auto & from2to = md.entities_crs(from_dimension, to_dimension);
1952  const auto & from2to = md.entities_crs(from_dimension, to_dimension);
1953  ghost_connectivity(
1954  from2to, to_local2global, from_entities, from_ids, connectivity);
1955 }
1956 
1957 } // namespace unstructured_impl
1958 } // namespace topo
1959 } // namespace flecsi
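As a usage illustration of the dimension-based ghost_connectivity overload above, the helper below builds ghost cell-to-vertex connectivity for a 2D mesh. It is a sketch only: build_ghost_cell2vertex is a hypothetical name, md is assumed to be a populated parallel mesh definition, cells is assumed to be the cell index_coloring produced by the earlier coloring steps, and dimensions 2 and 0 select cells and vertices respectively.

// Hypothetical usage sketch; assumes this header and flecsi/util/crs.hh are
// already included.
#include <cstddef>
#include <vector>

template<typename Definition, typename Coloring>
void
build_ghost_cell2vertex(const Definition & md, // e.g. a parallel_definition<2>
  const Coloring & cells, // cell coloring from the earlier steps
  std::vector<std::size_t> & ghost_cell_ids,
  flecsi::util::crs & cell2vertex) {
  // from_dimension = 2 (cells), to_dimension = 0 (vertices)
  flecsi::topo::unstructured_impl::ghost_connectivity(
    md, 2, 0, cells, ghost_cell_ids, cell2vertex);
  // ghost_cell_ids[i] now holds the sender's global id for row i of
  // cell2vertex, whose entries are vertex global ids.
}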
size_t shared - The number of shared indices. Definition: coloring_types.hh:35
void clear() - clears the current storage. Definition: crs.hh:112
size_t exclusive - The number of exclusive indices. Definition: coloring_types.hh:32
Definition: crs.hh:48
unsigned char byte_t - a byte type used for migrating data. Definition: parallel_definition.hh:31
void migrate(size_t dimension, const std::vector< size_t > &partitioning, util::dcrs &dcrs, parallel_definition< DIMENSION > &md) - Migrate pieces of a mesh from one processor to another. Definition: dcrs_utils.hh:759
#define flog(severity) - Definition: flog.hh:136
util::dcrs make_dcrs(const definition< DIMENSION > &md) - Definition: dcrs_utils.hh:117
void make_dcrs_distributed(const parallel_definition< MESH_DIMENSION > &md, size_t from_dimension, size_t to_dimension, size_t min_connections, util::dcrs &dcrs) - Create distributed CRS representation of the graph. Definition: dcrs_utils.hh:390
#define flog_assert(test, message) - Definition: flog.hh:411
void make_dcrs_have_connectivity(const definition< DIMENSION > &md, util::dcrs &dcrs) - Create distributed CRS representation of the graph. Definition: dcrs_utils.hh:211
size_t ghost - The number of ghost indices. Definition: coloring_types.hh:38
std::set< size_t > shared_users - The aggregate set of colors that depend on our shared indices. Definition: coloring_types.hh:41
virtual std::vector< size_t > entities(size_t from_dimension, size_t to_dimension, size_t id) const =0
int start(const std::function< int()> &action) - Definition: execution.hh:69
Definition: flog.hh:82
void match_ids(const parallel_definition< MESH_DIMENSION > &md, size_t dimension, std::vector< size_t > &local2global, std::map< size_t, size_t > &global2local) - Color an auxiliary index space like vertices or edges. Definition: dcrs_utils.hh:1509
std::set< size_t > ghost_owners - The aggregate set of colors that we depend on for ghosts. Definition: coloring_types.hh:44
Definition: index_coloring.hh:28
T rank_owner(const Vector &distribution, T i) - Simple utility for determining which rank owns an id. Definition: dcrs_utils.hh:357
Definition: crs.hh:254
void erase(const std::vector< size_t > &ids) - erase a bunch of ids. Definition: crs.hh:65
#define flog_error(message) - Definition: flog.hh:247
virtual size_t num_entities(size_t dimension) const =0
Definition: parallel_definition.hh:27
std::set< size_t > naive_coloring(definition< MESH_DIMENSION > &md) - Definition: dcrs_utils.hh:54
Definition: mpi_type_traits.hh:38
Definition: coloring_types.hh:29
void ghost_connectivity(const util::crs &from2to, const std::vector< size_t > &local2global, const index_coloring &from_entities, std::vector< size_t > &from_ids, util::crs &connectivity) - Color an auxiliary index space like vertices or edges. Definition: dcrs_utils.hh:1806
void subdivide(size_t nelem, size_t npart, Vector &dist) - A simple utility for subdividing an index space into several parts. Definition: dcrs_utils.hh:368
Definition: coloring_types.hh:73
Definition: control.hh:31