Reference documentation for deal.II version 8.1.0
matrix_free.templates.h
1 // ---------------------------------------------------------------------
2 // @f$Id: matrix_free.templates.h 31932 2013-12-08 02:15:54Z heister @f$
3 //
4 // Copyright (C) 2011 - 2013 by the deal.II authors
5 //
6 // This file is part of the deal.II library.
7 //
8 // The deal.II library is free software; you can use it, redistribute
9 // it, and/or modify it under the terms of the GNU Lesser General
10 // Public License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 // The full text of the license can be found in the file LICENSE at
13 // the top level of the deal.II distribution.
14 //
15 // ---------------------------------------------------------------------
16 
17 
18 #include <deal.II/base/utilities.h>
19 #include <deal.II/base/memory_consumption.h>
20 #include <deal.II/base/tensor_product_polynomials.h>
21 #include <deal.II/base/polynomials_piecewise.h>
22 #include <deal.II/base/mpi.h>
23 #include <deal.II/dofs/dof_accessor.h>
24 #include <deal.II/fe/fe_poly.h>
25 #include <deal.II/hp/q_collection.h>
26 #include <deal.II/distributed/tria.h>
27 
28 #include <deal.II/matrix_free/matrix_free.h>
29 #include <deal.II/matrix_free/shape_info.templates.h>
30 #include <deal.II/matrix_free/mapping_info.templates.h>
31 #include <deal.II/matrix_free/dof_info.templates.h>
32 
33 
35 
36 
37 // --------------------- MatrixFree -----------------------------------
38 
39 template <int dim, typename Number>
41  :
42  indices_are_initialized (false),
43  mapping_is_initialized (false)
44 {}
45 
46 
47 
48 template <int dim, typename Number>
50 {}
51 
52 
53 
54 template <int dim, typename Number>
57 {
58  clear ();
60  dof_info = v.dof_info;
66  task_info = v.task_info;
67  size_info = v.size_info;
70 }
71 
72 
73 
74 namespace internal
75 {
76  template <int dim>
77  void assert_communicator_equality (const ::Triangulation<dim> &tria,
78  const MPI_Comm &comm_mf)
79  {
80 #ifdef DEAL_II_WITH_MPI
82  dynamic_cast<const parallel::distributed::Triangulation<dim>*>(&tria);
83  if (dist_tria != 0)
84  {
86  {
87  int communicators_same = 0;
88  MPI_Comm_compare (dist_tria->get_communicator(), comm_mf,
89  &communicators_same);
90  Assert (communicators_same == MPI_IDENT ||
91  communicators_same == MPI_CONGRUENT,
92  ExcMessage ("MPI communicator in parallel::distributed::Triangulation "
93  "and matrix free class must be the same!"));
94  }
95  }
96 #else
97  (void)tria;
98  (void)comm_mf;
99 #endif
100  }
101 }
102 
103 
104 
105 template <int dim, typename Number>
107 internal_reinit(const Mapping<dim> &mapping,
108  const std::vector<const DoFHandler<dim> *> &dof_handler,
109  const std::vector<const ConstraintMatrix *> &constraint,
110  const std::vector<IndexSet> &locally_owned_set,
111  const std::vector<hp::QCollection<1> > &quad,
112  const MatrixFree<dim,Number>::AdditionalData additional_data)
113 {
114  if (additional_data.initialize_indices == true)
115  {
116  clear();
117  Assert (dof_handler.size() > 0, ExcMessage("No DoFHandler is given."));
118  AssertDimension (dof_handler.size(), constraint.size());
119  AssertDimension (dof_handler.size(), locally_owned_set.size());
120 
121  // set variables that are independent of FE
122  internal::assert_communicator_equality (dof_handler[0]->get_tria(),
123  additional_data.mpi_communicator);
124  size_info.communicator = additional_data.mpi_communicator;
126  {
127  size_info.my_pid =
129  size_info.n_procs =
131  }
132  else
133  {
134  size_info.my_pid = 0;
135  size_info.n_procs = 1;
136  }
137 
138  initialize_dof_handlers (dof_handler, additional_data.level_mg_handler);
139  for (unsigned int no=0; no<dof_handler.size(); ++no)
140  dof_info[no].store_plain_indices = additional_data.store_plain_indices;
141 
142  // initialize the basic multithreading information that needs to be
143  // passed to the DoFInfo structure
144 #ifdef DEAL_II_WITH_THREADS
145  if (additional_data.tasks_parallel_scheme != AdditionalData::none &&
146  multithread_info.n_threads() > 1)
147  {
148  task_info.use_multithreading = true;
149  task_info.block_size = additional_data.tasks_block_size;
150  task_info.use_partition_partition =
151  (additional_data.tasks_parallel_scheme ==
152  AdditionalData::partition_partition ? true : false);
153  task_info.use_coloring_only =
154  (additional_data.tasks_parallel_scheme ==
155  AdditionalData::color ? true : false);
156  }
157  else
158 #endif
159  task_info.use_multithreading = false;
160 
161  // set dof_indices together with constraint_indicator and
162  // constraint_pool_data. It also reorders the way cells are gone through
163  // (to separate cells with overlap to other processors from others
164  // without).
165  initialize_indices (constraint, locally_owned_set);
166  }
167 
168  // initialize bare structures
169  else if (dof_info.size() != dof_handler.size())
170  {
171  initialize_dof_handlers(dof_handler, additional_data.level_mg_handler);
172  std::vector<unsigned int> dummy;
175  dummy, dummy);
176  for (unsigned int i=0; i<dof_info.size(); ++i)
177  {
178  dof_info[i].dimension = dim;
179  dof_info[i].n_components = dof_handler[i]->get_fe().element_multiplicity(0);
180  dof_info[i].dofs_per_cell.push_back(dof_handler[i]->get_fe().dofs_per_cell);
181  dof_info[i].row_starts.resize(size_info.n_macro_cells+1);
182  dof_info[i].row_starts.back()[2] =
184 
185  // if indices are not initialized, the cell_level_index might not be
186  // divisible by the vectorization length. But it must be for
187  // mapping_info...
189  != 0)
190  cell_level_index.push_back(cell_level_index.back());
191  }
192  }
193 
194  // Reads out the FE information and stores the shape function values,
195  // gradients and Hessians for quadrature points.
196  const unsigned int n_fe = dof_handler.size();
197  const unsigned int n_quad = quad.size();
198  shape_info.reinit (TableIndices<4>(n_fe, n_quad, 1, 1));
199  for (unsigned int no=0; no<n_fe; no++)
200  {
201  const FiniteElement<dim> &fe = dof_handler[no]->get_fe();
202  for (unsigned int nq =0; nq<n_quad; nq++)
203  {
204  AssertDimension (quad[nq].size(), 1);
205  shape_info(no,nq,0,0).reinit(quad[nq][0], fe.base_element(0));
206  }
207  }
208 
209  // Evaluates transformations from unit to real cell, Jacobian determinants,
210  // quadrature points in real space, based on the ordering of the cells
211  // determined in @p extract_local_to_global_indices. The algorithm assumes
212  // that the active FE index for the transformations is given the active FE
213  // index in the zeroth DoFHandler. TODO: how do things look like in the more
214  // general case?
215  if (additional_data.initialize_mapping == true)
216  {
217  mapping_info.initialize (dof_handler[0]->get_tria(), cell_level_index,
218  dof_info[0].cell_active_fe_index, mapping, quad,
219  additional_data.mapping_update_flags);
220 
221  mapping_is_initialized = true;
222  }
223 }
224 
225 
226 
227 template <int dim, typename Number>
229 internal_reinit(const Mapping<dim> &mapping,
230  const std::vector<const hp::DoFHandler<dim>*> &dof_handler,
231  const std::vector<const ConstraintMatrix *> &constraint,
232  const std::vector<IndexSet> &locally_owned_set,
233  const std::vector<hp::QCollection<1> > &quad,
234  const MatrixFree<dim,Number>::AdditionalData additional_data)
235 {
236  if (additional_data.initialize_indices == true)
237  {
238  clear();
239  Assert (dof_handler.size() > 0, ExcMessage("No DoFHandler is given."));
240  AssertDimension (dof_handler.size(), constraint.size());
241  AssertDimension (dof_handler.size(), locally_owned_set.size());
242 
243  // set variables that are independent of FE
244  internal::assert_communicator_equality (dof_handler[0]->get_tria(),
245  additional_data.mpi_communicator);
246  size_info.communicator = additional_data.mpi_communicator;
248  {
249  size_info.my_pid =
251  size_info.n_procs =
253  }
254  else
255  {
256  size_info.my_pid = 0;
257  size_info.n_procs = 1;
258  }
259 
260  initialize_dof_handlers (dof_handler, additional_data.level_mg_handler);
261  for (unsigned int no=0; no<dof_handler.size(); ++no)
262  dof_info[no].store_plain_indices = additional_data.store_plain_indices;
263 
264  // initialize the basic multithreading information that needs to be
265  // passed to the DoFInfo structure
266 #ifdef DEAL_II_WITH_THREADS
267  if (additional_data.tasks_parallel_scheme != AdditionalData::none &&
268  multithread_info.n_threads() > 1)
269  {
270  task_info.use_multithreading = true;
271  task_info.block_size = additional_data.tasks_block_size;
272  task_info.use_partition_partition =
273  (additional_data.tasks_parallel_scheme ==
274  AdditionalData::partition_partition ? true : false);
275  task_info.use_coloring_only =
276  (additional_data.tasks_parallel_scheme ==
277  AdditionalData::color ? true : false);
278  }
279  else
280 #endif
281  task_info.use_multithreading = false;
282 
283  // set dof_indices together with constraint_indicator and
284  // constraint_pool_data. It also reorders the way cells are gone through
285  // (to separate cells with overlap to other processors from others
286  // without).
287  initialize_indices (constraint, locally_owned_set);
288  }
289 
290  // initialize bare structures
291  else if (dof_info.size() != dof_handler.size())
292  {
293  initialize_dof_handlers(dof_handler, additional_data.level_mg_handler);
294  std::vector<unsigned int> dummy;
297  dummy, dummy);
298  for (unsigned int i=0; i<dof_info.size(); ++i)
299  {
300  Assert(dof_handler[i]->get_fe().size() == 1, ExcNotImplemented());
301  dof_info[i].dimension = dim;
302  dof_info[i].n_components = dof_handler[i]->get_fe()[0].element_multiplicity(0);
303  dof_info[i].dofs_per_cell.push_back(dof_handler[i]->get_fe()[0].dofs_per_cell);
304  dof_info[i].row_starts.resize(size_info.n_macro_cells+1);
305  dof_info[i].row_starts.back()[2] =
307 
308  // if indices are not initialized, the cell_level_index might not be
309  // divisible by the vectorization length. But it must be for
310  // mapping_info...
312  != 0)
313  cell_level_index.push_back(cell_level_index.back());
314  }
315  }
316 
317  // Reads out the FE information and stores the shape function values,
318  // gradients and Hessians for quadrature points.
319  const unsigned int n_components = dof_handler.size();
320  const unsigned int n_quad = quad.size();
321  unsigned int n_fe_in_collection = 0;
322  for (unsigned int i=0; i<n_components; ++i)
323  n_fe_in_collection = std::max (n_fe_in_collection,
324  dof_handler[i]->get_fe().size());
325  unsigned int n_quad_in_collection = 0;
326  for (unsigned int q=0; q<n_quad; ++q)
327  n_quad_in_collection = std::max (n_quad_in_collection, quad[q].size());
328  shape_info.reinit (TableIndices<4>(n_components, n_quad,
329  n_fe_in_collection,
330  n_quad_in_collection));
331  for (unsigned int no=0; no<n_components; no++)
332  for (unsigned int fe_no=0; fe_no<dof_handler[no]->get_fe().size(); ++fe_no)
333  {
334  const FiniteElement<dim> &fe = dof_handler[no]->get_fe()[fe_no];
335  for (unsigned int nq =0; nq<n_quad; nq++)
336  for (unsigned int q_no=0; q_no<quad[nq].size(); ++q_no)
337  shape_info(no,nq,fe_no,q_no).reinit (quad[nq][q_no],
338  fe.base_element(0));
339  }
340 
341  // Evaluates transformations from unit to real cell, Jacobian determinants,
342  // quadrature points in real space, based on the ordering of the cells
343  // determined in @p extract_local_to_global_indices.
344  if (additional_data.initialize_mapping == true)
345  {
346  mapping_info.initialize (dof_handler[0]->get_tria(), cell_level_index,
347  dof_info[0].cell_active_fe_index, mapping, quad,
348  additional_data.mapping_update_flags);
349 
350  mapping_is_initialized = true;
351  }
352 }
353 
354 
355 
356 namespace internal
357 {
358 
359  // steps through all children and adds the
360  // active cells recursively
361  template <typename InIterator>
362  void resolve_cell (const InIterator &cell,
363  std::vector<std::pair<unsigned int,unsigned int> > &cell_its,
364  const unsigned int subdomain_id)
365  {
366  if (cell->has_children())
367  for (unsigned int child=0; child<cell->n_children(); ++child)
368  resolve_cell (cell->child(child), cell_its,
369  subdomain_id);
370  else if (cell->subdomain_id() == subdomain_id)
371  {
372  Assert (cell->active(), ExcInternalError());
373  cell_its.push_back (std::pair<unsigned int,unsigned int>
374  (cell->level(), cell->index()));
375  }
376  }
377 }
378 
379 
380 
381 template <int dim, typename Number>
383 initialize_dof_handlers (const std::vector<const DoFHandler<dim>*> &dof_handler,
384  const unsigned int level)
385 {
386  dof_handlers.active_dof_handler = DoFHandlers::usual;
387  dof_handlers.level = level;
388  dof_handlers.n_dof_handlers = dof_handler.size();
389  dof_handlers.dof_handler.resize (dof_handlers.n_dof_handlers);
390  for (unsigned int no=0; no<dof_handlers.n_dof_handlers; ++no)
391  dof_handlers.dof_handler[no] = dof_handler[no];
392 
393  dof_info.resize (dof_handlers.n_dof_handlers);
394 
395  // go through cells on zeroth level and then successively step down into
396  // children. This gives a z-ordering of the cells, which is beneficial when
397  // setting up neighboring relations between cells for thread parallelization
398  const unsigned int n_mpi_procs = size_info.n_procs;
399  const unsigned int my_pid = size_info.my_pid;
400 
401  const Triangulation<dim> &tria = dof_handlers.dof_handler[0]->get_tria();
402  if (level == numbers::invalid_unsigned_int)
403  {
404  if (n_mpi_procs == 1)
405  cell_level_index.reserve (tria.n_active_cells());
406  typename Triangulation<dim>::cell_iterator cell = tria.begin(0),
407  end_cell = tria.end(0);
408  for ( ; cell != end_cell; ++cell)
409  internal::resolve_cell (cell, cell_level_index, my_pid);
410  }
411  else
412  {
413  AssertIndexRange (level, tria.n_levels());
414  cell_level_index.reserve (tria.n_cells(level));
415  typename Triangulation<dim>::cell_iterator cell = tria.begin(level),
416  end_cell = tria.end(level);
417  for ( ; cell != end_cell; ++cell)
418  cell_level_index.push_back (std::pair<unsigned int,unsigned int>
419  (cell->level(), cell->index()));
420  }
421 }
422 
423 
424 
425 template <int dim, typename Number>
427 initialize_dof_handlers (const std::vector<const hp::DoFHandler<dim>*> &dof_handler,
428  const unsigned int)
429 {
430  dof_handlers.active_dof_handler = DoFHandlers::hp;
431  dof_handlers.n_dof_handlers = dof_handler.size();
432  dof_handlers.hp_dof_handler.resize (dof_handlers.n_dof_handlers);
433  for (unsigned int no=0; no<dof_handlers.n_dof_handlers; ++no)
434  dof_handlers.hp_dof_handler[no] = dof_handler[no];
435 
436  dof_info.resize (dof_handlers.n_dof_handlers);
437 
438  // go through cells on zeroth level and then successively step down into
439  // children. This gives a z-ordering of the cells, which is beneficial when
440  // setting up neighboring relations between cells for thread parallelization
441  const unsigned int n_mpi_procs = size_info.n_procs;
442  const unsigned int my_pid = size_info.my_pid;
443 
444  // if we have no level given, use the same as for the standard DoFHandler,
445  // otherwise we must loop through the respective level
446  const Triangulation<dim> &tria = dof_handler[0]->get_tria();
447 
448  if (n_mpi_procs == 1)
449  {
450  cell_level_index.reserve (tria.n_active_cells());
451  }
452  typename hp::DoFHandler<dim>::cell_iterator cell = dof_handler[0]->begin(0),
453  end_cell = dof_handler[0]->end(0);
454  for ( ; cell != end_cell; ++cell)
455  internal::resolve_cell (cell, cell_level_index,
456  my_pid);
457 }
458 
459 
460 
461 template <int dim, typename Number>
463 (const std::vector<const ConstraintMatrix *> &constraint,
464  const std::vector<IndexSet> &locally_owned_set)
465 {
466  const unsigned int n_fe = dof_handlers.n_dof_handlers;
467  const unsigned int n_active_cells = cell_level_index.size();
468 
469  AssertDimension (n_active_cells, cell_level_index.size());
470  AssertDimension (n_fe, locally_owned_set.size());
471  AssertDimension (n_fe, constraint.size());
472 
473  std::vector<types::global_dof_index> local_dof_indices;
474  std::vector<std::vector<std::vector<unsigned int> > > lexicographic_inv(n_fe);
475 
477  std::vector<unsigned int> constraint_indices;
478 
479  for (unsigned int no=0; no<n_fe; ++no)
480  {
481  std::vector<const FiniteElement<dim>*> fes;
482  if (dof_handlers.active_dof_handler == DoFHandlers::hp)
483  {
484  const hp::DoFHandler<dim> *hpdof = dof_handlers.hp_dof_handler[no];
485  const hp::FECollection<dim> &fe = hpdof->get_fe();
486  for (unsigned int f=0; f<fe.size(); ++f)
487  fes.push_back (&fe[f]);
488 
489  dof_info[no].max_fe_index = fe.size();
490  dof_info[no].fe_index_conversion.resize (fe.size());
491  for (unsigned int ind=0; ind<hpdof->get_fe().size(); ++ind)
492  dof_info[no].fe_index_conversion[ind] =
493  std::pair<unsigned int,unsigned int>(fe[ind].degree,
494  fe[ind].dofs_per_cell);
495  if (fe.size() > 1)
496  dof_info[no].cell_active_fe_index.resize(n_active_cells,
498  }
499  else
500  {
501  const DoFHandler<dim> *dofh =&*dof_handlers.dof_handler[no];
502  fes.push_back (&dofh->get_fe());
503  dof_info[no].max_fe_index = 1;
504  dof_info[no].fe_index_conversion.resize (1);
505  dof_info[no].fe_index_conversion[0] =
506  std::pair<unsigned int,unsigned int>(fes.back()->degree,
507  fes.back()->dofs_per_cell);
508  }
509 
510  lexicographic_inv[no].resize (fes.size());
511  for (unsigned int fe_index = 0; fe_index<fes.size(); ++fe_index)
512  {
513  const FiniteElement<dim> &fe = *fes[fe_index];
514  Assert (fe.n_base_elements() == 1,
515  ExcMessage ("MatrixFree only works for DoFHandler with one base element"));
516  const unsigned int n_fe_components = fe.element_multiplicity (0);
517 
518  // cache number of finite elements and dofs_per_cell
519  dof_info[no].dofs_per_cell.push_back (fe.dofs_per_cell);
520  dof_info[no].dofs_per_face.push_back (fe.dofs_per_face);
521  dof_info[no].dimension = dim;
522  dof_info[no].n_components = n_fe_components;
523 
524  // get permutation that gives lexicographic renumbering of the cell
525  // dofs renumber (this is necessary for FE_Q, for example, since
526  // there the vertex DoFs come first, which is incompatible with the
527  // lexicographic ordering necessary to apply tensor products
528  // efficiently)
529  const FE_Poly<TensorProductPolynomials<dim>,dim,dim> *fe_poly =
530  dynamic_cast<const FE_Poly<TensorProductPolynomials<dim>,dim,dim>*>
531  (&fe.base_element(0));
533  PiecewisePolynomial<double> >,dim,dim> *fe_poly_piece =
534  dynamic_cast<const FE_Poly<TensorProductPolynomials<dim,
536  (&fe.base_element(0));
537 
538  // This class currently only works for elements derived from
539  // FE_Poly<TensorProductPolynomials<dim>,dim,dim> or piecewise
540  // polynomials. For any other element, the dynamic casts above will
541  // fail and give fe_poly == 0.
542  Assert (fe_poly != 0 || fe_poly_piece != 0, ExcNotImplemented());
543  if (n_fe_components == 1)
544  {
545  if (fe_poly != 0)
546  lexicographic_inv[no][fe_index] =
548  else
549  lexicographic_inv[no][fe_index] =
550  fe_poly_piece->get_poly_space_numbering_inverse();
551  AssertDimension (lexicographic_inv[no][fe_index].size(),
552  dof_info[no].dofs_per_cell[fe_index]);
553  }
554  else
555  {
556  // ok, we have more than one component
557  Assert (n_fe_components > 1, ExcInternalError());
558  std::vector<unsigned int> scalar_lex =
559  fe_poly != 0 ? fe_poly->get_poly_space_numbering() :
560  fe_poly_piece->get_poly_space_numbering();
561  AssertDimension (scalar_lex.size() * n_fe_components,
562  dof_info[no].dofs_per_cell[fe_index]);
563  std::vector<unsigned int> lexicographic (dof_info[no].dofs_per_cell[fe_index]);
564  for (unsigned int comp=0; comp<n_fe_components; ++comp)
565  for (unsigned int i=0; i<scalar_lex.size(); ++i)
566  lexicographic[fe.component_to_system_index(comp,i)]
567  = scalar_lex.size () * comp + scalar_lex[i];
568 
569  // invert numbering
570  lexicographic_inv[no][fe_index] =
571  Utilities::invert_permutation(lexicographic);
572  }
573  AssertDimension (lexicographic_inv[no][fe_index].size(),
574  dof_info[no].dofs_per_cell[fe_index]);
575  }
576 
577  // set locally owned range for each component
578  Assert (locally_owned_set[no].is_contiguous(), ExcNotImplemented());
579  dof_info[no].vector_partitioner.reset
580  (new Utilities::MPI::Partitioner(locally_owned_set[no], size_info.communicator));
581 
582  // initialize the arrays for indices
583  dof_info[no].row_starts.resize (n_active_cells+1);
584  dof_info[no].row_starts[0][0] = 0;
585  dof_info[no].row_starts[0][1] = 0;
586  dof_info[no].row_starts[0][2] = 0;
587  dof_info[no].dof_indices.reserve
588  ((n_active_cells*dof_info[no].dofs_per_cell[0]*3)/2);
589 
590  // cache the constrained indices for use in matrix-vector products
591  {
593  start_index = dof_info[no].vector_partitioner->local_range().first,
594  end_index = dof_info[no].vector_partitioner->local_range().second;
595  for (types::global_dof_index i=start_index; i<end_index; ++i)
596  if (constraint[no]->is_constrained(i)==true)
597  dof_info[no].constrained_dofs.
598  push_back(static_cast<unsigned int>(i-start_index));
599  }
600  }
601 
602  // extract all the global indices associated with the computation, and form
603  // the ghost indices
604  std::vector<unsigned int> boundary_cells;
605  for (unsigned int counter = 0 ; counter < n_active_cells ; ++counter)
606  {
607  bool cell_at_boundary = false;
608  for (unsigned int no=0; no<n_fe; ++no)
609  {
610  // OK, read indices from standard DoFHandler in the usual way
611  if (dof_handlers.active_dof_handler == DoFHandlers::usual &&
613  {
614  const DoFHandler<dim> *dofh = &*dof_handlers.dof_handler[no];
616  cell_it (&dofh->get_tria(),
617  cell_level_index[counter].first,
618  cell_level_index[counter].second,
619  dofh);
620  local_dof_indices.resize (dof_info[no].dofs_per_cell[0]);
621  cell_it->get_dof_indices(local_dof_indices);
622  dof_info[no].read_dof_indices (local_dof_indices,
623  lexicographic_inv[no][0],
624  *constraint[no], counter,
625  constraint_values,
626  cell_at_boundary);
627  }
628  // ok, now we are requested to use a level in a MGDoFHandler
629  else if (dof_handlers.active_dof_handler == DoFHandlers::usual &&
631  {
632  const DoFHandler<dim> *dofh = dof_handlers.dof_handler[no];
633  AssertIndexRange (dof_handlers.level, dofh->get_tria().n_levels());
635  cell_it (&dofh->get_tria(),
636  cell_level_index[counter].first,
637  cell_level_index[counter].second,
638  dofh);
639  local_dof_indices.resize (dof_info[no].dofs_per_cell[0]);
640  cell_it->get_mg_dof_indices(local_dof_indices);
641  dof_info[no].read_dof_indices (local_dof_indices,
642  lexicographic_inv[no][0],
643  *constraint[no], counter,
644  constraint_values,
645  cell_at_boundary);
646  }
647  else if (dof_handlers.active_dof_handler == DoFHandlers::hp)
648  {
649  const hp::DoFHandler<dim> *dofh =
650  dof_handlers.hp_dof_handler[no];
651  typename hp::DoFHandler<dim>::active_cell_iterator
652  cell_it (&dofh->get_tria(),
653  cell_level_index[counter].first,
654  cell_level_index[counter].second,
655  dofh);
656  if (dofh->get_fe().size() > 1)
657  dof_info[no].cell_active_fe_index[counter] =
658  cell_it->active_fe_index();
659  local_dof_indices.resize (cell_it->get_fe().dofs_per_cell);
660  cell_it->get_dof_indices(local_dof_indices);
661  dof_info[no].read_dof_indices (local_dof_indices,
662  lexicographic_inv[no][cell_it->active_fe_index()],
663  *constraint[no], counter,
664  constraint_values,
665  cell_at_boundary);
666  }
667  else
668  {
669  Assert (false, ExcNotImplemented());
670  }
671  }
672 
673  // if we found dofs on some FE component that belong to other
674  // processors, the cell is added to the boundary cells.
675  if (cell_at_boundary == true)
676  boundary_cells.push_back(counter);
677  }
678 
679  const unsigned int vectorization_length =
681  std::vector<unsigned int> irregular_cells;
682  size_info.make_layout (n_active_cells, vectorization_length, boundary_cells,
683  irregular_cells);
684 
685  for (unsigned int no=0; no<n_fe; ++no)
686  dof_info[no].assign_ghosts (boundary_cells);
687 
688  // reorganize the indices in order to overlap communication in MPI with
689  // computations: Place all cells with ghost indices into one chunk. Also
690  // reorder cells so that we can parallelize by threads
691  std::vector<unsigned int> renumbering;
692  if (task_info.use_multithreading == true)
693  {
694  dof_info[0].compute_renumber_parallel (boundary_cells, size_info,
695  renumbering);
696  if (task_info.use_partition_partition == true)
697  dof_info[0].make_thread_graph_partition_partition
698  (size_info, task_info, renumbering, irregular_cells,
699  dof_handlers.active_dof_handler == DoFHandlers::hp);
700  else
701  dof_info[0].make_thread_graph_partition_color
702  (size_info, task_info, renumbering, irregular_cells,
703  dof_handlers.active_dof_handler == DoFHandlers::hp);
704  }
705  else
706  {
707  // In case, we have an hp-dofhandler, we have to reorder the cell
708  // according to the polynomial degree on the cell.
709  dof_info[0].compute_renumber_serial (boundary_cells, size_info,
710  renumbering);
711  if (dof_handlers.active_dof_handler == DoFHandlers::hp)
712  dof_info[0].compute_renumber_hp_serial (size_info, renumbering,
713  irregular_cells);
714  }
715 
716  // Finally perform the renumbering. We also want to group several cells
717  // together to one "macro-cell" for vectorization (where the arithmetic
718  // operations will then be done simultaneously).
719 #ifdef DEBUG
720  {
721  std::vector<unsigned int> sorted_renumbering (renumbering);
722  std::sort (sorted_renumbering.begin(), sorted_renumbering.end());
723  for (unsigned int i=0; i<sorted_renumbering.size(); ++i)
724  Assert (sorted_renumbering[i] == i, ExcInternalError());
725  }
726 #endif
727  {
728  std::vector<std::pair<unsigned int,unsigned int> >
729  cell_level_index_old;
730  cell_level_index.swap (cell_level_index_old);
731  cell_level_index.reserve(size_info.n_macro_cells*vectorization_length);
732  unsigned int position_cell=0;
733  for (unsigned int i=0; i<size_info.n_macro_cells; ++i)
734  {
735  unsigned int n_comp = (irregular_cells[i]>0)?
736  irregular_cells[i] : vectorization_length;
737  for (unsigned int j=0; j<n_comp; ++j)
738  cell_level_index.push_back
739  (cell_level_index_old[renumbering[position_cell+j]]);
740 
741  // generate a cell and level index also when we have not filled up
742  // vectorization_length cells. This is needed for MappingInfo when the
743  // transformation data is initialized. We just set the value to the
744  // last valid cell in that case.
745  for (unsigned int j=n_comp; j<vectorization_length; ++j)
746  cell_level_index.push_back
747  (cell_level_index_old[renumbering[position_cell+n_comp-1]]);
748  position_cell += n_comp;
749  }
750  AssertDimension (position_cell, size_info.n_active_cells);
751  AssertDimension (cell_level_index.size(),size_info.n_macro_cells*vectorization_length);
752  }
753 
754  // set constraint pool from the std::map and reorder the indices
755  typename std::map<std::vector<double>, types::global_dof_index,
757  it = constraint_values.constraints.begin(),
758  end = constraint_values.constraints.end();
759  std::vector<const std::vector<double>*>
760  constraints (constraint_values.constraints.size());
761  unsigned int length = 0;
762  for ( ; it != end; ++it)
763  {
764  AssertIndexRange(it->second, constraints.size());
765  constraints[it->second] = &it->first;
766  length += it->first.size();
767  }
768  constraint_pool_data.clear();
769  constraint_pool_data.reserve (length);
770  constraint_pool_row_index.reserve(constraint_values.constraints.size()+1);
771  constraint_pool_row_index.resize(1, 0);
772  for (unsigned int i=0; i<constraints.size(); ++i)
773  {
774  Assert(constraints[i] != 0, ExcInternalError());
776  constraints[i]->begin(),
777  constraints[i]->end());
779  }
780  AssertDimension(constraint_pool_data.size(), length);
781  for (unsigned int no=0; no<n_fe; ++no)
782  dof_info[no].reorder_cells(size_info, renumbering,
784  irregular_cells, vectorization_length);
785 
787 }
788 
789 
790 
791 template <int dim, typename Number>
793 {
794  dof_info.clear();
795  mapping_info.clear();
796  cell_level_index.clear();
797  size_info.clear();
798  task_info.clear();
799  dof_handlers.dof_handler.clear();
800  dof_handlers.hp_dof_handler.clear();
801  indices_are_initialized = false;
802  mapping_is_initialized = false;
803 }
804 
805 
806 
807 template <int dim, typename Number>
809 {
810  std::size_t memory = MemoryConsumption::memory_consumption (dof_info);
816  memory += sizeof(this);
817  memory += mapping_info.memory_consumption();
818  return memory;
819 }
820 
821 
822 template <int dim, typename Number>
823 template <typename STREAM>
825 {
826  out << " Memory cell FE operator total: --> ";
828  out << " Memory cell index: ";
831  for (unsigned int j=0; j<dof_info.size(); ++ j)
832  {
833  out << " Memory DoFInfo component "<< j << std::endl;
834  dof_info[j].print_memory_consumption(out, size_info);
835  }
836 
837  out << " Memory mapping info" << std::endl;
838  mapping_info.print_memory_consumption(out, size_info);
839 
840  out << " Memory unit cell shape data: ";
843  if (task_info.use_multithreading == true)
844  {
845  out << " Memory task partitioning info: ";
848  }
849 }
850 
851 
852 
853 template <int dim, typename Number>
854 void MatrixFree<dim,Number>::print (std::ostream &out) const
855 {
856  // print indices local to global
857  for (unsigned int no=0; no<dof_info.size(); ++no)
858  {
859  out << "\n-- Index data for component " << no << " --" << std::endl;
861  out << std::endl;
862  }
863 }
864 
865 
866 
867 /*-------------------- Implementation of helper functions ------------------*/
868 
869 namespace internal
870 {
871  namespace MatrixFreeFunctions
872  {
873 
874  TaskInfo::TaskInfo ()
875  {
876  clear();
877  }
878 
879 
880 
881  void TaskInfo::clear ()
882  {
883  block_size = 0;
884  n_blocks = 0;
885  block_size_last = 0;
886  position_short_block = 0;
887  use_multithreading = false;
888  use_partition_partition = false;
889  use_coloring_only = false;
890  partition_color_blocks_row_index.clear();
891  partition_color_blocks_data.clear();
892  evens = 0;
893  odds = 0;
894  n_blocked_workers = 0;
895  n_workers = 0;
896  partition_evens.clear();
897  partition_odds.clear();
898  partition_n_blocked_workers.clear();
899  partition_n_workers.clear();
900  }
901 
902 
903 
904  std::size_t
905  TaskInfo::memory_consumption () const
906  {
907  return (MemoryConsumption::memory_consumption (partition_color_blocks_row_index) +
908  MemoryConsumption::memory_consumption (partition_color_blocks_data)+
909  MemoryConsumption::memory_consumption (partition_evens) +
910  MemoryConsumption::memory_consumption (partition_odds) +
911  MemoryConsumption::memory_consumption (partition_n_blocked_workers) +
912  MemoryConsumption::memory_consumption (partition_n_workers));
913  }
914 
915 
916 
917  SizeInfo::SizeInfo ()
918  {
919  clear();
920  }
921 
922 
923 
924  void SizeInfo::clear()
925  {
926  n_active_cells = 0;
927  n_macro_cells = 0;
928  boundary_cells_start = 0;
929  boundary_cells_end = 0;
930  vectorization_length = 0;
931  locally_owned_cells = IndexSet();
932  ghost_cells = IndexSet();
933  communicator = MPI_COMM_SELF;
934  my_pid = 0;
935  n_procs = 0;
936  }
937 
938 
939 
940  template <typename STREAM>
941  void SizeInfo::print_memory_statistics (STREAM &out,
942  std::size_t data_length) const
943  {
944  Utilities::MPI::MinMaxAvg memory_c;
946  {
947  memory_c = Utilities::MPI::min_max_avg (1e-6*data_length,
948  communicator);
949  }
950  else
951  {
952  memory_c.sum = 1e-6*data_length;
953  memory_c.min = memory_c.sum;
954  memory_c.max = memory_c.sum;
955  memory_c.avg = memory_c.sum;
956  memory_c.min_index = 0;
957  memory_c.max_index = 0;
958  }
959  if (n_procs < 2)
960  out << memory_c.min;
961  else
962  out << memory_c.min << "/" << memory_c.avg << "/" << memory_c.max;
963  out << " MB" << std::endl;
964  }
965 
966 
967 
968  inline
969  void SizeInfo::make_layout (const unsigned int n_active_cells_in,
970  const unsigned int vectorization_length_in,
971  std::vector<unsigned int> &boundary_cells,
972  std::vector<unsigned int> &irregular_cells)
973  {
974  vectorization_length = vectorization_length_in;
975  n_active_cells = n_active_cells_in;
976 
977  unsigned int n_max_boundary_cells = boundary_cells.size();
978  unsigned int n_boundary_cells = n_max_boundary_cells;
979 
980  // try to make the number of boundary cells divisible by the number of
981  // vectors in vectorization
982 
983  /*
984  // try to balance the number of cells before and after the boundary part
985  // on each processor. probably not worth it!
986  #ifdef DEAL_II_WITH_MPI
987  MPI_Allreduce (&n_boundary_cells, &n_max_boundary_cells, 1, MPI_UNSIGNED,
988  MPI_MAX, size_info.communicator);
989  #endif
990  if (n_max_boundary_cells > n_active_cells)
991  n_max_boundary_cells = n_active_cells;
992  */
993 
994  unsigned int fillup_needed =
995  (vectorization_length - n_boundary_cells%vectorization_length)%vectorization_length;
996  if (fillup_needed > 0 && n_boundary_cells < n_active_cells)
997  {
998  // fill additional cells into the list of boundary cells to get a
999  // balanced number. Go through the indices successively until we
1000  // found enough indices
1001  std::vector<unsigned int> new_boundary_cells;
1002  new_boundary_cells.reserve (n_max_boundary_cells);
1003 
1004  unsigned int next_free_slot = 0, bound_index = 0;
1005  while (fillup_needed > 0 && bound_index < boundary_cells.size())
1006  {
1007  if (next_free_slot < boundary_cells[bound_index])
1008  {
1009  // check if there are enough cells to fill with in the
1010  // current slot
1011  if (next_free_slot + fillup_needed <= boundary_cells[bound_index])
1012  {
1013  for (unsigned int j=boundary_cells[bound_index]-fillup_needed;
1014  j < boundary_cells[bound_index]; ++j)
1015  new_boundary_cells.push_back(j);
1016  fillup_needed = 0;
1017  }
1018  // ok, not enough indices, so just take them all up to the
1019  // next boundary cell
1020  else
1021  {
1022  for (unsigned int j=next_free_slot;
1023  j<boundary_cells[bound_index]; ++j)
1024  new_boundary_cells.push_back(j);
1025  fillup_needed -= boundary_cells[bound_index]-next_free_slot;
1026  }
1027  }
1028  new_boundary_cells.push_back(boundary_cells[bound_index]);
1029  next_free_slot = boundary_cells[bound_index]+1;
1030  ++bound_index;
1031  }
1032  while (fillup_needed > 0 && (new_boundary_cells.size()==0 ||
1033  new_boundary_cells.back()<n_active_cells-1))
1034  new_boundary_cells.push_back(new_boundary_cells.back()+1);
1035  while (bound_index<boundary_cells.size())
1036  new_boundary_cells.push_back(boundary_cells[bound_index++]);
1037 
1038  boundary_cells.swap(new_boundary_cells);
1039  }
1040 
1041  // set the number of cells
1042  std::sort (boundary_cells.begin(), boundary_cells.end());
1043  n_boundary_cells = boundary_cells.size();
1044 
1045  // check that number of boundary cells is divisible by
1046  // vectorization_length or that it contains all cells
1047  Assert (n_boundary_cells % vectorization_length == 0 ||
1048  n_boundary_cells == n_active_cells, ExcInternalError());
1049  n_macro_cells = (n_active_cells+vectorization_length-1)/vectorization_length;
1050  irregular_cells.resize (n_macro_cells);
1051  if (n_macro_cells*vectorization_length > n_active_cells)
1052  {
1053  irregular_cells[n_macro_cells-1] =
1054  vectorization_length - (n_macro_cells*vectorization_length - n_active_cells);
1055  }
1056  if (n_procs > 1)
1057  {
1058  const unsigned int n_macro_boundary_cells =
1059  (n_boundary_cells+vectorization_length-1)/vectorization_length;
1060  boundary_cells_start = (n_macro_cells-n_macro_boundary_cells)/2;
1061  boundary_cells_end = boundary_cells_start + n_macro_boundary_cells;
1062  }
1063  else
1064  boundary_cells_start = boundary_cells_end = n_macro_cells;
1065  }
1066 
1067 
1068 
1069  /* ------------------------------------------------------------------ */
1070 
1071  template <typename Number>
1072  FPArrayComparator<Number>::FPArrayComparator (const Number scaling)
1073  :
1074  tolerance (scaling * std::numeric_limits<double>::epsilon() * 1024.)
1075  {}
1076 
1077 
1078 
1079  template <typename Number>
1080  bool
1081  FPArrayComparator<Number>::operator() (const std::vector<Number> &v1,
1082  const std::vector<Number> &v2) const
1083  {
1084  const unsigned int s1 = v1.size(), s2 = v2.size();
1085  if (s1 < s2)
1086  return true;
1087  else if (s1 > s2)
1088  return false;
1089  else
1090  for (unsigned int i=0; i<s1; ++i)
1091  if (v1[i] < v2[i] - tolerance)
1092  return true;
1093  else if (v1[i] > v2[i] + tolerance)
1094  return false;
1095  return false;
1096  }
1097 
1098 
1099 
1100  template <typename Number>
1101  template <int dim>
1102  bool
1105  const Tensor<1,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t2) const
1106  {
1107  for (unsigned int d=0; d<dim; ++d)
1108  for (unsigned int k=0; k<VectorizedArray<Number>::n_array_elements; ++k)
1109  if ((t1)[d][k] < (t2)[d][k] - tolerance)
1110  return true;
1111  else if ((t1)[d][k] > (t2)[d][k] + tolerance)
1112  return false;
1113  return false;
1114  }
1115 
1116 
1117 
1118  template <typename Number>
1119  template <int dim>
1120  bool
1123  const Tensor<2,dim,Tensor<1,VectorizedArray<Number>::n_array_elements,Number> > &t2) const
1124  {
1125  for (unsigned int d=0; d<dim; ++d)
1126  for (unsigned int e=0; e<dim; ++e)
1127  for (unsigned int k=0; k<VectorizedArray<Number>::n_array_elements; ++k)
1128  if ((t1)[d][e][k] < (t2)[d][e][k] - tolerance)
1129  return true;
1130  else if ((t1)[d][e][k] > (t2)[d][e][k] + tolerance)
1131  return false;
1132  return false;
1133  }
1134  }
1135 }
1136 
1137 
1138 DEAL_II_NAMESPACE_CLOSE
void internal_reinit(const Mapping< dim > &mapping, const std::vector< const DoFHandler< dim > * > &dof_handler, const std::vector< const ConstraintMatrix * > &constraint, const std::vector< IndexSet > &locally_owned_set, const std::vector< hp::QCollection< 1 > > &quad, const AdditionalData additional_data)
internal::MatrixFreeFunctions::TaskInfo task_info
Definition: matrix_free.h:923
bool indices_are_initialized
Definition: matrix_free.h:928
static const unsigned int invalid_unsigned_int
Definition: types.h:191
void reinit(const TableIndices< N > &new_size, const bool fast=false)
std::vector< unsigned int > constraint_pool_row_index
Definition: matrix_free.h:893
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:858
internal::MatrixFreeFunctions::MappingInfo< dim, Number > mapping_info
Definition: matrix_free.h:899
DoFHandlers dof_handlers
Definition: matrix_free.h:873
unsigned int component_to_system_index(const unsigned int component, const unsigned int index) const
Definition: fe.h:2082
void initialize_dof_handlers(const std::vector< const DoFHandler< dim > * > &dof_handlers, const unsigned int level)
void make_layout(const unsigned int n_active_cells_in, const unsigned int vectorization_length_in, std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &irregular_cells)
::ExceptionBase & ExcMessage(std::string arg1)
std::vector< Number > constraint_pool_data
Definition: matrix_free.h:887
#define AssertIndexRange(index, range)
Definition: exceptions.h:888
Table< 4, internal::MatrixFreeFunctions::ShapeInfo< Number > > shape_info
Definition: matrix_free.h:904
void copy_from(const MatrixFree< dim, Number > &matrix_free_base)
std::vector< std::pair< unsigned int, unsigned int > > cell_level_index
Definition: matrix_free.h:912
unsigned int size() const
virtual const FiniteElement< dim, spacedim > & base_element(const unsigned int index) const
unsigned int n_active_cells() const
unsigned int n_levels() const
std::vector< unsigned int > get_poly_space_numbering() const
bool mapping_is_initialized
Definition: matrix_free.h:933
unsigned int n_macro_cells() const
cell_iterator end() const
internal::MatrixFreeFunctions::SizeInfo size_info
Definition: matrix_free.h:918
const hp::FECollection< dim, spacedim > & get_fe() const
unsigned int element_multiplicity(const unsigned int index) const
Definition: fe.h:2072
void print_memory_consumption(STREAM &out) const
unsigned int n_cells() const
void print_memory_statistics(STREAM &out, std::size_t data_length) const
unsigned int global_dof_index
Definition: types.h:100
#define Assert(cond, exc)
Definition: exceptions.h:299
unsigned int n_components() const
std::size_t memory_consumption(const T &t)
cell_iterator begin(const unsigned int level=0) const
unsigned int n_base_elements() const
Definition: fe.h:2062
TasksParallelScheme tasks_parallel_scheme
Definition: matrix_free.h:205
std::vector< unsigned int > invert_permutation(const std::vector< unsigned int > &permutation)
unsigned int n_mpi_processes(const MPI_Comm &mpi_communicator)
const unsigned int dofs_per_cell
Definition: fe_base.h:271
std::size_t memory_consumption() const
unsigned int tasks_block_size
Definition: matrix_free.h:217
const Triangulation< dim, spacedim > & get_tria() const
unsigned int level_mg_handler
Definition: matrix_free.h:240
std::vector< unsigned int > get_poly_space_numbering_inverse() const
const unsigned int dofs_per_face
Definition: fe_base.h:264
UpdateFlags mapping_update_flags
Definition: matrix_free.h:230
unsigned int this_mpi_process(const MPI_Comm &mpi_communicator)
::ExceptionBase & ExcNotImplemented()
MultithreadInfo multithread_info
const Triangulation< dim, spacedim > & get_tria() const
std::vector< internal::MatrixFreeFunctions::DoFInfo > dof_info
Definition: matrix_free.h:879
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
::ExceptionBase & ExcInternalError()
void initialize_indices(const std::vector< const ConstraintMatrix * > &constraint, const std::vector< IndexSet > &locally_owned_set)
void print(std::ostream &out) const
const FiniteElement< dim, spacedim > & get_fe() const