18 #include <deal.II/base/utilities.h> 19 #include <deal.II/base/memory_consumption.h> 20 #include <deal.II/base/tensor_product_polynomials.h> 21 #include <deal.II/base/polynomials_piecewise.h> 22 #include <deal.II/base/mpi.h> 23 #include <deal.II/dofs/dof_accessor.h> 24 #include <deal.II/fe/fe_poly.h> 25 #include <deal.II/hp/q_collection.h> 26 #include <deal.II/distributed/tria.h> 28 #include <deal.II/matrix_free/matrix_free.h> 29 #include <deal.II/matrix_free/shape_info.templates.h> 30 #include <deal.II/matrix_free/mapping_info.templates.h> 31 #include <deal.II/matrix_free/dof_info.templates.h> 39 template <
int dim,
typename Number>
42 indices_are_initialized (false),
43 mapping_is_initialized (false)
48 template <
int dim,
typename Number>
54 template <
int dim,
typename Number>
77 void assert_communicator_equality (const ::Triangulation<dim> &tria,
78 const MPI_Comm &comm_mf)
80 #ifdef DEAL_II_WITH_MPI 87 int communicators_same = 0;
90 Assert (communicators_same == MPI_IDENT ||
91 communicators_same == MPI_CONGRUENT,
92 ExcMessage (
"MPI communicator in parallel::distributed::Triangulation " 93 "and matrix free class must be the same!"));
105 template <
int dim,
typename Number>
109 const std::vector<const ConstraintMatrix *> &constraint,
110 const std::vector<IndexSet> &locally_owned_set,
122 internal::assert_communicator_equality (dof_handler[0]->get_tria(),
139 for (
unsigned int no=0; no<dof_handler.size(); ++no)
144 #ifdef DEAL_II_WITH_THREADS
152 AdditionalData::partition_partition ?
true :
false);
155 AdditionalData::color ?
true :
false);
169 else if (
dof_info.size() != dof_handler.size())
172 std::vector<unsigned int> dummy;
176 for (
unsigned int i=0; i<
dof_info.size(); ++i)
179 dof_info[i].n_components = dof_handler[i]->get_fe().element_multiplicity(0);
180 dof_info[i].dofs_per_cell.push_back(dof_handler[i]->get_fe().dofs_per_cell);
196 const unsigned int n_fe = dof_handler.size();
197 const unsigned int n_quad = quad.size();
199 for (
unsigned int no=0; no<n_fe; no++)
202 for (
unsigned int nq =0; nq<n_quad; nq++)
218 dof_info[0].cell_active_fe_index, mapping, quad,
227 template <
int dim,
typename Number>
231 const std::vector<const ConstraintMatrix *> &constraint,
232 const std::vector<IndexSet> &locally_owned_set,
244 internal::assert_communicator_equality (dof_handler[0]->get_tria(),
261 for (
unsigned int no=0; no<dof_handler.size(); ++no)
266 #ifdef DEAL_II_WITH_THREADS
274 AdditionalData::partition_partition ?
true :
false);
277 AdditionalData::color ?
true :
false);
291 else if (
dof_info.size() != dof_handler.size())
294 std::vector<unsigned int> dummy;
298 for (
unsigned int i=0; i<
dof_info.size(); ++i)
302 dof_info[i].n_components = dof_handler[i]->get_fe()[0].element_multiplicity(0);
303 dof_info[i].dofs_per_cell.push_back(dof_handler[i]->get_fe()[0].dofs_per_cell);
320 const unsigned int n_quad = quad.size();
321 unsigned int n_fe_in_collection = 0;
323 n_fe_in_collection = std::max (n_fe_in_collection,
324 dof_handler[i]->get_fe().size());
325 unsigned int n_quad_in_collection = 0;
326 for (
unsigned int q=0; q<n_quad; ++q)
327 n_quad_in_collection = std::max (n_quad_in_collection, quad[q].size());
330 n_quad_in_collection));
332 for (
unsigned int fe_no=0; fe_no<dof_handler[no]->get_fe().size(); ++fe_no)
335 for (
unsigned int nq =0; nq<n_quad; nq++)
336 for (
unsigned int q_no=0; q_no<quad[nq].size(); ++q_no)
347 dof_info[0].cell_active_fe_index, mapping, quad,
361 template <
typename InIterator>
362 void resolve_cell (
const InIterator &cell,
363 std::vector<std::pair<unsigned int,unsigned int> > &cell_its,
364 const unsigned int subdomain_id)
366 if (cell->has_children())
367 for (
unsigned int child=0; child<cell->n_children(); ++child)
368 resolve_cell (cell->child(child), cell_its,
370 else if (cell->subdomain_id() == subdomain_id)
373 cell_its.push_back (std::pair<unsigned int,unsigned int>
374 (cell->level(), cell->index()));
381 template <
int dim,
typename Number>
384 const unsigned int level)
390 for (
unsigned int no=0; no<
dof_handlers.n_dof_handlers; ++no)
398 const unsigned int n_mpi_procs =
size_info.n_procs;
399 const unsigned int my_pid =
size_info.my_pid;
404 if (n_mpi_procs == 1)
407 end_cell = tria.
end(0);
408 for ( ; cell != end_cell; ++cell)
416 end_cell = tria.
end(level);
417 for ( ; cell != end_cell; ++cell)
419 (cell->level(), cell->index()));
425 template <
int dim,
typename Number>
433 for (
unsigned int no=0; no<
dof_handlers.n_dof_handlers; ++no)
441 const unsigned int n_mpi_procs =
size_info.n_procs;
442 const unsigned int my_pid =
size_info.my_pid;
448 if (n_mpi_procs == 1)
452 typename hp::DoFHandler<dim>::cell_iterator cell = dof_handler[0]->begin(0),
453 end_cell = dof_handler[0]->end(0);
454 for ( ; cell != end_cell; ++cell)
461 template <
int dim,
typename Number>
463 (
const std::vector<const ConstraintMatrix *> &constraint,
464 const std::vector<IndexSet> &locally_owned_set)
473 std::vector<types::global_dof_index> local_dof_indices;
474 std::vector<std::vector<std::vector<unsigned int> > > lexicographic_inv(n_fe);
477 std::vector<unsigned int> constraint_indices;
479 for (
unsigned int no=0; no<n_fe; ++no)
481 std::vector<const FiniteElement<dim>*> fes;
486 for (
unsigned int f=0; f<fe.
size(); ++f)
487 fes.push_back (&fe[f]);
491 for (
unsigned int ind=0; ind<hpdof->
get_fe().size(); ++ind)
492 dof_info[no].fe_index_conversion[ind] =
493 std::pair<unsigned int,unsigned int>(fe[ind].degree,
494 fe[ind].dofs_per_cell);
496 dof_info[no].cell_active_fe_index.resize(n_active_cells,
502 fes.push_back (&dofh->
get_fe());
504 dof_info[no].fe_index_conversion.resize (1);
505 dof_info[no].fe_index_conversion[0] =
506 std::pair<unsigned int,unsigned int>(fes.back()->degree,
507 fes.back()->dofs_per_cell);
510 lexicographic_inv[no].resize (fes.size());
511 for (
unsigned int fe_index = 0; fe_index<fes.size(); ++fe_index)
515 ExcMessage (
"MatrixFree only works for DoFHandler with one base element"));
522 dof_info[no].n_components = n_fe_components;
543 if (n_fe_components == 1)
546 lexicographic_inv[no][fe_index] =
549 lexicographic_inv[no][fe_index] =
552 dof_info[no].dofs_per_cell[fe_index]);
558 std::vector<unsigned int> scalar_lex =
562 dof_info[no].dofs_per_cell[fe_index]);
563 std::vector<unsigned int> lexicographic (
dof_info[no].dofs_per_cell[fe_index]);
564 for (
unsigned int comp=0; comp<n_fe_components; ++comp)
565 for (
unsigned int i=0; i<scalar_lex.size(); ++i)
567 = scalar_lex.size () * comp + scalar_lex[i];
570 lexicographic_inv[no][fe_index] =
574 dof_info[no].dofs_per_cell[fe_index]);
579 dof_info[no].vector_partitioner.reset
583 dof_info[no].row_starts.resize (n_active_cells+1);
588 ((n_active_cells*
dof_info[no].dofs_per_cell[0]*3)/2);
593 start_index =
dof_info[no].vector_partitioner->local_range().first,
594 end_index =
dof_info[no].vector_partitioner->local_range().second;
596 if (constraint[no]->is_constrained(i)==
true)
598 push_back(static_cast<unsigned int>(i-start_index));
604 std::vector<unsigned int> boundary_cells;
605 for (
unsigned int counter = 0 ; counter < n_active_cells ; ++counter)
607 bool cell_at_boundary =
false;
608 for (
unsigned int no=0; no<n_fe; ++no)
611 if (
dof_handlers.active_dof_handler == DoFHandlers::usual &&
620 local_dof_indices.resize (
dof_info[no].dofs_per_cell[0]);
621 cell_it->get_dof_indices(local_dof_indices);
622 dof_info[no].read_dof_indices (local_dof_indices,
623 lexicographic_inv[no][0],
624 *constraint[no], counter,
629 else if (
dof_handlers.active_dof_handler == DoFHandlers::usual &&
639 local_dof_indices.resize (
dof_info[no].dofs_per_cell[0]);
640 cell_it->get_mg_dof_indices(local_dof_indices);
641 dof_info[no].read_dof_indices (local_dof_indices,
642 lexicographic_inv[no][0],
643 *constraint[no], counter,
647 else if (
dof_handlers.active_dof_handler == DoFHandlers::hp)
651 typename hp::DoFHandler<dim>::active_cell_iterator
656 if (dofh->
get_fe().size() > 1)
657 dof_info[no].cell_active_fe_index[counter] =
658 cell_it->active_fe_index();
659 local_dof_indices.resize (cell_it->get_fe().dofs_per_cell);
660 cell_it->get_dof_indices(local_dof_indices);
661 dof_info[no].read_dof_indices (local_dof_indices,
662 lexicographic_inv[no][cell_it->active_fe_index()],
663 *constraint[no], counter,
675 if (cell_at_boundary ==
true)
676 boundary_cells.push_back(counter);
679 const unsigned int vectorization_length =
681 std::vector<unsigned int> irregular_cells;
685 for (
unsigned int no=0; no<n_fe; ++no)
686 dof_info[no].assign_ghosts (boundary_cells);
691 std::vector<unsigned int> renumbering;
692 if (
task_info.use_multithreading ==
true)
696 if (
task_info.use_partition_partition ==
true)
697 dof_info[0].make_thread_graph_partition_partition
701 dof_info[0].make_thread_graph_partition_color
721 std::vector<unsigned int> sorted_renumbering (renumbering);
722 std::sort (sorted_renumbering.begin(), sorted_renumbering.end());
723 for (
unsigned int i=0; i<sorted_renumbering.size(); ++i)
728 std::vector<std::pair<unsigned int,unsigned int> >
729 cell_level_index_old;
732 unsigned int position_cell=0;
733 for (
unsigned int i=0; i<
size_info.n_macro_cells; ++i)
735 unsigned int n_comp = (irregular_cells[i]>0)?
736 irregular_cells[i] : vectorization_length;
737 for (
unsigned int j=0; j<n_comp; ++j)
739 (cell_level_index_old[renumbering[position_cell+j]]);
745 for (
unsigned int j=n_comp; j<vectorization_length; ++j)
747 (cell_level_index_old[renumbering[position_cell+n_comp-1]]);
748 position_cell += n_comp;
757 it = constraint_values.constraints.begin(),
758 end = constraint_values.constraints.end();
759 std::vector<const std::vector<double>*>
760 constraints (constraint_values.constraints.size());
761 unsigned int length = 0;
762 for ( ; it != end; ++it)
765 constraints[it->second] = &it->first;
766 length += it->first.size();
772 for (
unsigned int i=0; i<constraints.size(); ++i)
776 constraints[i]->begin(),
777 constraints[i]->end());
781 for (
unsigned int no=0; no<n_fe; ++no)
784 irregular_cells, vectorization_length);
791 template <
int dim,
typename Number>
807 template <
int dim,
typename Number>
816 memory +=
sizeof(
this);
822 template <
int dim,
typename Number>
823 template <
typename STREAM>
826 out <<
" Memory cell FE operator total: --> ";
828 out <<
" Memory cell index: ";
831 for (
unsigned int j=0; j<
dof_info.size(); ++ j)
833 out <<
" Memory DoFInfo component "<< j << std::endl;
837 out <<
" Memory mapping info" << std::endl;
840 out <<
" Memory unit cell shape data: ";
843 if (
task_info.use_multithreading ==
true)
845 out <<
" Memory task partitioning info: ";
853 template <
int dim,
typename Number>
857 for (
unsigned int no=0; no<
dof_info.size(); ++no)
859 out <<
"\n-- Index data for component " << no <<
" --" << std::endl;
871 namespace MatrixFreeFunctions
874 TaskInfo::TaskInfo ()
881 void TaskInfo::clear ()
886 position_short_block = 0;
887 use_multithreading =
false;
888 use_partition_partition =
false;
889 use_coloring_only =
false;
890 partition_color_blocks_row_index.clear();
891 partition_color_blocks_data.clear();
894 n_blocked_workers = 0;
896 partition_evens.clear();
897 partition_odds.clear();
898 partition_n_blocked_workers.clear();
899 partition_n_workers.clear();
905 TaskInfo::memory_consumption ()
const 917 SizeInfo::SizeInfo ()
924 void SizeInfo::clear()
928 boundary_cells_start = 0;
929 boundary_cells_end = 0;
930 vectorization_length = 0;
933 communicator = MPI_COMM_SELF;
940 template <
typename STREAM>
941 void SizeInfo::print_memory_statistics (STREAM &out,
942 std::size_t data_length)
const 952 memory_c.sum = 1e-6*data_length;
953 memory_c.min = memory_c.sum;
954 memory_c.max = memory_c.sum;
955 memory_c.avg = memory_c.sum;
956 memory_c.min_index = 0;
957 memory_c.max_index = 0;
962 out << memory_c.min <<
"/" << memory_c.avg <<
"/" << memory_c.max;
963 out <<
" MB" << std::endl;
969 void SizeInfo::make_layout (
const unsigned int n_active_cells_in,
970 const unsigned int vectorization_length_in,
971 std::vector<unsigned int> &boundary_cells,
972 std::vector<unsigned int> &irregular_cells)
974 vectorization_length = vectorization_length_in;
975 n_active_cells = n_active_cells_in;
977 unsigned int n_max_boundary_cells = boundary_cells.size();
978 unsigned int n_boundary_cells = n_max_boundary_cells;
994 unsigned int fillup_needed =
995 (vectorization_length - n_boundary_cells%vectorization_length)%vectorization_length;
996 if (fillup_needed > 0 && n_boundary_cells < n_active_cells)
1001 std::vector<unsigned int> new_boundary_cells;
1002 new_boundary_cells.reserve (n_max_boundary_cells);
1004 unsigned int next_free_slot = 0, bound_index = 0;
1005 while (fillup_needed > 0 && bound_index < boundary_cells.size())
1007 if (next_free_slot < boundary_cells[bound_index])
1011 if (next_free_slot + fillup_needed <= boundary_cells[bound_index])
1013 for (
unsigned int j=boundary_cells[bound_index]-fillup_needed;
1014 j < boundary_cells[bound_index]; ++j)
1015 new_boundary_cells.push_back(j);
1022 for (
unsigned int j=next_free_slot;
1023 j<boundary_cells[bound_index]; ++j)
1024 new_boundary_cells.push_back(j);
1025 fillup_needed -= boundary_cells[bound_index]-next_free_slot;
1028 new_boundary_cells.push_back(boundary_cells[bound_index]);
1029 next_free_slot = boundary_cells[bound_index]+1;
1032 while (fillup_needed > 0 && (new_boundary_cells.size()==0 ||
1033 new_boundary_cells.back()<n_active_cells-1))
1034 new_boundary_cells.push_back(new_boundary_cells.back()+1);
1035 while (bound_index<boundary_cells.size())
1036 new_boundary_cells.push_back(boundary_cells[bound_index++]);
1038 boundary_cells.swap(new_boundary_cells);
1042 std::sort (boundary_cells.begin(), boundary_cells.end());
1043 n_boundary_cells = boundary_cells.size();
1047 Assert (n_boundary_cells % vectorization_length == 0 ||
1049 n_macro_cells = (n_active_cells+vectorization_length-1)/vectorization_length;
1054 vectorization_length - (
n_macro_cells*vectorization_length - n_active_cells);
1058 const unsigned int n_macro_boundary_cells =
1059 (n_boundary_cells+vectorization_length-1)/vectorization_length;
1060 boundary_cells_start = (
n_macro_cells-n_macro_boundary_cells)/2;
1061 boundary_cells_end = boundary_cells_start + n_macro_boundary_cells;
1071 template <
typename Number>
1074 tolerance (scaling * std::numeric_limits<double>::epsilon() * 1024.)
1079 template <
typename Number>
1082 const std::vector<Number> &v2)
const 1084 const unsigned int s1 = v1.size(), s2 = v2.size();
1090 for (
unsigned int i=0; i<s1; ++i)
1091 if (v1[i] < v2[i] - tolerance)
1093 else if (v1[i] > v2[i] + tolerance)
1100 template <
typename Number>
1107 for (
unsigned int d=0; d<dim; ++d)
1108 for (
unsigned int k=0; k<VectorizedArray<Number>::n_array_elements; ++k)
1109 if ((t1)[d][k] < (t2)[d][k] - tolerance)
1111 else if ((t1)[d][k] > (t2)[d][k] + tolerance)
1118 template <
typename Number>
1125 for (
unsigned int d=0; d<dim; ++d)
1126 for (
unsigned int e=0; e<dim; ++e)
1127 for (
unsigned int k=0; k<VectorizedArray<Number>::n_array_elements; ++k)
1128 if ((t1)[d][e][k] < (t2)[d][e][k] - tolerance)
1130 else if ((t1)[d][e][k] > (t2)[d][e][k] + tolerance)
1138 DEAL_II_NAMESPACE_CLOSE
void internal_reinit(const Mapping< dim > &mapping, const std::vector< const DoFHandler< dim > * > &dof_handler, const std::vector< const ConstraintMatrix * > &constraint, const std::vector< IndexSet > &locally_owned_set, const std::vector< hp::QCollection< 1 > > &quad, const AdditionalData additional_data)
MPI_Comm get_communicator() const
internal::MatrixFreeFunctions::TaskInfo task_info
bool indices_are_initialized
static const unsigned int invalid_unsigned_int
void reinit(const TableIndices< N > &new_size, const bool fast=false)
std::vector< unsigned int > constraint_pool_row_index
#define AssertDimension(dim1, dim2)
internal::MatrixFreeFunctions::MappingInfo< dim, Number > mapping_info
unsigned int component_to_system_index(const unsigned int component, const unsigned int index) const
void initialize_dof_handlers(const std::vector< const DoFHandler< dim > * > &dof_handlers, const unsigned int level)
void make_layout(const unsigned int n_active_cells_in, const unsigned int vectorization_length_in, std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &irregular_cells)
::ExceptionBase & ExcMessage(std::string arg1)
std::vector< Number > constraint_pool_data
#define AssertIndexRange(index, range)
Table< 4, internal::MatrixFreeFunctions::ShapeInfo< Number > > shape_info
void copy_from(const MatrixFree< dim, Number > &matrix_free_base)
std::vector< std::pair< unsigned int, unsigned int > > cell_level_index
unsigned int size() const
virtual const FiniteElement< dim, spacedim > & base_element(const unsigned int index) const
unsigned int n_active_cells() const
unsigned int n_levels() const
std::vector< unsigned int > get_poly_space_numbering() const
bool mapping_is_initialized
unsigned int n_macro_cells() const
cell_iterator end() const
internal::MatrixFreeFunctions::SizeInfo size_info
const hp::FECollection< dim, spacedim > & get_fe() const
unsigned int element_multiplicity(const unsigned int index) const
void print_memory_consumption(STREAM &out) const
unsigned int n_cells() const
void print_memory_statistics(STREAM &out, std::size_t data_length) const
unsigned int global_dof_index
#define Assert(cond, exc)
unsigned int n_components() const
std::size_t memory_consumption(const T &t)
cell_iterator begin(const unsigned int level=0) const
unsigned int n_base_elements() const
TasksParallelScheme tasks_parallel_scheme
MPI_Comm mpi_communicator
std::vector< unsigned int > invert_permutation(const std::vector< unsigned int > &permutation)
unsigned int n_mpi_processes(const MPI_Comm &mpi_communicator)
const unsigned int dofs_per_cell
std::size_t memory_consumption() const
unsigned int tasks_block_size
const Triangulation< dim, spacedim > & get_tria() const
unsigned int level_mg_handler
std::vector< unsigned int > get_poly_space_numbering_inverse() const
const unsigned int dofs_per_face
UpdateFlags mapping_update_flags
unsigned int this_mpi_process(const MPI_Comm &mpi_communicator)
::ExceptionBase & ExcNotImplemented()
MultithreadInfo multithread_info
const Triangulation< dim, spacedim > & get_tria() const
std::vector< internal::MatrixFreeFunctions::DoFInfo > dof_info
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
::ExceptionBase & ExcInternalError()
void initialize_indices(const std::vector< const ConstraintMatrix * > &constraint, const std::vector< IndexSet > &locally_owned_set)
void print(std::ostream &out) const
const FiniteElement< dim, spacedim > & get_fe() const