// Forward declarations of the generic dg::blas2 dispatch functions used by the matrix
// classes below (the surrounding namespace blocks are not shown in this extract):
template< class MatrixType, class ContainerType1, class ContainerType2>
void symv( MatrixType&& M, const ContainerType1& x, ContainerType2& y);

template< class FunctorType, class MatrixType, class ContainerType1, class ContainerType2>
void stencil( FunctorType f, MatrixType&& M, const ContainerType1& x, ContainerType2& y);

// alpha/beta overload
template< class MatrixType, class ContainerType1, class ContainerType2>
void symv( get_value_type<ContainerType1> alpha, MatrixType&& M, const ContainerType1& x,
           get_value_type<ContainerType1> beta, ContainerType2& y);

// shared-memory kernel launcher used by MPIDistMat below
template< class Stencil, class ContainerType, class ...ContainerTypes>
void doParallelFor( SharedVectorTag, Stencil f, unsigned N, ContainerType&& x, ContainerTypes&&... xs);
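As a quick orientation, both MPI matrix classes defined in this file are meant to be applied through these generic calls. A minimal sketch, assuming a matrix M and vectors x, y whose types are registered with TensorTraits, and assuming dg/algorithm.h as umbrella header (neither appears in this extract):

    #include "dg/algorithm.h"  // assumption: umbrella header of the dg library

    // apply any registered matrix type through the generic blas2 interface
    template<class Matrix, class Container>
    void apply( const Matrix& M, const Container& x, Container& y)
    {
        dg::blas2::symv( M, x, y);          // y = M x
        dg::blas2::symv( 2., M, x, 1., y);  // y = 2 M x + y
    }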
// Distributed memory sparse block matrix with asynchronous communication (namespace dg)
template<template<class> class Vector, class LocalMatrixInner, class LocalMatrixOuter = LocalMatrixInner>
struct MPISparseBlockMat
{
    using value_type = get_value_type<LocalMatrixInner>;

    MPISparseBlockMat() = default;
    MPISparseBlockMat( const LocalMatrixInner& inside,
                       const LocalMatrixOuter& outside,
                       const MPIKroneckerGather<Vector>& mpi_gather)
    : m_i(inside), m_o(outside), m_g(mpi_gather)
    { }
    template< template<class> class V, class LI, class LO>
    friend class MPISparseBlockMat; // allow construction from other Vector/local matrix types
    template< template<class> class V, class LI, class LO>
    MPISparseBlockMat( const MPISparseBlockMat<V,LI,LO>& src)
    : m_i(src.m_i), m_o(src.m_o), m_g(src.m_g)
    { }
    // (read access to the inner/outer matrix and the communicator elided from this extract)

    // y = alpha * M * x + beta * y; the halo exchange overlaps with the local product
    template<class ContainerType1, class ContainerType2>
    void symv( dg::get_value_type<ContainerType1> alpha, const ContainerType1& x,
               dg::get_value_type<ContainerType1> beta, ContainerType2& y) const
    {
        // purely local product and early return if no communication is needed
        if( !m_g.isCommunicating())
        {
            // ... (elided) ...
            return;
        }
        int result;
        MPI_Comm_compare( x.communicator(), y.communicator(), &result);
        assert( result == MPI_CONGRUENT || result == MPI_IDENT);
        // one pointer per block column of the outer (communication) matrix
        m_buffer_ptrs.template set<const value_type*>( m_o.num_cols);
        auto& buffer_ptrs = m_buffer_ptrs.template get<const value_type*>();
        // start the non-blocking halo exchange ...
        m_g.global_gather_init( x.data());
        // ... (the inner product m_i * x is computed here, overlapping with communication) ...
        m_g.global_gather_wait( x.data(), buffer_ptrs);
        // finally add the contribution of the outer matrix applied to the gathered blocks
        if( buffer_ptrs.size() > 0)
        {
            const value_type** b_ptrs = thrust::raw_pointer_cast( buffer_ptrs.data());
            value_type* y_ptr = thrust::raw_pointer_cast( y.data().data());
            m_o.symv( SharedVectorTag(), get_execution_policy<ContainerType1>(),
                alpha, b_ptrs, value_type(1.), y_ptr);
        }
    }
    // y = M * x (forwards to the alpha/beta overload)
    template<class ContainerType1, class ContainerType2>
    void symv( const ContainerType1& x, ContainerType2& y) const
    {
        symv( 1., x, 0., y);
    }

    private:
    LocalMatrixInner m_i;
    LocalMatrixOuter m_o;
    MPIKroneckerGather<Vector> m_g;
    mutable detail::AnyVector<Vector> m_buffer_ptrs;
};
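In formula form, the symv members above compute

    y <- alpha * ( M_inner x_local + M_outer x_halo ) + beta * y

where x_local is the process-local part of x and x_halo are the column values gathered from neighbouring processes. The halo exchange for x_halo is started before and completed after the M_inner product, so communication and local computation overlap.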
// Split the given EllSparseBlockMat into a purely local computation part and a communication part
template<class real_type, class ConversionPolicyRows, class ConversionPolicyCols>
auto make_mpi_sparseblockmat( const EllSparseBlockMat<real_type, thrust::host_vector>& src,
    const ConversionPolicyRows& g_rows,
    const ConversionPolicyCols& g_cols)
{
    // rows and columns must be decomposed over the same Cartesian communicator
    int result;
    MPI_Comm_compare( g_rows.communicator(), g_cols.communicator(), &result);
    assert( result == MPI_CONGRUENT or result == MPI_IDENT);
    int rank;
    MPI_Comm_rank( g_cols.communicator(), &rank);
    int ndim;
    MPI_Cartdim_get( g_cols.communicator(), &ndim);
    // ...
    int dim, period, coord;
    MPI_Cart_get( g_cols.communicator(), 1, &dim, &period, &coord);
    // ...
    unsigned n = src.n, rn = g_rows.n()/n, cn = g_cols.n()/n;
    const int bpl = src.blocks_per_line;
    // construct the inner (local) matrix; the leading size arguments are elided in this extract
    EllSparseBlockMat<real_type, thrust::host_vector> inner( /* ..., */
        bpl, src.data.size()/(src.n*src.n), src.n);
    // ...

    // 1. find the rows of the inner matrix that reference only process-local columns
    std::vector<bool> local_row( inner.num_rows, true);
    for( int i=0; i<inner.num_rows; i++)
    for( int k=0; k<bpl; k++)
    {
        // (control flow partly elided in this extract)
        //   assert( g_rows.local2globalIdx( i*src.n, rank, gIdx));
        //   if( inner.cols_idx[i*bpl+k] == invalid_idx) ...
        //   int lIdx = 0, pid = 0;
        //   assert( g_cols.global2localIdx( gIdx, lIdx, pid));
        //   ... a row that references a column owned by another process is not local:
        //   local_row[i] = false;
    }

    // 2. move all entries that reference columns on other processes into coordinate format
    thrust::host_vector<std::array<int,2>> gColIdx;
    thrust::host_vector<int> rowIdx;
    thrust::host_vector<int> dataIdx;
    for( int i=0; i<inner.num_rows; i++)
    for( int k=0; k<bpl; k++)
    {
        int gIdx = inner.cols_idx[i*bpl + k];
        int lIdx = 0, pid = 0;
        if( gIdx != invalid_idx)
        {
            assert( g_cols.global2localIdx( gIdx*src.n, lIdx, pid));
            // ... (process-local columns remain in the inner matrix; elided) ...
        }
        // ...
        if( gIdx != invalid_idx)
        {
            rowIdx.push_back( i);
            dataIdx.push_back( inner.data_idx[i*bpl+k]);
            gColIdx.push_back( {pid, lIdx});
            inner.cols_idx[i*bpl + k] = invalid_idx; // remove the entry from the inner matrix
        }
    }

    // 3. translate the {rank, local index} columns into a communication pattern
    thrust::host_vector<int> lColIdx;
    auto gather_map = dg::gIdx2unique_idx( gColIdx, lColIdx);
    // ... assemble the outer matrix from (rowIdx, dataIdx, lColIdx), build the gather object
    //     from gather_map, and return the assembled MPISparseBlockMat (elided) ...
}
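A hypothetical usage sketch of the conversion above. The grid type is an assumption: anything that models the ConversionPolicy interface used above (communicator(), n(), local2globalIdx(), global2localIdx()); dg/algorithm.h as umbrella header is also an assumption:

    #include <thrust/host_vector.h>
    #include "dg/algorithm.h"  // assumption: umbrella header of the dg library

    // build the distributed matrix from a locally assembled EllSparseBlockMat and apply it
    template<class Grid, class MPIVector>
    void apply_distributed( const dg::EllSparseBlockMat<double, thrust::host_vector>& local_mat,
        const Grid& g_rows, const Grid& g_cols, const MPIVector& x, MPIVector& y)
    {
        // split into inner (local) and outer (communication) part
        auto M = dg::make_mpi_sparseblockmat( local_mat, g_rows, g_cols);
        // the returned matrix is registered with TensorTraits, so blas2::symv dispatches to it
        dg::blas2::symv( M, x, y);   // y = M x with overlapped halo exchange
    }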
// Distributed memory matrix class with asynchronous communication (namespace dg)
template<template<class> class Vector, class LocalMatrixInner, class LocalMatrixOuter = LocalMatrixInner>
struct MPIDistMat
{
    using value_type = get_value_type<LocalMatrixInner>;

    MPIDistMat() = default; // no memory allocation
    // only local computations, no communication (row_dist and allreduce are enumerators of
    // the dist_type enum defined earlier in this header)
    MPIDistMat( const LocalMatrixInner& m)
    : m_i(m), m_dist(row_dist){}
    template< template<class> class V, class LI, class LO>
    friend class MPIDistMat; // allow construction from other Vector/local matrix types
    // row distributed matrix with an explicit communication (outer) part
    MPIDistMat( const LocalMatrixInner& inside,
                const LocalMatrixOuter& outside,
                const MPIGather<Vector>& mpi_gather,
                const Vector<int>& scatter)
    : m_i(inside), m_o(outside), m_g(mpi_gather), m_scatter(scatter), m_dist(row_dist){}
    // allreduce dist type: every process computes a partial product that is summed afterwards
    MPIDistMat( const LocalMatrixInner& m, MPI_Comm comm)
    : m_i(m), m_dist(allreduce), m_reduce(comm){}
    // copy/convert from a matrix with other Vector or local matrix types
    template< template<class> class OtherVector, class OtherMatrixInner, class OtherMatrixOuter>
    MPIDistMat( const MPIDistMat<OtherVector, OtherMatrixInner, OtherMatrixOuter>& src)
    : m_i( src.m_i), m_o( src.m_o), m_g( src.m_g),
      m_scatter( src.m_scatter), m_dist( src.m_dist), m_reduce( src.communicator())
    { }
    // (read access to the inner/outer matrix, the gather object and the communicator elided)
    const Vector<int>& scatter() const { return m_scatter;}
    // y = M * x (row distributed or allreduce distributed)
    template<class ContainerType1, class ContainerType2>
    void symv( const ContainerType1& x, ContainerType2& y) const
    {
        // purely local product and early return if no communication is needed
        if( !m_g.isCommunicating())
        {
            // ... y = m_i * x computed locally (elided) ...
            if( m_dist == allreduce)
                m_reduce.reduce( y.data());
            return;
        }
        assert( m_dist == row_dist);
        int result;
        MPI_Comm_compare( x.communicator(), y.communicator(), &result);
        assert( result == MPI_CONGRUENT || result == MPI_IDENT);
        // allocate the receive buffer (halo values) and the computation buffer (outer result)
        m_recv_buffer.template set<value_type>( m_g.buffer_size());
        m_comp_buffer.template set<value_type>( m_o.num_rows());
        auto& recv_buffer = m_recv_buffer.template get<value_type>();
        auto& comp_buffer = m_comp_buffer.template get<value_type>();
        // start the non-blocking halo exchange ...
        m_g.global_gather_init( x.data(), recv_buffer);
        // ... (the inner product m_i * x is computed here, overlapping with communication) ...
        m_g.global_gather_wait( recv_buffer);
        if( comp_buffer.size() > 0)
        {
            // apply the outer matrix to the received values (elided), then scatter into y
            unsigned size = comp_buffer.size();
            doParallelFor( SharedVectorTag(), [size] DG_DEVICE( unsigned i, const value_type* x,
                const int* idx, value_type* y) {
                    // (kernel body elided: accumulates the buffered values x[i] into y[idx[i]])
                }, size, comp_buffer, m_scatter, y.data());
        }
    }
    // apply the stencil functor f, with the same halo exchange pattern as symv
    template<class Functor, class ContainerType1, class ContainerType2>
    void stencil( const Functor f, const ContainerType1& x, ContainerType2& y) const
    {
        // purely local application and early return if no communication is needed
        if( !m_g.isCommunicating())
        {
            // ... (elided) ...
            if( m_dist == allreduce)
                m_reduce.reduce( y.data());
            return;
        }
        assert( m_dist == row_dist);
        int result;
        MPI_Comm_compare( x.communicator(), y.communicator(), &result);
        assert( result == MPI_CONGRUENT || result == MPI_IDENT);
        m_recv_buffer.template set<value_type>( m_g.buffer_size());
        m_comp_buffer.template set<value_type>( m_o.num_rows());
        auto& recv_buffer = m_recv_buffer.template get<value_type>();
        auto& comp_buffer = m_comp_buffer.template get<value_type>();
        // start the non-blocking halo exchange ...
        m_g.global_gather_init( x.data(), recv_buffer);
        // ... (f is applied to the inner part here, overlapping with communication) ...
        m_g.global_gather_wait( recv_buffer);
        // apply f to the received values (elided), then scatter the result into y
        unsigned size = comp_buffer.size();
        doParallelFor( SharedVectorTag(), [size] DG_DEVICE( unsigned i, const value_type* x,
            const int* idx, value_type* y) {
                // (kernel body elided: scatters the buffered values x[i] into y[idx[i]])
            }, size, comp_buffer, m_scatter, y.data());
    }
    private:
    LocalMatrixInner m_i;
    LocalMatrixOuter m_o;
    MPIGather<Vector> m_g;
    Vector<int> m_scatter;
    enum dist_type m_dist;
    detail::MPIAllreduce m_reduce;
    mutable detail::AnyVector<Vector> m_recv_buffer, m_comp_buffer;
};
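As a sketch of the allreduce distribution mode, the corresponding constructor above can wrap any shared-memory matrix whose partial local products must be summed over the communicator. The choice of thrust::host_vector as the Vector template parameter and of dg/algorithm.h as umbrella header are assumptions:

    #include <mpi.h>
    #include <thrust/host_vector.h>
    #include "dg/algorithm.h"  // assumption: umbrella header of the dg library

    // every process applies its local matrix piece; the results are summed afterwards
    template<class LocalMatrix, class MPIVector>
    void apply_allreduce( const LocalMatrix& local, MPI_Comm comm,
        const MPIVector& x, MPIVector& y)
    {
        dg::MPIDistMat<thrust::host_vector, LocalMatrix> M( local, comm); // allreduce dist type
        dg::blas2::symv( M, x, y);
    }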
// Register both matrix classes with the dg dispatch system (value type and MPI matrix category)
template< template<class> class V, class LI, class LO>
struct TensorTraits<MPISparseBlockMat<V, LI, LO>>
{
    using value_type      = get_value_type<LI>;
    using tensor_category = MPIMatrixTag;
};
template< template<class> class V, class LI, class LO>
struct TensorTraits<MPIDistMat<V, LI, LO>>
{
    using value_type      = get_value_type<LI>;
    using tensor_category = MPIMatrixTag;
};
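The same registration pattern could be used for a user-defined distributed matrix type that provides a compatible symv member; a hypothetical sketch (MyMPIMatrix is not part of the library, dg/algorithm.h as umbrella header is an assumption):

    #include "dg/algorithm.h"  // assumption: provides dg::TensorTraits and dg::MPIMatrixTag

    template<class T>
    struct MyMPIMatrix;        // hypothetical user type with a symv( x, y) const member

    namespace dg {
    template<class T>
    struct TensorTraits< MyMPIMatrix<T>>
    {
        using value_type      = T;
        using tensor_category = MPIMatrixTag;
    };
    } // namespace dg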
Cross-references for the symbols that appear in the listing above:

void dg::blas2::symv(MatrixType &&M, const ContainerType1 &x, ContainerType2 &y)
Definition blas2.h:325
void dg::blas2::stencil(FunctorType f, MatrixType &&M, const ContainerType1 &x, ContainerType2 &y)
Definition blas2.h:454
typename TensorTraits< std::decay_t< Vector > >::execution_policy get_execution_policy
Definition tensor_traits.h:49
typename TensorTraits< std::decay_t< Vector > >::value_type get_value_type
Definition tensor_traits.h:45
void mpi_gather(const thrust::host_vector< std::array< int, 2 > > &gather_map, const ContainerType &gatherFrom, ContainerType &result, MPI_Comm comm)
Un-optimized distributed gather operation.
Definition mpi_permutation.h:149
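A small, hypothetical example of this un-optimized gather; the dg:: namespace qualification and the {rank, local index} meaning of the gather_map entries are assumptions inferred from its use in make_mpi_sparseblockmat above:

    #include <array>
    #include <mpi.h>
    #include <thrust/host_vector.h>
    #include "dg/algorithm.h"  // assumption: umbrella header of the dg library

    int main( int argc, char* argv[])
    {
        MPI_Init( &argc, &argv);
        int rank;
        MPI_Comm_rank( MPI_COMM_WORLD, &rank);
        thrust::host_vector<double> gatherFrom( 5, double(rank)); // five local values = my rank
        // fetch element 2 from rank 0 (assumed {rank, local index} semantics)
        thrust::host_vector<std::array<int,2>> gather_map( 1, std::array<int,2>{0, 2});
        thrust::host_vector<double> result( gather_map.size());
        dg::mpi_gather( gather_map, gatherFrom, result, MPI_COMM_WORLD);
        // with the assumed semantics, result[0] now holds element 2 of rank 0 on every process
        MPI_Finalize();
        return 0;
    }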
auto make_mpi_sparseblockmat(const EllSparseBlockMat< real_type, thrust::host_vector > &src, const ConversionPolicyRows &g_rows, const ConversionPolicyCols &g_cols)
Split given EllSparseBlockMat into computation and communication part.
Definition mpi_matrix.h:242
#define DG_DEVICE
Expands to __host__ __device__ if compiled with nvcc else is empty.
Definition dg_doc.h:378
void doParallelFor(SharedVectorTag, Stencil f, unsigned N, ContainerType &&x, ContainerTypes &&... xs)
dg
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.
dg::CooSparseBlockMat
Coo Sparse Block Matrix format.
Definition sparseblockmat.h:228
int num_entries
number of entries in the matrix
Definition sparseblockmat.h:345
Vector< int > rows_idx
is of size num_entries and contains the row indices
Definition sparseblockmat.h:341
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks.
Definition sparseblockmat.h:339
Vector< int > data_idx
is of size num_entries and contains indices into the data array
Definition sparseblockmat.h:342
Vector< int > cols_idx
is of size num_entries and contains the column indices
Definition sparseblockmat.h:340
dg::EllSparseBlockMat
Ell Sparse Block Matrix format.
Definition sparseblockmat.h:46
int right_size
size of the right Kronecker delta (e.g. 1 for an x-derivative)
Definition sparseblockmat.h:177
void set_left_size(int new_left_size)
Set left_size = new_left_size;
Definition sparseblockmat.h:157
int num_rows
number of block rows; each row contains blocks_per_line blocks
Definition sparseblockmat.h:172
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks.
Definition sparseblockmat.h:168
int blocks_per_line
number of blocks in each line
Definition sparseblockmat.h:174
int num_cols
number of block columns
Definition sparseblockmat.h:173
Vector< int > data_idx
has the same size as cols_idx and contains indices into the data array, i.e. the block number
Definition sparseblockmat.h:170
void set_right_size(int new_right_size)
Set right_size = new_right_size; set_default_range();
Definition sparseblockmat.h:152
Vector< int > cols_idx
is of size num_rows*blocks_per_line and contains the block column indices into the vector
Definition sparseblockmat.h:169
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:176
int n
each block has size n*n
Definition sparseblockmat.h:175
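Taken together, the EllSparseBlockMat members above describe a block ELL format: each of the num_rows block rows holds blocks_per_line dense n-by-n blocks, cols_idx names the block column of each entry and data_idx the block stored in data. A simplified reference application, ignoring left_size/right_size and any row range restriction (the in-block data layout is an assumption):

    #include <vector>

    // schematic y = M x for the block ELL layout described above (left_size = right_size = 1)
    void ell_block_spmv( int num_rows, int blocks_per_line, int n,
        const std::vector<double>& data,      // n*n*num_different_blocks block entries
        const std::vector<int>&    cols_idx,  // num_rows*blocks_per_line block column indices
        const std::vector<int>&    data_idx,  // num_rows*blocks_per_line indices into data
        const std::vector<double>& x, std::vector<double>& y)
    {
        for( int i = 0; i < num_rows; i++)           // block row
        for( int k = 0; k < n; k++)                  // row within the block
        {
            double temp = 0;
            for( int d = 0; d < blocks_per_line; d++)
            {
                const int B = data_idx[ i*blocks_per_line + d];  // which n x n block
                const int J = cols_idx[ i*blocks_per_line + d];  // which block column
                for( int q = 0; q < n; q++)                      // column within the block
                    temp += data[ (B*n + k)*n + q] * x[ J*n + q];
            }
            y[ i*n + k] = temp;
        }
    }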
dg::MPIDistMat
Distributed memory matrix class, asynchronous communication.
Definition mpi_matrix.h:395
const MPIGather< Vector > & mpi_gather() const
Read access to the communication object.
Definition mpi_matrix.h:467
MPIDistMat()=default
no memory allocation
MPIDistMat(const LocalMatrixInner &m, MPI_Comm comm)
Allreduce dist type.
Definition mpi_matrix.h:442
const LocalMatrixInner & inner_matrix() const
Read access to the inner matrix.
Definition mpi_matrix.h:463
const Vector< int > & scatter() const
Read access to the scatter map.
Definition mpi_matrix.h:472
MPIDistMat(const LocalMatrixInner &inside, const LocalMatrixOuter &outside, const MPIGather< Vector > &mpi_gather, const Vector< int > &scatter)
Constructor.
Definition mpi_matrix.h:427
MPI_Comm communicator() const
Read access to the MPI Communicator.
Definition mpi_matrix.h:470
void stencil(const Functor f, const ContainerType1 &x, ContainerType2 &y) const
Stencil computations.
Definition mpi_matrix.h:537
MPIDistMat(const MPIDistMat< OtherVector, OtherMatrixInner, OtherMatrixOuter > &src)
Copy constructor.
Definition mpi_matrix.h:458
MPIDistMat(const LocalMatrixInner &m)
Only local computations.
Definition mpi_matrix.h:404
const LocalMatrixOuter & outer_matrix() const
Read access to the outer matrix.
Definition mpi_matrix.h:465
void symv(const ContainerType1 &x, ContainerType2 &y) const
Matrix Vector product.
Definition mpi_matrix.h:485
dg::MPIGather
Optimized MPI Gather operation.
Definition mpi_gather.h:455
dg::MPIKroneckerGather
Communicator for asynchronous communication of MPISparseBlockMat.
Definition mpi_gather_kron.h:144
dg::MPIMatrixTag
indicate one of our mpi matrices
Definition matrix_categories.h:31
dg::MPISparseBlockMat
Distributed memory Sparse block matrix class, asynchronous communication.
Definition mpi_matrix.h:143
LocalMatrixInner & inner_matrix()
Write access to the inner matrix.
Definition mpi_matrix.h:165
MPISparseBlockMat(const LocalMatrixInner &inside, const LocalMatrixOuter &outside, const MPIKroneckerGather< Vector > &mpi_gather)
Definition mpi_matrix.h:145
void symv(dg::get_value_type< ContainerType1 > alpha, const ContainerType1 &x, dg::get_value_type< ContainerType1 > beta, ContainerType2 &y) const
Matrix Vector product.
Definition mpi_matrix.h:184
const LocalMatrixInner & inner_matrix() const
Read access to the inner matrix.
Definition mpi_matrix.h:161
LocalMatrixOuter & outer_matrix()
Write access to the outer matrix.
Definition mpi_matrix.h:167
void symv(const ContainerType1 &x, ContainerType2 &y) const
Definition mpi_matrix.h:219
const LocalMatrixOuter & outer_matrix() const
Read access to the outer matrix.
Definition mpi_matrix.h:163
MPISparseBlockMat()=default
MPISparseBlockMat(const MPISparseBlockMat< V, LI, LO > &src)
Definition mpi_matrix.h:156
MPI_Comm communicator() const
Definition mpi_matrix.h:169
dg::SharedVectorTag
Indicate a contiguous chunk of shared memory.
Definition vector_categories.h:41
get_value_type< LI > value_type
value type
Definition mpi_matrix.h:602
get_value_type< LI > value_type
value type
Definition mpi_matrix.h:596
dg::TensorTraits
The vector traits.
Definition tensor_traits.h:38