5#include <thrust/host_vector.h>
6#include "exblas/exdot_serial.h"
44template<
class real_type,
template <
class>
class Vector>
68 int num_blocks_per_line,
int num_different_blocks,
int n):
69 data(num_different_blocks*
n*
n),
70 cols_idx( num_block_rows*num_blocks_per_line),
81 template<
class other_real_type,
template<
class>
class Other_Vector>
84 template<
class other_real_type,
template<
class>
class Other_Vector>
120 template<
class value_type>
123 launch_multiply_kernel(
SerialTag(), alpha,
x, beta,
y);
125 template<
class value_type>
128 launch_multiply_kernel(
CudaTag(), alpha,
x, beta,
y);
131 template<
class value_type>
134 if( !omp_in_parallel())
138 launch_multiply_kernel(
OmpTag(), alpha, x, beta,
y);
142 launch_multiply_kernel( OmpTag(), alpha, x, beta,
y);
166 void display( std::ostream& os = std::cout,
bool show_data =
false)
const;
179 template<
class value_type>
180 void launch_multiply_kernel(
SerialTag, value_type alpha,
const value_type* RESTRICT x, value_type beta, value_type* RESTRICT
y)
const;
181 template<
class value_type>
182 void launch_multiply_kernel(
CudaTag, value_type alpha,
const value_type* x, value_type beta, value_type*
y)
const;
184 template<
class value_type>
185 void launch_multiply_kernel(
OmpTag, value_type alpha,
const value_type* x, value_type beta, value_type*
y)
const;
226template<
class real_type,
template <
class >
class Vector>
244 template<
class other_real_type,
template<
class>
class Other_Vector>
247 template<
class other_real_type,
template<
class>
class Other_Vector>
263 void add_value(
int row,
int col,
const Vector<real_type>& element)
265 assert( (
int)element.size() ==
n*
n);
266 int index =
data.size()/
n/
n;
267 data.insert(
data.end(), element.begin(), element.end());
305 template<
class value_type>
308 launch_multiply_kernel(
SerialTag(), alpha,
x, beta,
y);
310 template<
class value_type>
313 launch_multiply_kernel(
CudaTag(), alpha,
x, beta,
y);
316 template<
class value_type>
319 if( !omp_in_parallel())
323 launch_multiply_kernel(
OmpTag(), alpha, x, beta,
y);
327 launch_multiply_kernel( OmpTag(), alpha, x, beta,
y);
337 void display(std::ostream& os = std::cout,
bool show_data =
false)
const;
350 template<
class value_type>
351 void launch_multiply_kernel(
SerialTag, value_type alpha,
const value_type** x, value_type beta, value_type* RESTRICT
y)
const;
352 template<
class value_type>
353 void launch_multiply_kernel(
CudaTag, value_type alpha,
const value_type** x, value_type beta, value_type*
y)
const;
355 template<
class value_type>
356 void launch_multiply_kernel(
OmpTag, value_type alpha,
const value_type** x, value_type beta, value_type*
y)
const;
389template<
class real_type,
template<
class>
class Vector>
392 thrust::host_vector<real_type > values;
393 thrust::host_vector<int> row_indices;
394 thrust::host_vector<int> column_indices;
395 for(
int s=0; s<left_size; s++)
396 for(
int i=0; i<num_rows; i++)
397 for(
int k=0; k<n; k++)
398 for(
int j=right_range[0]; j<right_range[1]; j++)
400 int I = ((s*num_rows + i)*n+k)*right_size+j;
401 for(
int d=0; d<blocks_per_line; d++)
402 for(
int q=0; q<n; q++)
404 int J = cols_idx[i*blocks_per_line+d];
405 if ( J == invalid_index)
407 row_indices.push_back(I);
408 column_indices.push_back(
409 ((s*num_cols + J)*n+q)*right_size+j);
410 values.push_back(data[ (data_idx[i*blocks_per_line+d]*n + k)*n+q]);
414 A.
setFromCoo( total_num_rows(), total_num_cols(), row_indices, column_indices, values);
448template<
class T,
template<
class>
class Vector>
451 os <<
"Data array has "<<data.size()/n/n<<
" blocks of size "<<n<<
"x"<<n<<
"\n";
452 os <<
"num_rows "<<num_rows<<
"\n";
453 os <<
"num_cols "<<num_cols<<
"\n";
454 os <<
"blocks_per_line "<<blocks_per_line<<
"\n";
456 os <<
"left_size "<<left_size<<
"\n";
457 os <<
"right_size "<<right_size<<
"\n";
458 os <<
"right_range_0 "<<right_range[0]<<
"\n";
459 os <<
"right_range_1 "<<right_range[1]<<
"\n";
460 os <<
"Column indices: \n";
461 for(
int i=0; i<num_rows; i++)
463 for(
int d=0; d<blocks_per_line; d++)
464 os << cols_idx[i*blocks_per_line + d] <<
" ";
467 os <<
"\n Data indices: \n";
468 for(
int i=0; i<num_rows; i++)
470 for(
int d=0; d<blocks_per_line; d++)
471 os << data_idx[i*blocks_per_line + d] <<
" ";
477 for(
unsigned i=0; i<data.size()/n/n; i++)
478 for(
unsigned k=0; k<n*n; k++)
481 res.
d = data[i*n*n+k];
482 os <<
"idx "<<i<<
" "<<res.
d <<
"\t"<<res.
i<<
"\n";
488template<
class real_type,
template<
class>
class Vector>
491 os <<
"Data array has "<<data.size()/n/n<<
" blocks of size "<<n<<
"x"<<n<<
"\n";
492 os <<
"num_rows "<<num_rows<<
"\n";
493 os <<
"num_cols "<<num_cols<<
"\n";
494 os <<
"num_entries "<<num_entries<<
"\n";
496 os <<
"left_size "<<left_size<<
"\n";
497 os <<
"right_size "<<right_size<<
"\n";
498 os <<
"row\tcolumn\tdata:\n";
499 for(
int i=0; i<num_entries; i++)
500 os << rows_idx[i]<<
"\t"<<cols_idx[i] <<
"\t"<<data_idx[i]<<
"\n";
504 for(
unsigned i=0; i<data.size()/n/n; i++)
505 for(
unsigned k=0; k<n*n; k++)
508 res.
d = data[i*n*n+k];
509 os <<
"idx "<<i<<
" "<<res.
d <<
"\t"<<res.
i<<
"\n";
519template <
class T,
template<
class>
class V>
526template <
class T,
template<
class>
class V>
538#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
539#include "sparseblockmat_gpu_kernels.cuh"
540#elif THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP
541#include "sparseblockmat_omp_kernels.h"
Error classes or the dg library.
typename TensorTraits< std::decay_t< Vector > >::execution_policy get_execution_policy
Definition tensor_traits.h:49
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin li...
Coo Sparse Block Matrix format.
Definition sparseblockmat.h:228
void display(std::ostream &os=std::cout, bool show_data=false) const
Display internal data to a stream.
CooSparseBlockMat(int num_block_rows, int num_block_cols, int n, int left_size, int right_size)
Allocate storage.
Definition sparseblockmat.h:240
int num_entries
number of entries in the matrix
Definition sparseblockmat.h:345
void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type **x, value_type beta, value_type *y) const
Definition sparseblockmat.h:311
int total_num_cols() const
total number of columns is num_cols*n*left_size*right_size
Definition sparseblockmat.h:290
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:347
Vector< int > rows_idx
is of size num_entries and contains the row indices
Definition sparseblockmat.h:341
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks.
Definition sparseblockmat.h:339
CooSparseBlockMat()=default
default constructor does nothing
int right_size
size of the right Kronecker delta (is e.g 1 for a x - derivative)
Definition sparseblockmat.h:348
void add_value(int row, int col, int data)
Convenience function to assemble the matrix.
Definition sparseblockmat.h:277
Vector< int > data_idx
is of size num_entries and contains indices into the data array
Definition sparseblockmat.h:342
int num_cols
number of columns (never actually used with pointer approach
Definition sparseblockmat.h:344
int n
each block has size n*n
Definition sparseblockmat.h:346
CooSparseBlockMat(const CooSparseBlockMat< other_real_type, Other_Vector > &src)
Definition sparseblockmat.h:248
void add_value(int row, int col, const Vector< real_type > &element)
Convenience function to assemble the matrix.
Definition sparseblockmat.h:263
int num_rows
number of rows
Definition sparseblockmat.h:343
int total_num_rows() const
total number of rows is num_rows*n*left_size*right_size
Definition sparseblockmat.h:286
Vector< int > cols_idx
is of size num_entries and contains the column indices
Definition sparseblockmat.h:340
void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type **x, value_type beta, value_type *RESTRICT y) const
Apply the matrix to a vector.
Definition sparseblockmat.h:306
CUDA implementation.
Definition execution_policy.h:27
Ell Sparse Block Matrix format.
Definition sparseblockmat.h:46
int right_size
size of the right Kronecker delta (is e.g 1 for a x - derivative)
Definition sparseblockmat.h:177
void set_left_size(int new_left_size)
Set left_size = new_left_size;
Definition sparseblockmat.h:157
static constexpr int invalid_index
Definition sparseblockmat.h:49
void set_default_range()
Set right_range[0] = 0, right_range[1] = right_size
Definition sparseblockmat.h:147
int num_rows
number of block rows, each row contains blocks
Definition sparseblockmat.h:172
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks. The first block is contai...
Definition sparseblockmat.h:168
int blocks_per_line
number of blocks in each line
Definition sparseblockmat.h:174
int num_cols
number of block columns
Definition sparseblockmat.h:173
EllSparseBlockMat(int num_block_rows, int num_block_cols, int num_blocks_per_line, int num_different_blocks, int n)
Allocate storage.
Definition sparseblockmat.h:67
dg::SparseMatrix< int, real_type, thrust::host_vector > asCuspMatrix() const
Convert to a sparse matrix.
EllSparseBlockMat(const EllSparseBlockMat< other_real_type, Other_Vector > &src)
Definition sparseblockmat.h:85
void display(std::ostream &os=std::cout, bool show_data=false) const
Display internal data to a stream.
Vector< int > data_idx
has the same size as cols_idx and contains indices into the data array, i.e. the block number
Definition sparseblockmat.h:170
EllSparseBlockMat()=default
default constructor does nothing
int total_num_cols() const
total number of columns is num_cols*n*left_size*right_size
Definition sparseblockmat.h:99
void set_right_size(int new_right_size)
Set right_size = new_right_size; set_default_range();
Definition sparseblockmat.h:152
Vector< int > cols_idx
is of size num_rows*num_blocks_per_line and contains the column indices % n into the vector
Definition sparseblockmat.h:169
int total_num_rows() const
total number of rows is num_rows*n*left_size*right_size
Definition sparseblockmat.h:95
Vector< int > right_range
range (can be used to apply the matrix to only part of the right rows
Definition sparseblockmat.h:171
void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type *RESTRICT x, value_type beta, value_type *RESTRICT y) const
Apply the matrix to a vector.
Definition sparseblockmat.h:121
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:176
int n
each block has size n*n
Definition sparseblockmat.h:175
void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type *x, value_type beta, value_type *y) const
Definition sparseblockmat.h:126
OpenMP parallel execution.
Definition execution_policy.h:28
Indicate sequential execution.
Definition execution_policy.h:26
Indicate a contiguous chunk of shared memory.
Definition vector_categories.h:41
indicate our sparse block matrix format
Definition matrix_categories.h:33
A CSR formatted sparse matrix.
Definition sparsematrix.h:96
void setFromCoo(size_t num_rows, size_t num_cols, const Vector< Index > &row_indices, const Vector< Index > &column_indices, const Vector< Value > &values, bool sort=false)
Set csr values from coo formatted sparse matrix.
Definition sparsematrix.h:171
dg::get_execution_policy< V< T > > execution_policy
Definition sparseblockmat.h:531
T value_type
Definition sparseblockmat.h:529
dg::get_execution_policy< V< T > > execution_policy
Definition sparseblockmat.h:524
T value_type
Definition sparseblockmat.h:522
The vector traits.
Definition tensor_traits.h:38