Discontinuous Galerkin Library
#include "dg/algorithm.h"
sparseblockmat.h
1#pragma once
2
3#include <cmath>
4#include <numeric>
5#include <thrust/host_vector.h>
6#include "exblas/exdot_serial.h"
7#include "config.h"
8#include "exceptions.h"
9#include "tensor_traits.h"
10#include "sparsematrix.h"
11
12namespace dg
13{
14
44template<class real_type, template <class> class Vector>
45struct EllSparseBlockMat
46{
49 static constexpr int invalid_index = -1;
51 EllSparseBlockMat() = default;
67 EllSparseBlockMat( int num_block_rows, int num_block_cols,
68 int num_blocks_per_line, int num_different_blocks, int n):
69 data(num_different_blocks*n*n),
70 cols_idx( num_block_rows*num_blocks_per_line),
71 data_idx(cols_idx.size()), right_range(2),
72 num_rows(num_block_rows),
73 num_cols(num_block_cols),
74 blocks_per_line(num_blocks_per_line),
75 n(n), left_size(1), right_size(1)
76 {
77 right_range[0]=0;
78 right_range[1]=1;
79 }
80
81 template< class other_real_type, template<class> class Other_Vector>
82 friend class EllSparseBlockMat; // enable copy
83
84 template< class other_real_type, template<class> class Other_Vector>
85 EllSparseBlockMat( const EllSparseBlockMat<other_real_type, Other_Vector>& src)
86 {
87 data = src.data;
88 cols_idx = src.cols_idx, data_idx = src.data_idx, right_range = src.right_range;
89 num_rows = src.num_rows, num_cols = src.num_cols, blocks_per_line = src.blocks_per_line;
90 n = src.n, left_size = src.left_size, right_size = src.right_size;
91 }
92
93
95 int total_num_rows()const{
96 return num_rows*n*left_size*right_size;
97 }
99 int total_num_cols()const{
100 return num_cols*n*left_size*right_size;
101 }
102
109 dg::SparseMatrix<int, real_type, thrust::host_vector> asCuspMatrix() const;
120 template<class value_type>
121 void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const
122 {
123 launch_multiply_kernel( SerialTag(), alpha, x, beta, y);
124 }
125 template<class value_type>
126 void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const
127 {
128 launch_multiply_kernel( CudaTag(), alpha, x, beta, y);
129 }
130#ifdef _OPENMP
131 template<class value_type>
132 void symv(SharedVectorTag, OmpTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const
133 {
134 if( !omp_in_parallel())
135 {
136 #pragma omp parallel
137 {
138 launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
139 }
140 return;
141 }
142 launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
143 }
144#endif //_OPENMP
145
147 void set_default_range(){
148 right_range[0]=0;
149 right_range[1]=right_size;
150 }
152 void set_right_size( int new_right_size ){
153 right_size = new_right_size;
154 set_default_range();
155 }
157 void set_left_size( int new_left_size ){
158 left_size = new_left_size;
159 }
166 void display( std::ostream& os = std::cout, bool show_data = false) const;
167
168 Vector<real_type> data;
169 Vector<int> cols_idx;
170 Vector<int> data_idx;
171 Vector<int> right_range;
172 int num_rows;
173 int num_cols;
174 int blocks_per_line;
175 int n;
176 int left_size;
177 int right_size;
178 private:
179 template<class value_type>
180 void launch_multiply_kernel(SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const;
181 template<class value_type>
182 void launch_multiply_kernel(CudaTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const;
183#ifdef _OPENMP
184 template<class value_type>
185 void launch_multiply_kernel(OmpTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const;
186#endif //_OPENMP
187
188};
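
The members above are public, so a matrix can be assembled by hand. The following is a minimal sketch (not part of the header), assuming #include "dg/algorithm.h" and a host build in which the serial kernel declared above is compiled in; it builds a 4x4 periodic forward difference with 1x1 blocks and applies y = alpha*D*x + beta*y:

    // Sketch: 4 block rows/cols, 2 blocks per line, 2 distinct blocks, block size n=1
    dg::EllSparseBlockMat<double, thrust::host_vector> D( 4, 4, 2, 2, 1);
    D.data[0] = -1.; D.data[1] = +1.;             // the two distinct 1x1 blocks
    for( int i=0; i<4; i++)
    {
        D.cols_idx[2*i+0] = i;       D.data_idx[2*i+0] = 0; // -1 on the diagonal
        D.cols_idx[2*i+1] = (i+1)%4; D.data_idx[2*i+1] = 1; // +1 on the periodic super-diagonal
    }
    thrust::host_vector<double> x( D.total_num_cols(), 1.), y( D.total_num_rows(), 0.);
    // y = 1*D*x + 0*y; for a constant x the result is zero
    D.symv( dg::SharedVectorTag(), dg::SerialTag(), 1., &x[0], 0., &y[0]);
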
189
190// TODO not sure this should be public...
191
192//four classes/files play together in mpi distributed EllSparseBlockMat
193//CooSparseBlockMat and kernels, NearestNeighborComm, RowColDistMat
194//and the creation functions in mpi_derivatives.h
226template<class real_type, template <class > class Vector>
227struct CooSparseBlockMat
228{
230 CooSparseBlockMat() = default;
240 CooSparseBlockMat( int num_block_rows, int num_block_cols, int n, int left_size, int right_size):
241 num_rows(num_block_rows), num_cols(num_block_cols), num_entries(0),
242 n(n), left_size(left_size), right_size(right_size){}
243
244 template< class other_real_type, template<class> class Other_Vector>
245 friend class CooSparseBlockMat; // enable copy
246
247 template< class other_real_type, template<class> class Other_Vector>
248 CooSparseBlockMat( const CooSparseBlockMat<other_real_type, Other_Vector>& src)
249 {
250 data = src.data;
251 rows_idx = src.rows_idx, cols_idx = src.cols_idx, data_idx = src.data_idx;
252 num_rows = src.num_rows, num_cols = src.num_cols, num_entries = src.num_entries;
253 n = src.n, left_size = src.left_size, right_size = src.right_size;
254 }
263 void add_value( int row, int col, const Vector<real_type>& element)
264 {
265 assert( (int)element.size() == n*n);
266 int index = data.size()/n/n;
267 data.insert( data.end(), element.begin(), element.end());
268 add_value( row, col, index);
269 }
277 void add_value( int row, int col, int data)
278 {
279 rows_idx.push_back(row);
280 cols_idx.push_back(col);
281 data_idx.push_back( data );
282 num_entries++;
283 }
284
286 int total_num_rows()const{
287 return num_rows*n*left_size*right_size;
288 }
290 int total_num_cols()const{
291 return num_cols*n*left_size*right_size;
292 }
293
294
295
305 template<class value_type>
306 void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type** x, value_type beta, value_type* RESTRICT y) const
307 {
308 launch_multiply_kernel( SerialTag(), alpha, x, beta, y);
309 }
310 template<class value_type>
311 void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const
312 {
313 launch_multiply_kernel( CudaTag(), alpha, x, beta, y);
314 }
315#ifdef _OPENMP
316 template<class value_type>
317 void symv(SharedVectorTag, OmpTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const
318 {
319 if( !omp_in_parallel())
320 {
321 #pragma omp parallel
322 {
323 launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
324 }
325 return;
326 }
327 launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
328 }
329#endif //_OPENMP
330
337 void display(std::ostream& os = std::cout, bool show_data = false) const;
338
339 Vector<real_type> data;
340 Vector<int> cols_idx;
341 Vector<int> rows_idx;
342 Vector<int> data_idx;
343 int num_rows;
344 int num_cols;
345 int num_entries;
346 int n;
347 int left_size;
348 int right_size;
349 private:
350 template<class value_type>
351 void launch_multiply_kernel(SerialTag, value_type alpha, const value_type** x, value_type beta, value_type* RESTRICT y) const;
352 template<class value_type>
353 void launch_multiply_kernel(CudaTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const;
354#ifdef _OPENMP
355 template<class value_type>
356 void launch_multiply_kernel(OmpTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const;
357#endif //_OPENMP
358};
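
The two add_value overloads are the intended assembly path for this format. A small sketch (not part of the header), assuming 2x2 blocks on a 3x3 block grid:

    dg::CooSparseBlockMat<double, thrust::host_vector> C( 3, 3, 2, 1, 1);
    thrust::host_vector<double> block( 2*2, 0.5);  // one 2x2 block, all entries 0.5
    C.add_value( 0, 2, block);   // appends the block to data and records row 0, column 2
    C.add_value( 2, 0, 0);       // reuses data block 0 at block position (2,0)
    C.display( std::cout);       // prints the row, column and data indices
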
360
361//template<class real_type, template<class> class Vector>
362//template<class value_type>
363//void EllSparseBlockMat<real_type, Vector>::symv(SharedVectorTag, SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const
364//{
365// //simplest implementation (all optimization must respect the order of operations)
366// for( int s=0; s<left_size; s++)
367// for( int i=0; i<num_rows; i++)
368// for( int k=0; k<n; k++)
369// for( int j=right_range[0]; j<right_range[1]; j++)
370// {
371// int I = ((s*num_rows + i)*n+k)*right_size+j;
372// // if y[I] isnan then even beta = 0 does not make it 0
373// y[I] = beta == 0 ? (value_type)0 : y[I]*beta;
374// for( int d=0; d<blocks_per_line; d++)
375// {
376// value_type temp = 0;
377// int J = cols_idx[i*blocks_per_line+d];
378// if ( J == invalid_index)
379// continue;
380//
381// for( int q=0; q<n; q++) //multiplication-loop
382// temp = DG_FMA( data[ (data_idx[i*blocks_per_line+d]*n + k)*n+q],
383// x[((s*num_cols + J)*n+q)*right_size+j],
384// temp);
385// y[I] = DG_FMA( alpha,temp, y[I]);
386// }
387// }
388//}
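
To make the index arithmetic in this reference implementation concrete: with, say, left_size=2, num_rows=3, n=2 and right_size=5, the output element for plane s=1, block row i=2, coefficient k=1 and right index j=3 sits at I = ((1*3+2)*2+1)*5+3 = 58; the left index varies slowest and the right index fastest.
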
389template<class real_type, template<class> class Vector>
390dg::SparseMatrix<int, real_type, thrust::host_vector> EllSparseBlockMat<real_type, Vector>::asCuspMatrix() const
391{
392 thrust::host_vector<real_type > values;
393 thrust::host_vector<int> row_indices;
394 thrust::host_vector<int> column_indices;
395 for( int s=0; s<left_size; s++)
396 for( int i=0; i<num_rows; i++)
397 for( int k=0; k<n; k++)
398 for( int j=right_range[0]; j<right_range[1]; j++)
399 {
400 int I = ((s*num_rows + i)*n+k)*right_size+j;
401 for( int d=0; d<blocks_per_line; d++)
402 for( int q=0; q<n; q++) //multiplication-loop
403 {
404 int J = cols_idx[i*blocks_per_line+d];
405 if ( J == invalid_index)
406 continue;
407 row_indices.push_back(I);
408 column_indices.push_back(
409 ((s*num_cols + J)*n+q)*right_size+j);
410 values.push_back(data[ (data_idx[i*blocks_per_line+d]*n + k)*n+q]);
411 }
412 }
413 dg::SparseMatrix<int, real_type, thrust::host_vector> A;
414 A.setFromCoo( total_num_rows(), total_num_cols(), row_indices, column_indices, values);
415 return A;
416}
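
For debugging, or to hand the operator to generic sparse routines, the block structure can thus be flattened into an ordinary CSR matrix. A sketch, reusing the D assembled in the earlier example:

    dg::SparseMatrix<int, double, thrust::host_vector> A = D.asCuspMatrix();
    // A holds the same coefficients as D, expanded element by element into CSR format
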
417
418//template<class real_type, template<class> class Vector>
419//template<class value_type>
420//void CooSparseBlockMat<real_type, Vector>::symv( SharedVectorTag, SerialTag, value_type alpha, const value_type** x, value_type beta, value_type* RESTRICT y) const
421//{
422// if( num_entries==0)
423// return;
424// if( beta!= 1 )
425// std::cerr << "Beta != 1 yields wrong results in CooSparseBlockMat!! Beta = "<<beta<<"\n";
426// assert( beta == 1 && "Beta != 1 yields wrong results in CooSparseBlockMat!!");
427// // In fact, Beta is ignored in the following code
428// // beta == 1 avoids the need to access all values in y, just the cols we want
429// // This makes symv a sparse vector = sparse matrix x sparse vector operation
430//
431// //simplest implementation (sums block by block)
432// for( int s=0; s<left_size; s++)
433// for( int k=0; k<n; k++)
434// for( int j=0; j<right_size; j++)
435// for( int i=0; i<num_entries; i++)
436// {
437// value_type temp = 0;
438// for( int q=0; q<n; q++) //multiplication-loop
439// temp = DG_FMA( data[ (data_idx[i]*n + k)*n+q],
440// //x[((s*num_cols + cols_idx[i])*n+q)*right_size+j],
441// x[cols_idx[i]][(q*left_size +s )*right_size+j],
442// temp);
443// int I = ((s*num_rows + rows_idx[i])*n+k)*right_size+j;
444// y[I] = DG_FMA( alpha,temp, y[I]);
445// }
446//}
447
448template<class T, template<class> class Vector>
449void EllSparseBlockMat<T, Vector>::display( std::ostream& os, bool show_data ) const
450{
451 os << "Data array has "<<data.size()/n/n<<" blocks of size "<<n<<"x"<<n<<"\n";
452 os << "num_rows "<<num_rows<<"\n";
453 os << "num_cols "<<num_cols<<"\n";
454 os << "blocks_per_line "<<blocks_per_line<<"\n";
455 os << "n "<<n<<"\n";
456 os << "left_size "<<left_size<<"\n";
457 os << "right_size "<<right_size<<"\n";
458 os << "right_range_0 "<<right_range[0]<<"\n";
459 os << "right_range_1 "<<right_range[1]<<"\n";
460 os << "Column indices: \n";
461 for( int i=0; i<num_rows; i++)
462 {
463 for( int d=0; d<blocks_per_line; d++)
464 os << cols_idx[i*blocks_per_line + d] <<" ";
465 os << "\n";
466 }
467 os << "\n Data indices: \n";
468 for( int i=0; i<num_rows; i++)
469 {
470 for( int d=0; d<blocks_per_line; d++)
471 os << data_idx[i*blocks_per_line + d] <<" ";
472 os << "\n";
473 }
474 if(show_data)
475 {
476 os << "\n Data: \n";
477 for( unsigned i=0; i<data.size()/n/n; i++)
478 for(unsigned k=0; k<n*n; k++)
479 {
480 exblas::udouble res;
481 res.d = data[i*n*n+k];
482 os << "idx "<<i<<" "<<res.d <<"\t"<<res.i<<"\n";
483 }
484 }
485 os << std::endl;
486}
487
488template<class real_type, template<class> class Vector>
489void CooSparseBlockMat<real_type, Vector>::display( std::ostream& os, bool show_data) const
490{
491 os << "Data array has "<<data.size()/n/n<<" blocks of size "<<n<<"x"<<n<<"\n";
492 os << "num_rows "<<num_rows<<"\n";
493 os << "num_cols "<<num_cols<<"\n";
494 os << "num_entries "<<num_entries<<"\n";
495 os << "n "<<n<<"\n";
496 os << "left_size "<<left_size<<"\n";
497 os << "right_size "<<right_size<<"\n";
498 os << "row\tcolumn\tdata:\n";
499 for( int i=0; i<num_entries; i++)
500 os << rows_idx[i]<<"\t"<<cols_idx[i] <<"\t"<<data_idx[i]<<"\n";
501 if(show_data)
502 {
503 os << "\n Data: \n";
504 for( unsigned i=0; i<data.size()/n/n; i++)
505 for(unsigned k=0; k<n*n; k++)
506 {
507 exblas::udouble res;
508 res.d = data[i*n*n+k];
509 os << "idx "<<i<<" "<<res.d <<"\t"<<res.i<<"\n";
510 }
511 }
512 os << std::endl;
513
514}
515
519template <class T, template<class> class V>
520struct TensorTraits<EllSparseBlockMat<T, V> >
521{
522 using value_type = T;
523 using tensor_category = SparseBlockMatrixTag;
524 using execution_policy = dg::get_execution_policy<V<T>>;
525};
526template <class T, template<class> class V>
527struct TensorTraits<CooSparseBlockMat<T, V> >
528{
529 using value_type = T;
530 using tensor_category = SparseBlockMatrixTag;
531 using execution_policy = dg::get_execution_policy<V<T>>;
532};
534
535} //namespace dg
536
538#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
539#include "sparseblockmat_gpu_kernels.cuh"
540#elif THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP
541#include "sparseblockmat_omp_kernels.h"
542#endif
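
In practice these matrices are rarely assembled by hand; they are produced by the creation functions elsewhere in the library and applied through dg::blas2::symv, which uses the TensorTraits specializations above to dispatch to the serial, OpenMP or CUDA kernels. A sketch, assuming the usual grid and creation functions (dg::Grid1d, dg::create::dx, dg::evaluate):

    #include "dg/algorithm.h"

    int main()
    {
        dg::Grid1d g1d( 0., 2.*M_PI, 3, 20, dg::PER); // 3 polynomial coefficients, 20 cells, periodic
        auto dx = dg::create::dx( g1d);               // a derivative matrix in the Ell block format
        dg::HVec x = dg::evaluate( [](double s){ return sin(s);}, g1d), y( x);
        dg::blas2::symv( dx, x, y);                   // y approximates cos; the traits select the kernels
        return 0;
    }
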
Error classes for the dg library.
typename TensorTraits< std::decay_t< Vector > >::execution_policy get_execution_policy
Definition tensor_traits.h:49
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.
Coo Sparse Block Matrix format.
Definition sparseblockmat.h:228
void display(std::ostream &os=std::cout, bool show_data=false) const
Display internal data to a stream.
CooSparseBlockMat(int num_block_rows, int num_block_cols, int n, int left_size, int right_size)
Allocate storage.
Definition sparseblockmat.h:240
int num_entries
number of entries in the matrix
Definition sparseblockmat.h:345
void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type **x, value_type beta, value_type *y) const
Definition sparseblockmat.h:311
int total_num_cols() const
total number of columns is num_cols*n*left_size*right_size
Definition sparseblockmat.h:290
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:347
Vector< int > rows_idx
is of size num_entries and contains the row indices
Definition sparseblockmat.h:341
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks.
Definition sparseblockmat.h:339
CooSparseBlockMat()=default
default constructor does nothing
int right_size
size of the right Kronecker delta (e.g. 1 for an x-derivative)
Definition sparseblockmat.h:348
void add_value(int row, int col, int data)
Convenience function to assemble the matrix.
Definition sparseblockmat.h:277
Vector< int > data_idx
is of size num_entries and contains indices into the data array
Definition sparseblockmat.h:342
int num_cols
number of columns (never actually used with the pointer approach)
Definition sparseblockmat.h:344
int n
each block has size n*n
Definition sparseblockmat.h:346
CooSparseBlockMat(const CooSparseBlockMat< other_real_type, Other_Vector > &src)
Definition sparseblockmat.h:248
void add_value(int row, int col, const Vector< real_type > &element)
Convenience function to assemble the matrix.
Definition sparseblockmat.h:263
int num_rows
number of rows
Definition sparseblockmat.h:343
int total_num_rows() const
total number of rows is num_rows*n*left_size*right_size
Definition sparseblockmat.h:286
Vector< int > cols_idx
is of size num_entries and contains the column indices
Definition sparseblockmat.h:340
void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type **x, value_type beta, value_type *RESTRICT y) const
Apply the matrix to a vector.
Definition sparseblockmat.h:306
CUDA implementation.
Definition execution_policy.h:27
Ell Sparse Block Matrix format.
Definition sparseblockmat.h:46
int right_size
size of the right Kronecker delta (e.g. 1 for an x-derivative)
Definition sparseblockmat.h:177
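Taken together with left_size, this suggests the following picture: the block matrix B encoded by data, cols_idx and data_idx acts only on the middle index, so the full operator is effectively the Kronecker product I_left ⊗ B ⊗ I_right with identity matrices of sizes left_size and right_size. An x-derivative on a multi-dimensional grid therefore keeps right_size == 1 and collects the remaining dimensions in left_size.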
void set_left_size(int new_left_size)
Set left_size = new_left_size;
Definition sparseblockmat.h:157
static constexpr int invalid_index
Definition sparseblockmat.h:49
void set_default_range()
Set right_range[0] = 0, right_range[1] = right_size
Definition sparseblockmat.h:147
int num_rows
number of block rows; each row contains blocks_per_line blocks
Definition sparseblockmat.h:172
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks. The first block occupies the first n*n elements, the second block the next n*n, and so on.
Definition sparseblockmat.h:168
int blocks_per_line
number of blocks in each line
Definition sparseblockmat.h:174
int num_cols
number of block columns
Definition sparseblockmat.h:173
EllSparseBlockMat(int num_block_rows, int num_block_cols, int num_blocks_per_line, int num_different_blocks, int n)
Allocate storage.
Definition sparseblockmat.h:67
dg::SparseMatrix< int, real_type, thrust::host_vector > asCuspMatrix() const
Convert to a sparse matrix.
EllSparseBlockMat(const EllSparseBlockMat< other_real_type, Other_Vector > &src)
Definition sparseblockmat.h:85
void display(std::ostream &os=std::cout, bool show_data=false) const
Display internal data to a stream.
Vector< int > data_idx
has the same size as cols_idx and contains indices into the data array, i.e. the block number
Definition sparseblockmat.h:170
EllSparseBlockMat()=default
default constructor does nothing
int total_num_cols() const
total number of columns is num_cols*n*left_size*right_size
Definition sparseblockmat.h:99
void set_right_size(int new_right_size)
Set right_size = new_right_size; set_default_range();
Definition sparseblockmat.h:152
Vector< int > cols_idx
is of size num_rows*num_blocks_per_line and contains the column indices % n into the vector
Definition sparseblockmat.h:169
int total_num_rows() const
total number of rows is num_rows*n*left_size*right_size
Definition sparseblockmat.h:95
Vector< int > right_range
range (can be used to apply the matrix to only part of the right rows)
Definition sparseblockmat.h:171
void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type *RESTRICT x, value_type beta, value_type *RESTRICT y) const
Apply the matrix to a vector.
Definition sparseblockmat.h:121
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:176
int n
each block has size n*n
Definition sparseblockmat.h:175
void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type *x, value_type beta, value_type *y) const
Definition sparseblockmat.h:126
OpenMP parallel execution.
Definition execution_policy.h:28
Indicate sequential execution.
Definition execution_policy.h:26
Indicate a contiguous chunk of shared memory.
Definition vector_categories.h:41
Indicate our sparse block matrix format.
Definition matrix_categories.h:33
A CSR formatted sparse matrix.
Definition sparsematrix.h:96
void setFromCoo(size_t num_rows, size_t num_cols, const Vector< Index > &row_indices, const Vector< Index > &column_indices, const Vector< Value > &values, bool sort=false)
Set csr values from coo formatted sparse matrix.
Definition sparsematrix.h:171
dg::get_execution_policy< V< T > > execution_policy
Definition sparseblockmat.h:531
T value_type
Definition sparseblockmat.h:529
dg::get_execution_policy< V< T > > execution_policy
Definition sparseblockmat.h:524
T value_type
Definition sparseblockmat.h:522
The vector traits.
Definition tensor_traits.h:38