#include <thrust/sort.h>
#include <thrust/gather.h>
#include <thrust/sequence.h>
#include <thrust/binary_search.h>
#include "blas2_stencil.h"
#include "sparsematrix_cpu.h"
#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
#include "sparsematrix_gpu.cuh"
#elif THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP
#include "sparsematrix_omp.h"
#endif
// Expand csr row offsets into a coo row index vector (one entry per nonzero)
template<class I0, class I1>
void csr2coo_inline( const I0& csr, I1& coo)
{
    using policy = dg::get_execution_policy<I0>;
    using I0_t = dg::get_value_type<I0>;
    using I1_t = dg::get_value_type<I1>;
    dg::blas2::detail::doParallelFor_dispatch( policy(), csr.size()-1,
        [] DG_DEVICE( unsigned i, const I0_t* csr_ptr, I1_t* coo_ptr)
        {
            for( int jj = csr_ptr[i]; jj < csr_ptr[i+1]; jj++)
                coo_ptr[jj] = I1_t(i);
        },
        thrust::raw_pointer_cast(csr.data()),
        thrust::raw_pointer_cast(coo.data()));
}

// Convenience wrapper that allocates the coo vector
template<class I>
I csr2coo( const I& csr)
{
    I coo( csr.back());
    csr2coo_inline( csr, coo);
    return coo;
}
// Compress a (sorted) coo row index vector into csr row offsets
template<class I0, class I1>
void coo2csr_inline( const I0& coo, I1& csr)
{
    // csr[i] is the position of the first coo entry with row index >= i
    I1 rows( csr.size());
    thrust::sequence( rows.begin(), rows.end());
    thrust::lower_bound( coo.begin(), coo.end(),
        rows.begin(), rows.end(), csr.begin());
}

// Convenience wrapper that allocates the csr vector
template<class I>
I coo2csr( unsigned num_rows, const I& coo)
{
    I csr( num_rows+1);
    coo2csr_inline( coo, csr);
    return csr;
}
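The two helpers above convert between the coo row-index array (one row index per nonzero) and the csr row-offset array. The following standalone sketch, an illustration that is not part of the header, shows the same thrust::lower_bound idea on a concrete three-row example:

// Example (not part of the header): the lower_bound trick behind coo2csr
#include <thrust/host_vector.h>
#include <thrust/binary_search.h>
#include <thrust/iterator/counting_iterator.h>
#include <vector>
#include <iostream>

int main()
{
    // 3 rows; sorted coo row indices of 4 nonzeros living in rows 0,0,1,2
    std::vector<int> rows = {0, 0, 1, 2};
    thrust::host_vector<int> coo( rows.begin(), rows.end());
    thrust::host_vector<int> csr( 3+1);
    // csr[i] = index of the first entry with row index >= i
    thrust::lower_bound( coo.begin(), coo.end(),
        thrust::counting_iterator<int>(0), thrust::counting_iterator<int>(4),
        csr.begin());
    for( unsigned i=0; i<csr.size(); i++)
        std::cout << csr[i] << " "; // prints 0 2 3 4
    std::cout << std::endl;
}

Reading the output, csr = 0 2 3 4 says that row 0 owns the entries [0,2), row 1 owns [2,3) and row 2 owns [3,4).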
// A CSR formatted sparse matrix
template<class Index = int, class Value = double, template<class> class Vector = thrust::host_vector>
struct SparseMatrix
{
    using policy = dg::get_execution_policy<Vector<Value>>; // Execution policy of the matrix
    using index_type = Index; // The index type (on GPU must be either int or long)
    using value_type = Value; // The value type (on GPU must be either float or double)

    // A static guard to allow certain members only on host (for serial execution)
    template<class OtherMatrix>
    using enable_if_serial = std::enable_if_t<std::is_same_v<typename OtherMatrix::policy, SerialTag>, OtherMatrix>;
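As a usage sketch (not part of the header; it assumes the class is reachable as dg::SparseMatrix and that the umbrella header name below is correct), a small CSR matrix with the default host backend can be constructed directly from its three vectors:

// Example: build the 2x3 matrix [1 0 2; 0 3 0] in CSR format
#include <thrust/host_vector.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header of the dg library

int main()
{
    std::vector<int> offsets = {0, 2, 3}, cols = {0, 2, 1};
    std::vector<double> vals = {1., 2., 3.};
    dg::SparseMatrix<int, double, thrust::host_vector> A( 2, 3,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    std::cout << A.num_rows() << " x " << A.num_cols()
              << " matrix with " << A.num_nnz() << " nonzeroes\n";
}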
    // Empty matrix
    SparseMatrix() = default;

    // Directly construct from given vectors
    SparseMatrix( size_t num_rows, size_t num_cols, const Vector<Index>& row_offsets,
        const Vector<Index>& column_indices, const Vector<Value>& values)
    : m_num_rows( num_rows), m_num_cols( num_cols), m_row_offsets( row_offsets),
      m_cols( column_indices), m_vals( values) {}

    // Grant differently parametrized instantiations access to the private members
    template<class I, class V, template<class> class Vec>
    friend class SparseMatrix;

    // Copy construct from different type
    template<class I, class V, template<class> class Vec>
    SparseMatrix( const SparseMatrix<I,V,Vec>& src)
    : m_num_rows( src.m_num_rows), m_num_cols( src.m_num_cols), m_row_offsets( src.m_row_offsets),
      m_cols( src.m_cols), m_vals( src.m_vals)
    {}
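The converting copy constructor is what moves a matrix between backends, for example from thrust::host_vector to thrust::device_vector storage. A sketch under the same include assumptions as above, for a device enabled build:

// Example: convert a host matrix to the device vector backend
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <vector>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    std::vector<int> offsets = {0, 1, 2}, cols = {0, 1};
    std::vector<double> vals = {4., 5.}; // the 2x2 diagonal matrix diag(4,5)
    dg::SparseMatrix<int, double, thrust::host_vector> A_host( 2, 2,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    // Index, Value and Vector template parameters may all change in the copy
    dg::SparseMatrix<int, double, thrust::device_vector> A_dev( A_host);
}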
    // Set csr values from a coo formatted sparse matrix
    void setFromCoo( size_t num_rows, size_t num_cols, const Vector<Index>& row_indices,
        const Vector<Index>& column_indices, const Vector<Value>& values, bool sort = false)
    {
        if( row_indices.size() != values.size())
            throw dg::Error( dg::Message( _ping_) << "Row indices size "
                <<row_indices.size()<<" not equal to values size "<<values.size()<<"\n");
        m_num_rows = num_rows, m_num_cols = num_cols;
        m_cols.resize( column_indices.size());
        m_vals.resize( values.size());
        if( sort)
        {
            // Sort the triplets by column first, then stably by row
            Vector<Index> trows( row_indices);
            Vector<Index> tcols( column_indices);
            Vector<Index> p( row_indices.size()); // permutation
            thrust::sequence( p.begin(), p.end());
            thrust::sort_by_key( tcols.begin(), tcols.end(), p.begin());
            // Bring the rows into the column-sorted order
            thrust::gather( p.begin(), p.end(), row_indices.begin(), trows.begin());
            // Stable sort by row keeps the column order within each row
            thrust::stable_sort_by_key( trows.begin(), trows.end(), p.begin());
            // Apply the final permutation to columns and values
            thrust::gather( p.begin(), p.end(), column_indices.begin(), m_cols.begin());
            thrust::gather( p.begin(), p.end(), values.begin(), m_vals.begin());
            m_row_offsets = coo2csr( num_rows, trows);
        }
        else
        {
            thrust::copy( column_indices.begin(), column_indices.end(), m_cols.begin());
            thrust::copy( values.begin(), values.end(), m_vals.begin());
            m_row_offsets = coo2csr( num_rows, row_indices);
        }
    }
    // Sort by row and column
    void sort_indices()
    {
        if( m_num_rows != m_row_offsets.size()-1)
            throw dg::Error( dg::Message( _ping_) << "Row offsets have size "
                <<m_row_offsets.size()<<" but num_rows is "<<m_num_rows<<"\n");
        if( m_cols.size() != m_vals.size())
            throw dg::Error( dg::Message( _ping_) << "Column indices size "
                <<m_cols.size()<<" not equal to values size "<<m_vals.size()<<"\n");
        // Expand the row offsets back to coo row indices and set again with sort = true
        Vector<Index> rows = csr2coo( m_row_offsets);
        Vector<Index> cols = m_cols;
        Vector<Value> vals = m_vals;
        setFromCoo( m_num_rows, m_num_cols, rows, cols, vals, true);
    }
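setFromCoo with sort = true is the convenient way to assemble a matrix from unordered triplets, and sort_indices() applies the same machinery to an already existing matrix. A usage sketch under the same include assumptions:

// Example: fill a 2x2 matrix from unsorted coo triplets
#include <thrust/host_vector.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    // Triplets (row, col, val) in arbitrary order
    std::vector<int> r = {1, 0, 1}, c = {1, 0, 0};
    std::vector<double> v = {4., 1., 3.};
    dg::SparseMatrix<int, double, thrust::host_vector> A;
    A.setFromCoo( 2, 2,
        thrust::host_vector<int>( r.begin(), r.end()),
        thrust::host_vector<int>( c.begin(), c.end()),
        thrust::host_vector<double>( v.begin(), v.end()),
        true); // sort = true: entries end up ordered by row, then column
    std::cout << A; // prints the sorted matrix linewise
}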
    size_t total_num_rows() const { return m_num_rows;} // Alias for num_rows
    size_t total_num_cols() const { return m_num_cols;} // Alias for num_cols
    size_t num_rows() const { return m_num_rows;} // Number of rows in matrix
    size_t num_cols() const { return m_num_cols;} // Number of columns in matrix
    size_t num_vals() const { return m_vals.size();} // Alias for num_nnz
    size_t num_nnz() const { return m_vals.size();} // Number of nonzero elements in matrix
    size_t num_entries() const { return m_vals.size();} // Alias for num_nnz

    // Read row_offsets vector
    const Vector<Index>& row_offsets() const { return m_row_offsets;}
    // Read column indices vector
    const Vector<Index>& column_indices() const { return m_cols;}
    // Read values vector
    const Vector<Value>& values() const { return m_vals;}
    // Change values vector
    Vector<Value>& values() { return m_vals;}
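Only the values can be changed through the non-const values() accessor; the sparsity pattern stays fixed. A small sketch, same include assumptions:

// Example: scale all stored values in place
#include <thrust/host_vector.h>
#include <thrust/transform.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    std::vector<int> offsets = {0, 1}, cols = {0};
    std::vector<double> vals = {7.};
    dg::SparseMatrix<int, double, thrust::host_vector> A( 1, 1,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    // Multiply every nonzero by 2 without touching the sparsity pattern
    thrust::transform( A.values().begin(), A.values().end(), A.values().begin(),
        [](double v){ return 2.*v; });
    std::cout << A.values()[0] << "\n"; // 14
}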
    // Serial backend of y = alpha*M*x + beta*y, forwards to spmv_cpu_kernel
    template<class value_type>
    void symv( SharedVectorTag, SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const
    {
        const auto* row_ptr = thrust::raw_pointer_cast( &m_row_offsets[0]);
        const auto* col_ptr = thrust::raw_pointer_cast( &m_cols[0]);
        const auto* val_ptr = thrust::raw_pointer_cast( &m_vals[0]);
        spmv_cpu_kernel( m_cache, m_num_rows, m_num_cols,
            m_vals.size(), row_ptr, col_ptr, val_ptr, alpha, beta, x, y);
    }
#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
    // CUDA backend, forwards to spmv_gpu_kernel
    template<class value_type>
    void symv( SharedVectorTag, CudaTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const
    {
        const auto* row_ptr = thrust::raw_pointer_cast( &m_row_offsets[0]);
        const auto* col_ptr = thrust::raw_pointer_cast( &m_cols[0]);
        const auto* val_ptr = thrust::raw_pointer_cast( &m_vals[0]);
        spmv_gpu_kernel( m_cache, m_num_rows, m_num_cols,
            m_vals.size(), row_ptr, col_ptr, val_ptr, alpha, beta, x, y);
    }
#elif THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP
    // OpenMP backend; only spawn threads if we are not already inside a parallel region
    template<class value_type>
    void symv( SharedVectorTag, OmpTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const
    {
        const auto* row_ptr = thrust::raw_pointer_cast( &m_row_offsets[0]);
        const auto* col_ptr = thrust::raw_pointer_cast( &m_cols[0]);
        const auto* val_ptr = thrust::raw_pointer_cast( &m_vals[0]);
        if( !omp_in_parallel())
        {
            spmv_omp_kernel( m_cache, m_num_rows, m_num_cols,
                m_vals.size(), row_ptr, col_ptr, val_ptr, alpha, beta, x, y);
            return;
        }
        // Nested inside an existing parallel region: fall back to the serial kernel
        CSRCache_cpu cache;
        spmv_cpu_kernel( cache, m_num_rows, m_num_cols,
            m_vals.size(), row_ptr, col_ptr, val_ptr, alpha, beta, x, y);
    }
#endif
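These symv overloads are the backend entry points that dg::blas2::symv (declared in blas2.h) reaches once the TensorTraits specialization at the end of this file registers the matrix type; that dispatch wiring lives in the blas2 layer and is assumed here. A host side sketch:

// Example: y = A x through dg::blas2::symv
#include <thrust/host_vector.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    // A = [1 0 2; 0 3 0]
    std::vector<int> offsets = {0, 2, 3}, cols = {0, 2, 1};
    std::vector<double> vals = {1., 2., 3.};
    dg::SparseMatrix<int, double, thrust::host_vector> A( 2, 3,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    thrust::host_vector<double> x( 3, 1.), y( 2, 0.);
    dg::blas2::symv( A, x, y); // with device vectors and a device matrix the same call uses the GPU or OpenMP backend
    std::cout << y[0] << " " << y[1] << "\n"; // 3 3
}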
    // Transposition: swap the roles of rows and columns
    SparseMatrix transpose() const
    {
        SparseMatrix o;
        o.m_num_rows = m_num_cols;
        o.m_num_cols = m_num_rows;
        // The expanded row indices of this matrix become the new column indices ...
        Vector<Index> cols = csr2coo( m_row_offsets);
        // ... and the old column indices become the new rows, which have to be sorted
        Vector<Index> rows( m_cols.begin(), m_cols.end());
        Vector<Index> p( rows.size()); // permutation
        thrust::sequence( p.begin(), p.end());
        // Stable sort keeps the column order within each new row
        thrust::stable_sort_by_key( rows.begin(), rows.end(), p.begin());
        o.m_row_offsets.resize( o.m_num_rows+1);
        coo2csr_inline( rows, o.m_row_offsets);
        o.m_cols.resize( cols.size());
        o.m_vals.resize( m_vals.size());
        thrust::gather( p.begin(), p.end(), cols.begin(), o.m_cols.begin());
        thrust::gather( p.begin(), p.end(), m_vals.begin(), o.m_vals.begin());
        return o;
    }
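A quick way to exercise transpose() together with the serial comparison operators defined below is to check that transposing twice returns the original matrix. Sketch under the same include assumptions:

// Example: transpose and transpose back
#include <thrust/host_vector.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    std::vector<int> offsets = {0, 2, 3}, cols = {0, 2, 1};
    std::vector<double> vals = {1., 2., 3.};
    dg::SparseMatrix<int, double, thrust::host_vector> A( 2, 3,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    auto AT = A.transpose(); // a 3 x 2 matrix
    std::cout << AT.num_rows() << " " << AT.num_cols() << "\n"; // 3 2
    std::cout << ( AT.transpose() == A) << "\n"; // 1
}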
    // Two matrices are considered equal if elements and sparsity pattern are equal (host only)
    template<class OtherMatrix = SparseMatrix>
    std::enable_if_t<dg::has_policy_v<OtherMatrix, SerialTag>, bool> operator!=( const SparseMatrix& rhs) const
    {
        if( rhs.m_num_rows != m_num_rows)
            return true;
        if( rhs.m_num_cols != m_num_cols)
            return true;
        for( size_t i = 0; i < m_row_offsets.size(); i++)
            if( m_row_offsets[i] != rhs.m_row_offsets[i])
                return true;
        for( size_t i = 0; i < m_cols.size(); i++)
            if( m_cols[i] != rhs.m_cols[i])
                return true;
        for( size_t i = 0; i < m_vals.size(); i++)
            if( m_vals[i] != rhs.m_vals[i])
                return true;
        return false;
    }
    // Two matrices are considered equal if elements and sparsity pattern are equal (host only)
    template<class OtherMatrix = SparseMatrix>
    std::enable_if_t<dg::has_policy_v<OtherMatrix, SerialTag>, bool> operator==( const SparseMatrix& rhs) const
    {
        return !((*this) != rhs);
    }

    // Negate
    SparseMatrix operator-() const
    {
        return (*this)*(Value(-1));
    }
    // Add (host only)
    template<class OtherMatrix = SparseMatrix>
    enable_if_serial<OtherMatrix>& operator+=( const SparseMatrix& op){ *this = *this + op; return *this;}
    // Subtract (host only)
    template<class OtherMatrix = SparseMatrix>
    enable_if_serial<OtherMatrix>& operator-=( const SparseMatrix& op){ *this = *this - op; return *this;}
    // Scalar multiply: scale all values in place
    SparseMatrix& operator*=( const Value& value)
    {
        dg::blas2::detail::doParallelFor_dispatch( policy(), m_vals.size(),
            [value] DG_DEVICE( unsigned i, Value* val_ptr)
            {
                val_ptr[i] *= value;
            },
            thrust::raw_pointer_cast(m_vals.data()));
        return *this;
    }
    // Add two sparse matrices (host only)
    template<class OtherMatrix = SparseMatrix>
    friend enable_if_serial<OtherMatrix> operator+( const SparseMatrix& lhs, const SparseMatrix& rhs)
    {
        if( lhs.m_num_cols != rhs.m_num_cols)
            throw dg::Error( dg::Message( _ping_) << "Cannot add matrix with "
                <<lhs.m_num_cols<<" columns to matrix with "<<rhs.m_num_cols<<" columns\n");
        if( lhs.m_num_rows != rhs.m_num_rows)
            throw dg::Error( dg::Message( _ping_) << "Cannot add matrix with "
                <<lhs.m_num_rows<<" rows to matrix with "<<rhs.m_num_rows<<" rows\n");
        SparseMatrix temp;
        temp.m_num_rows = lhs.m_num_rows, temp.m_num_cols = lhs.m_num_cols;
        spadd_cpu_kernel( lhs.m_num_rows, lhs.m_num_cols,
            lhs.m_row_offsets, lhs.m_cols, lhs.m_vals,
            rhs.m_row_offsets, rhs.m_cols, rhs.m_vals,
            temp.m_row_offsets, temp.m_cols, temp.m_vals);
        return temp;
    }
    // Subtract (host only)
    template<class OtherMatrix = SparseMatrix>
    friend enable_if_serial<OtherMatrix> operator-( const SparseMatrix& lhs, const SparseMatrix& rhs){ return lhs + (-rhs);}
    // Scalar multiplication from the left (host only)
    template<class OtherMatrix = SparseMatrix>
    friend enable_if_serial<OtherMatrix> operator*( const Value& value, const SparseMatrix& rhs){ SparseMatrix t(rhs); t *= value; return t;}
    // Scalar multiplication from the right (host only)
    template<class OtherMatrix = SparseMatrix>
    friend enable_if_serial<OtherMatrix> operator*( const SparseMatrix& lhs, const Value& value){ return value*lhs;}
    // Matrix-matrix multiplication (host only)
    template<class OtherMatrix = SparseMatrix>
    friend enable_if_serial<OtherMatrix> operator*( const SparseMatrix& lhs, const SparseMatrix& rhs)
    {
        if( lhs.m_num_cols != rhs.m_num_rows)
            throw dg::Error( dg::Message( _ping_) << "Cannot multiply matrix with "
                <<lhs.m_num_cols<<" columns with matrix with "<<rhs.m_num_rows<<" rows\n");
        SparseMatrix temp;
        temp.m_num_rows = lhs.m_num_rows, temp.m_num_cols = rhs.m_num_cols;
        spgemm_cpu_kernel( lhs.m_num_rows, lhs.m_num_cols, rhs.m_num_cols,
            lhs.m_row_offsets, lhs.m_cols, lhs.m_vals,
            rhs.m_row_offsets, rhs.m_cols, rhs.m_vals,
            temp.m_row_offsets, temp.m_cols, temp.m_vals);
        return temp;
    }
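The serial-only operators compose like ordinary arithmetic; every operation allocates a new host matrix. A sketch, same include assumptions:

// Example: operator arithmetic on host matrices
#include <thrust/host_vector.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    // A = diag(2, 1) stored as CSR
    std::vector<int> offsets = {0, 1, 2}, cols = {0, 1};
    std::vector<double> vals = {2., 1.};
    dg::SparseMatrix<int, double, thrust::host_vector> A( 2, 2,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    auto B = 2.*A + A*3.;  // scalar multiplication and addition: diag(10, 5)
    auto C = A*A - (-A);   // spgemm, negation and subtraction: diag(6, 2)
    std::cout << B << C;   // prints both matrices linewise
}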
    // Matrix-vector multiplication y = S*x
    template<class ContainerType, class = std::enable_if_t< dg::has_policy_v<ContainerType, policy> and dg::is_vector_v<ContainerType> >>
    friend ContainerType operator*( const SparseMatrix& S, const ContainerType& x)
    {
        if( S.m_num_cols != x.size())
            throw dg::Error( dg::Message( _ping_) << "Cannot multiply matrix with "
                <<S.m_num_cols<<" columns with vector with "<<x.size()<<" rows\n");
        ContainerType out( S.m_num_rows);
        const Value* RESTRICT x_ptr = thrust::raw_pointer_cast( x.data());
        Value* RESTRICT y_ptr = thrust::raw_pointer_cast( out.data());
        S.symv( SharedVectorTag(), policy(), Value(1), x_ptr, Value(0), y_ptr);
        return out;
    }
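The friend operator* allocates the result vector and computes y = S*x; the ContainerType must share the execution policy of the matrix. Usage sketch, same include assumptions:

// Example: matrix-vector product with the * operator
#include <thrust/host_vector.h>
#include <vector>
#include <iostream>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    // S = [1 0 2; 0 3 0]
    std::vector<int> offsets = {0, 2, 3}, cols = {0, 2, 1};
    std::vector<double> vals = {1., 2., 3.};
    dg::SparseMatrix<int, double, thrust::host_vector> S( 2, 3,
        thrust::host_vector<int>( offsets.begin(), offsets.end()),
        thrust::host_vector<int>( cols.begin(), cols.end()),
        thrust::host_vector<double>( vals.begin(), vals.end()));
    thrust::host_vector<double> x( 3, 2.);
    thrust::host_vector<double> y = S*x;
    std::cout << y[0] << " " << y[1] << "\n"; // 6 6
}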
    // Puts a matrix linewise into the output stream (host only)
    template<class Ostream, class OtherMatrix = SparseMatrix>
    friend std::enable_if_t<dg::has_policy_v<OtherMatrix, SerialTag>, Ostream>&
        operator<<( Ostream& os, const SparseMatrix& mat)
    {
        os << "Sparse Matrix with "<<mat.m_num_rows<<" rows and "<<mat.m_num_cols<<" columns\n";
        os << " # non-zeroes "<<mat.m_vals.size()<<"\n";
        for( int i = 0; i < (int)mat.m_num_rows; i++)
        {
            for( int pB = mat.m_row_offsets[i]; pB < mat.m_row_offsets[i+1]; pB++)
                os << "("<<i<<","<<mat.m_cols[pB]<<","<<mat.m_vals[pB]<<") ";
            os << "\n";
        }
        return os;
    }
  private:
#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
    mutable CSRCache_gpu m_cache; // see sparsematrix_gpu.cuh
#elif THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP
    mutable CSRCache_omp m_cache; // see sparsematrix_omp.h
#else
    mutable CSRCache_cpu m_cache; // see sparsematrix_cpu.h
#endif
    size_t m_num_rows, m_num_cols;
    Vector<Index> m_row_offsets, m_cols;
    Vector<Value> m_vals;
};
// Register SparseMatrix with the dg type system
template<class I, class T, template<class> class V>
struct TensorTraits<SparseMatrix<I, T, V>>
{
    using value_type = T;
    using tensor_category = SparseMatrixTag;
    using execution_policy = typename SparseMatrix<I, T, V>::policy;
};
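This TensorTraits specialization is what registers SparseMatrix with the dg type system, so the usual trait helpers work on the matrix type. A compile-time sketch (the include name is again an assumption):

// Example: querying the traits of a host SparseMatrix
#include <thrust/host_vector.h>
#include <type_traits>
#include "dg/algorithm.h" // assumed umbrella header

int main()
{
    using Matrix = dg::SparseMatrix<int, double, thrust::host_vector>;
    static_assert( std::is_same_v< dg::get_value_type<Matrix>, double>);
    static_assert( dg::has_policy_v< Matrix, dg::SerialTag>);
}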
Referenced symbols and member documentation (collected from the embedded cross-references):

From exceptions.h (Error classes of the dg library):
    exceptions.h:83  class intended for the use in throw statements
    exceptions.h:29  small class holding a stringstream
    exceptions.h:12  #define _ping_
From the dg core headers:
    blas2.h:325  void symv( MatrixType&& M, const ContainerType1& x, ContainerType2& y)
    tensor_traits.h:49  get_execution_policy = typename TensorTraits<std::decay_t<Vector>>::execution_policy
    predicate.h:86  constexpr bool has_policy_v (utility typedef)
    tensor_traits.h:45  get_value_type = typename TensorTraits<std::decay_t<Vector>>::value_type
    dg_doc.h:378  #define DG_DEVICE, expands to __host__ __device__ if compiled with nvcc, else is empty
Conversion helpers in this file and the backend kernels:
    sparsematrix_cpu.h:101  void spadd_cpu_kernel( size_t B_num_rows, size_t B_num_cols, const I& B_pos, const I& B_idx, const V& B_val, const I& C_pos, const I& C_idx, const V& C_val, I& A_pos, I& A_idx, V& A_val)
    sparsematrix.h:52  void coo2csr_inline( const I0& coo, I1& csr)
    sparsematrix_gpu.cuh:190  void spmv_gpu_kernel( CSRCache_gpu& cache, size_t A_num_rows, size_t A_num_cols, size_t A_nnz, const I* A_pos, const I* A_idx, const V* A_val, value_type alpha, value_type beta, const C1* x_ptr, C2* y_ptr)
    sparsematrix.h:28  void csr2coo_inline( const I0& csr, I1& coo)
    sparsematrix.h:44  I csr2coo( const I& csr)
    sparsematrix_cpu.h:184  void spmv_cpu_kernel( CSRCache_cpu& cache, size_t A_num_rows, size_t A_num_cols, size_t A_nnz, const I* RESTRICT A_pos, const I* RESTRICT A_idx, const V* RESTRICT A_val, value_type alpha, value_type beta, const C1* RESTRICT x_ptr, C2* RESTRICT y_ptr)
    sparsematrix_omp.h:17  void spmv_omp_kernel( CSRCache_omp& cache, size_t A_num_rows, size_t A_num_cols, size_t A_nnz, const I* RESTRICT A_pos, const I* RESTRICT A_idx, const V* RESTRICT A_val, value_type alpha, value_type beta, const C1* RESTRICT x_ptr, C2* RESTRICT y_ptr)
    sparsematrix.h:65  I coo2csr( unsigned num_rows, const I& coo)
    sparsematrix_cpu.h:20  void spgemm_cpu_kernel( size_t B_num_rows, size_t B_num_cols, size_t C_num_cols, const I& B_pos, const I& B_idx, const V& B_val, const I& C_pos, const I& C_idx, const V& C_val, I& A_pos, I& A_idx, V& A_val)
Namespace and tags:
    dg  This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.
    execution_policy.h:26  Indicate sequential execution (SerialTag).
    vector_categories.h:41  Indicate a contiguous chunk of shared memory (SharedVectorTag).
dg::SparseMatrix: A CSR formatted sparse matrix. (sparsematrix.h:96)
Members:
    size_t num_cols() const
        Number of columns in matrix. (sparsematrix.h:276)
    SparseMatrix operator-() const
        Negate. (sparsematrix.h:420)
    void set( size_t num_rows, size_t num_cols, const Vector<Index>& row_offsets, const Vector<Index> column_indices, const Vector<Value>& values, bool sort=false)
        Set csr values directly. (sparsematrix.h:231)
    friend enable_if_serial<OtherMatrix> operator+( const SparseMatrix& lhs, const SparseMatrix& rhs)
        Add. (sparsematrix.h:476)
    Vector<Value>& values()
        Change values vector. (sparsematrix.h:303)
    friend enable_if_serial<OtherMatrix> operator*( const SparseMatrix& lhs, const SparseMatrix& rhs)
        Matrix-matrix multiplication. (sparsematrix.h:551)
    SparseMatrix( const SparseMatrix<I, V, Vec>& src)
        Copy construct from different type. (sparsematrix.h:143)
    SparseMatrix transpose() const
        Transposition. (sparsematrix.h:353)
    Index index_type
        The index type (on GPU must be either int or long). (sparsematrix.h:98)
    dg::get_execution_policy<Vector<Value>> policy
        Execution policy of the matrix. (sparsematrix.h:97)
    size_t total_num_rows() const
        Alias for num_rows. (sparsematrix.h:269)
    const Vector<Index>& row_offsets() const
        Read row_offsets vector. (sparsematrix.h:287)
    void setFromCoo( size_t num_rows, size_t num_cols, const Vector<Index>& row_indices, const Vector<Index>& column_indices, const Vector<Value>& values, bool sort=false)
        Set csr values from coo formatted sparse matrix. (sparsematrix.h:171)
    size_t num_entries() const
        Alias for num_nnz. (sparsematrix.h:282)
    size_t total_num_cols() const
        Alias for num_cols. (sparsematrix.h:271)
    friend enable_if_serial<OtherMatrix> operator*( const SparseMatrix& lhs, const Value& value)
        Scalar multiplication. (sparsematrix.h:537)
    SparseMatrix( size_t num_rows, size_t num_cols, const Vector<Index>& row_offsets, const Vector<Index>& column_indices, const Vector<Value>& values)
        Directly construct from given vectors. (sparsematrix.h:125)
    friend enable_if_serial<OtherMatrix> operator-( const SparseMatrix& lhs, const SparseMatrix& rhs)
        Subtract. (sparsematrix.h:504)
    size_t num_nnz() const
        Number of nonzero elements in matrix. (sparsematrix.h:280)
    enable_if_serial<OtherMatrix>& operator-=( const SparseMatrix& op)
        Subtract. (sparsematrix.h:445)
    std::enable_if_t<dg::has_policy_v<OtherMatrix, SerialTag>, bool> operator==( const SparseMatrix& rhs) const
        Two matrices are considered equal if elements and sparsity pattern are equal. (sparsematrix.h:413)
    SparseMatrix& operator*=( const Value& value)
        Scalar multiply. (sparsematrix.h:457)
    SparseMatrix() = default
        Empty matrix.
    Value value_type
        The value type (on GPU must be either float or double). (sparsematrix.h:99)
    friend std::enable_if_t<dg::has_policy_v<OtherMatrix, SerialTag>, Ostream>& operator<<( Ostream& os, const SparseMatrix& mat)
        Puts a matrix linewise into the output stream. (sparsematrix.h:602)
    void sort_indices()
        Sort by row and column. (sparsematrix.h:252)
    size_t num_vals() const
        Alias for num_nnz. (sparsematrix.h:278)
    const Vector<Index>& column_indices() const
        Read column indices vector. (sparsematrix.h:291)
    std::enable_if_t<std::is_same_v<typename OtherMatrix::policy, SerialTag>, OtherMatrix> enable_if_serial
        A static guard to allow certain members only on host (for serial execution). (sparsematrix.h:103)
    friend enable_if_serial<OtherMatrix> operator*( const Value& value, const SparseMatrix& rhs)
        Scalar multiplication. (sparsematrix.h:520)
    friend ContainerType operator*( const SparseMatrix& S, const ContainerType& x)
        Matrix-vector multiplication. (sparsematrix.h:580)
    const Vector<Value>& values() const
        Read values vector. (sparsematrix.h:295)
    std::enable_if_t<dg::has_policy_v<OtherMatrix, SerialTag>, bool> operator!=( const SparseMatrix& rhs) const
        Two matrices are considered equal if elements and sparsity pattern are equal. (sparsematrix.h:390)
    enable_if_serial<OtherMatrix>& operator+=( const SparseMatrix& op)
        Add. (sparsematrix.h:432)
    size_t num_rows() const
        Number of rows in matrix. (sparsematrix.h:274)
Other referenced types:
    matrix_categories.h:35  Indicate our sparse matrix format (the tensor_category used above).
    tensor_traits.h:38  The vector traits (primary TensorTraits template), specialized here for SparseMatrix with
        value_type = T (sparsematrix.h:640)
        execution_policy = typename SparseMatrix<I,T,V>::policy (sparsematrix.h:642)
    CSRCache structs used by the spmv kernels: sparsematrix_cpu.h:179, sparsematrix_gpu.cuh:89, sparsematrix_omp.h:12