Discontinuous Galerkin Library — documentation page for sparsematrix_gpu.cuh.
Usage: #include "dg/algorithm.h"
Go to the documentation of this file.
1#pragma once
2
#include <cassert>
#include <complex>
#include <complex.h>
#include <exception>
#include <cusparse.h>
#include <thrust/complex.h>
7
8namespace dg
9{
10namespace detail
11{
// Translate a C++ value type into the matching cudaDataType_t enum used by
// the cusparse generic API.  Only the specializations below are supported.
// NOTE: the catch-all overload signals misuse with a runtime assert, which
// is compiled out under NDEBUG — in release builds an unsupported type
// silently yields CUDA_R_64F.
template<class value_type>
inline cudaDataType_t getCudaDataType()
{
    assert( false && "CUDA Type not supported!\n" );
    return CUDA_R_64F;
}

// Real scalar types
template<> inline cudaDataType_t getCudaDataType<int>()    { return CUDA_R_32I; }
template<> inline cudaDataType_t getCudaDataType<float>()  { return CUDA_R_32F; }
template<> inline cudaDataType_t getCudaDataType<double>() { return CUDA_R_64F; }
// Complex types: both std::complex and thrust::complex map to the CUDA complex enums
template<> inline cudaDataType_t getCudaDataType<std::complex<float>>()     { return CUDA_C_32F; }
template<> inline cudaDataType_t getCudaDataType<std::complex<double>>()    { return CUDA_C_64F; }
template<> inline cudaDataType_t getCudaDataType<thrust::complex<float>>()  { return CUDA_C_32F; }
template<> inline cudaDataType_t getCudaDataType<thrust::complex<double>>() { return CUDA_C_64F; }
22
// Translate a C++ integer type into the matching cusparseIndexType_t enum.
// NOTE: the catch-all overload signals misuse with a runtime assert, which
// is compiled out under NDEBUG.
template<class value_type>
inline cusparseIndexType_t getCudaIndexType(){ assert( false && "CUDA Type not supported!\n" ); return CUSPARSE_INDEX_32I; }
template<> inline cusparseIndexType_t getCudaIndexType<int>(){ return CUSPARSE_INDEX_32I;}
// NOTE(review): on LLP64 platforms (e.g. Windows) "long" is 32 bits, so this
// mapping assumes an LP64 platform — verify if Windows support is intended
template<> inline cusparseIndexType_t getCudaIndexType<signed long int>(){ return CUSPARSE_INDEX_64I;}
// Generalization: "long long" is a distinct type from "long" and is the
// 64-bit integer on LLP64 platforms; map it to 64-bit indices as well
template<> inline cusparseIndexType_t getCudaIndexType<signed long long int>(){ return CUSPARSE_INDEX_64I;}
27
28
// Exception wrapping a cusparseStatus_t error code.
// Thrown by CusparseErrorHandle whenever a cusparse call does not return
// CUSPARSE_STATUS_SUCCESS.
struct CusparseError : public std::exception
{
    CusparseError( cusparseStatus_t error): m_error( error) {}

    // Read access to the stored status code
    cusparseStatus_t error() const { return m_error;}
    // Write access to the stored status code
    cusparseStatus_t& error() { return m_error;}
    // Human readable error string provided by cusparse itself
    // (override added: this implements std::exception::what)
    char const* what() const noexcept override {
        return cusparseGetErrorString(m_error);}
    private:
    cusparseStatus_t m_error;
};
40
// Empty utility class that converts cusparse status codes into exceptions.
// Usage:
//   CusparseErrorHandle err;
//   err = cusparseCreateCsr( ... );   // throws CusparseError on failure
// (struct header and local handle reconstructed — they were lost in the
// documentation extraction of this file)
struct CusparseErrorHandle
{
    // Assignment form: check err and throw CusparseError if it is not success
    CusparseErrorHandle operator=( cusparseStatus_t err)
    {
        CusparseErrorHandle h;
        return h(err);
    }
    // Call form: same check, returns *this so calls can be chained
    CusparseErrorHandle operator()( cusparseStatus_t err)
    {
        if( err != CUSPARSE_STATUS_SUCCESS)
            throw CusparseError( err);
        return *this;
    }
};
55
56
// Singleton owning the global cusparse library handle
// https://stackoverflow.com/questions/1008019/how-do-you-implement-the-singleton-design-pattern
// (struct header, getInstance and constructor lines reconstructed — they
// were lost in the documentation extraction of this file)
struct CusparseHandle
{
    // Meyers singleton accessor; construction happens on first call
    static CusparseHandle& getInstance()
    {
        // Exists only once even in different translation units
        // https://stackoverflow.com/questions/50609921/singleton-translation-unit-confusion
        static CusparseHandle instance;
        return instance;
    }
    private:
    cusparseHandle_t m_handle;
    CusparseHandle()
    {
        // NOTE(review): the status of cusparseCreate is not checked here;
        // a failed initialization would surface later as cryptic errors —
        // consider checking, though throwing from a static initializer has
        // its own hazards
        cusparseCreate( &m_handle);
    }
    ~CusparseHandle()
    {
        cusparseDestroy(m_handle);
    }
    public:
    // The raw cusparse handle to pass to cusparse API calls
    cusparseHandle_t handle() const { return m_handle;}
    // Non-copyable: exactly one handle must exist
    CusparseHandle( const CusparseHandle&) = delete;
    void operator=( const CusparseHandle&) = delete;
};
83
84
// NOTE(review): not referenced anywhere in this file — presumably read/set
// by other translation units; verify against callers before removing
inline bool cusparse_is_initialized = false;
86
// https://docs.nvidia.com/cuda/cusparse/#cusparsespmv
// Cache holding the cusparse sparse-matrix descriptor and the SpMV work
// buffer for one CSR matrix.  Building the descriptor and sizing the buffer
// is expensive, so it is done once in update() and reused by spmv_gpu_kernel
// until forget() is called.  Copying deliberately deactivates the cache
// (the descriptor references the source matrix' device arrays and cannot be
// shared); moving transfers ownership.
// (struct header and special-member signatures reconstructed — they were
// lost in the documentation extraction of this file)
struct CSRCache_gpu
{
    CSRCache_gpu() = default;
    // Construct and immediately build the descriptor/buffer for the given
    // device-side CSR arrays (row offsets pos, column indices idx, values val)
    template<class I, class V>
    CSRCache_gpu(
        size_t num_rows, size_t num_cols, size_t nnz,
        const I* pos , const I* idx, const V* val)
    {
        update( num_rows, num_cols, nnz, pos, idx, val);
    }
    CSRCache_gpu( const CSRCache_gpu& src)
    {
        // Copying makes the cache inactive
    }
    CSRCache_gpu( CSRCache_gpu&& src)
    {
        // Steal src's state; src becomes a default (inactive) cache
        src.swap(*this);
    }
    CSRCache_gpu& operator=( const CSRCache_gpu& src)
    {
        if( &src != this)
        {
            CSRCache_gpu tmp(src);
            tmp.swap( *this);
        }
        return *this;
    }
    CSRCache_gpu& operator=( CSRCache_gpu&& src)
    {
        CSRCache_gpu tmp( std::move(src));
        tmp.swap(*this);
        return *this;
    }
    ~CSRCache_gpu()
    {
        if ( m_dBuffer != nullptr)
            cudaFree( m_dBuffer);
        if( m_active)
            cusparseDestroySpMat( m_matA);
    }
    void swap( CSRCache_gpu& src)
    {
        std::swap( m_active, src.m_active);
        std::swap( m_matA, src.m_matA);
        std::swap( m_dBuffer, src.m_dBuffer);
        std::swap( m_bufferSize, src.m_bufferSize);
    }
    // Invalidate the cache; the next spmv will call update() again
    void forget() { m_active = false;}
    bool isUpToDate() const { return m_active;}
    // (Re-)build the sparse-matrix descriptor and size the SpMV work buffer.
    // pos/idx/val must be device pointers that outlive this cache (the
    // descriptor references them; they are not copied).
    template<class I, class V>
    void update(
        size_t num_rows, size_t num_cols, size_t nnz,
        const I* pos , const I* idx, const V* val)
    {
        // cusparse practically only supports uniform data-types (e.g. double Matrix, complex vector is not supported)
        CusparseErrorHandle err;
        // Fix: destroy a previously created descriptor before overwriting
        // m_matA, otherwise repeated update() calls leak descriptors
        if( m_active)
        {
            cusparseDestroySpMat( m_matA);
            m_active = false;
        }
        cusparseDnVecDescr_t vecX;
        cusparseDnVecDescr_t vecY;
        err = cusparseCreateCsr( &m_matA, num_rows, num_cols, nnz,
            const_cast<I*>(pos), const_cast<I*>(idx), const_cast<V*>(val),
            getCudaIndexType<I>(), getCudaIndexType<I>(),
            CUSPARSE_INDEX_BASE_ZERO, getCudaDataType<V>() );
        // Dummy dense vectors only used to size (and preprocess) the buffer
        V * x_ptr;
        cudaMalloc( &x_ptr, num_cols*sizeof(V));
        err = cusparseCreateDnVec( &vecX, num_cols, x_ptr, getCudaDataType<V>());
        V * y_ptr;
        // Fix: y is the result vector with num_rows elements; allocating
        // num_cols elements under-allocates when num_rows > num_cols
        cudaMalloc( &y_ptr, num_rows*sizeof(V));
        err = cusparseCreateDnVec( &vecY, num_rows, y_ptr, getCudaDataType<V>());
        size_t bufferSize = 0;
        V alpha = 1, beta = 0;
        //ALG1 is not reproducible, ALG2 is bitwise reproducible
        err = cusparseSpMV_bufferSize( CusparseHandle::getInstance().handle(),
            CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, m_matA, vecX, &beta, vecY,
            getCudaDataType<V>(), CUSPARSE_SPMV_CSR_ALG1, &bufferSize);
        // Re-allocate buffer
        if ( m_dBuffer != nullptr)
            cudaFree( m_dBuffer);
        cudaMalloc( &m_dBuffer, bufferSize);
        // Fix: remember the size — it was never stored, so getBufferSize()
        // always returned 0
        m_bufferSize = bufferSize;

        m_active = true;
#if (CUDART_VERSION >= 12040) // _preprocess only exists as of 12.4
        err = cusparseSpMV_preprocess( CusparseHandle::getInstance().handle(),
            CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, m_matA, vecX, &beta, vecY,
            getCudaDataType<V>(), CUSPARSE_SPMV_CSR_ALG1, m_dBuffer);
        // m_buffer is now associated to m_matA
#endif
        err = cusparseDestroyDnVec( vecX);
        err = cusparseDestroyDnVec( vecY);
        cudaFree( x_ptr);
        cudaFree( y_ptr);
    }
    size_t getBufferSize() { return m_bufferSize;}
    void * getBuffer() { return m_dBuffer;}
    cusparseSpMatDescr_t getSpMat() const { return m_matA;}

    private:
    bool m_active = false;
    cusparseSpMatDescr_t m_matA;
    void * m_dBuffer = nullptr;
    size_t m_bufferSize = 0;
};
187
//y = alpha A*x + beta y
// Sparse matrix-vector product via cusparseSpMV for a CSR matrix given by
// device arrays (A_pos row offsets, A_idx column indices, A_val values).
// x_ptr/y_ptr are device pointers of length A_num_cols / A_num_rows.
// The cache is (re)built on first use or after cache.forget(); the matrix
// descriptor and work buffer are then reused across calls.
// Throws CusparseError on any failing cusparse call.
// (signature line and error-handle declaration reconstructed — they were
// lost in the documentation extraction of this file)
template<class I, class V, class value_type, class C1, class C2>
void spmv_gpu_kernel(
    CSRCache_gpu& cache,
    size_t A_num_rows, size_t A_num_cols, size_t A_nnz,
    const I* A_pos , const I* A_idx, const V* A_val,
    value_type alpha, value_type beta, const C1* x_ptr, C2* y_ptr
)
{
    CusparseErrorHandle err;
    // Assume here that the descriptors are lightweight structures ...
    cusparseDnVecDescr_t vecX;
    cusparseDnVecDescr_t vecY;
    err = cusparseCreateDnVec( &vecX, A_num_cols, const_cast<C1*>(x_ptr), getCudaDataType<C1>());
    err = cusparseCreateDnVec( &vecY, A_num_rows, y_ptr, getCudaDataType<C2>());

    if( not cache.isUpToDate())
        cache.update<I,V>( A_num_rows, A_num_cols, A_nnz, A_pos, A_idx, A_val);

    // Compute type follows the matrix value type V; cusparse practically
    // requires uniform data types across matrix and vectors anyway
    err = cusparseSpMV( CusparseHandle::getInstance().handle(), CUSPARSE_OPERATION_NON_TRANSPOSE,
        &alpha, cache.getSpMat(), vecX, &beta, vecY, getCudaDataType<V>(),
        CUSPARSE_SPMV_CSR_ALG1, cache.getBuffer());

    err = cusparseDestroyDnVec( vecX);
    err = cusparseDestroyDnVec( vecY);
}
214
215} // namespace detail
216} // namespace dg
void spmv_gpu_kernel(CSRCache_gpu &cache, size_t A_num_rows, size_t A_num_cols, size_t A_nnz, const I *A_pos, const I *A_idx, const V *A_val, value_type alpha, value_type beta, const C1 *x_ptr, C2 *y_ptr)
Definition sparsematrix_gpu.cuh:190
cudaDataType_t getCudaDataType< double >()
Definition sparsematrix_gpu.cuh:17
cusparseIndexType_t getCudaIndexType< int >()
Definition sparsematrix_gpu.cuh:25
cudaDataType_t getCudaDataType< float >()
Definition sparsematrix_gpu.cuh:16
cusparseIndexType_t getCudaIndexType< signed long int >()
Definition sparsematrix_gpu.cuh:26
cudaDataType_t getCudaDataType()
Definition sparsematrix_gpu.cuh:13
cudaDataType_t getCudaDataType< int >()
Definition sparsematrix_gpu.cuh:15
cusparseIndexType_t getCudaIndexType()
Definition sparsematrix_gpu.cuh:24
bool cusparse_is_initialized
Definition sparsematrix_gpu.cuh:85
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.
Definition sparsematrix_gpu.cuh:89
CSRCache_gpu & operator=(CSRCache_gpu &&src)
Definition sparsematrix_gpu.cuh:114
CSRCache_gpu & operator=(const CSRCache_gpu &src)
Definition sparsematrix_gpu.cuh:106
CSRCache_gpu(CSRCache_gpu &&src)
Definition sparsematrix_gpu.cuh:102
bool isUpToDate() const
Definition sparsematrix_gpu.cuh:134
cusparseSpMatDescr_t getSpMat() const
Definition sparsematrix_gpu.cuh:179
void update(size_t num_rows, size_t num_cols, size_t nnz, const I *pos, const I *idx, const V *val)
Definition sparsematrix_gpu.cuh:136
size_t getBufferSize()
Definition sparsematrix_gpu.cuh:177
CSRCache_gpu(const CSRCache_gpu &src)
Definition sparsematrix_gpu.cuh:98
void swap(CSRCache_gpu &src)
Definition sparsematrix_gpu.cuh:126
void * getBuffer()
Definition sparsematrix_gpu.cuh:178
void forget()
Definition sparsematrix_gpu.cuh:133
CSRCache_gpu(size_t num_rows, size_t num_cols, size_t nnz, const I *pos, const I *idx, const V *val)
Definition sparsematrix_gpu.cuh:92
~CSRCache_gpu()
Definition sparsematrix_gpu.cuh:119
Definition sparsematrix_gpu.cuh:42
CusparseErrorHandle operator()(cusparseStatus_t err)
Definition sparsematrix_gpu.cuh:48
CusparseErrorHandle operator=(cusparseStatus_t err)
Definition sparsematrix_gpu.cuh:43
Definition sparsematrix_gpu.cuh:30
CusparseError(cusparseStatus_t error)
Definition sparsematrix_gpu.cuh:31
cusparseStatus_t & error()
Definition sparsematrix_gpu.cuh:34
cusparseStatus_t error() const
Definition sparsematrix_gpu.cuh:33
char const * what() const noexcept
Definition sparsematrix_gpu.cuh:35
Definition sparsematrix_gpu.cuh:60
CusparseHandle(const CusparseHandle &)=delete
void operator=(const CusparseHandle &)=delete
static CusparseHandle & getInstance()
Definition sparsematrix_gpu.cuh:61
cusparseHandle_t handle() const
Definition sparsematrix_gpu.cuh:79