19template<
class I,
class V>
21 size_t B_num_rows,
size_t B_num_cols,
size_t C_num_cols,
22 const I& B_pos ,
const I& B_idx,
const V& B_val,
23 const I& C_pos ,
const I& C_idx,
const V& C_val,
24 I& A_pos , I& A_idx, V& A_val
36 A_pos.resize( B_num_rows+1);
38 std::vector<bool> w( C_num_cols,
false);
41 for(
int i = 0; i<(int)B_num_rows; i++)
43 for(
int pB = B_pos[i]; pB < B_pos[i+1]; pB++)
46 for(
int pC = C_pos[k]; pC < C_pos[k+1]; pC++)
57 std::sort( wlist.begin(), wlist.end());
60 A_idx.resize( A_pos[i] + wlist.size());
61 A_pos[i+1] = A_pos[i] + wlist.size();
62 for(
int pwlist = 0; pwlist < (int)wlist.size(); pwlist ++)
64 int j = wlist[pwlist];
65 A_idx[ A_pos[i] + pwlist ] = j;
70 A_val.resize( A_pos[B_num_rows]);
74 V workspace( C_num_cols, 0);
75 for (
int i = 0; i < (int)B_num_rows; i++)
78 for (
int pB = B_pos[i]; pB < B_pos[i+1]; pB++)
81 for (
int pC = C_pos[k]; pC < C_pos[k+1]; pC++)
84 workspace[j] += B_val[pB] * C_val[pC];
88 for (
int pA = A_pos[i]; pA < A_pos[i+1]; pA++)
91 A_val[pA] = workspace[j];
100template<
class I,
class V>
102 size_t B_num_rows,
size_t B_num_cols,
103 const I& B_pos ,
const I& B_idx,
const V& B_val,
104 const I& C_pos ,
const I& C_idx,
const V& C_val,
105 I& A_pos , I& A_idx, V& A_val
113 A_pos.resize( B_num_rows+1);
115 std::vector<bool> w( B_num_cols,
false);
118 for(
int i = 0; i<(int)B_num_rows; i++)
121 for (
int pB = B_pos[i]; pB < B_pos[i+1]; pB++)
127 for (
int pC = C_pos[i]; pC < C_pos[i+1]; pC++)
137 std::sort( wlist.begin(), wlist.end());
139 A_idx.resize( A_pos[i] + wlist.size());
140 A_pos[i+1] = A_pos[i] + wlist.size();
141 for(
int pwlist = 0; pwlist < (int)wlist.size(); pwlist ++)
143 int j = wlist[pwlist];
144 A_idx[ A_pos[i] + pwlist ] = j;
149 A_val.resize( A_pos[B_num_rows]);
154 V workspace( B_num_cols, 0);
155 for (
int i = 0; i < (int)B_num_rows; i++)
158 for (
int pB = B_pos[i]; pB < B_pos[i+1]; pB++)
161 workspace[ib] = B_val[pB];
163 for (
int pC = C_pos[i]; pC < C_pos[i+1]; pC++)
166 workspace[ic] += C_val[pC];
169 for (
int pA = A_pos[i]; pA < A_pos[i+1]; pA++)
172 A_val[pA] = workspace[ia];
183template<
class I,
class V,
class value_type,
class C1,
class C2>
186 size_t A_num_rows,
size_t A_num_cols,
size_t A_nnz,
187 const I* RESTRICT A_pos ,
const I* RESTRICT A_idx,
const V* RESTRICT A_val,
188 value_type alpha, value_type beta,
const C1* RESTRICT x_ptr, C2* RESTRICT y_ptr
191 if( beta == value_type(1))
194 for(
int i = 0; i < (int)A_num_rows; i++)
196 for (
int jj = A_pos[i]; jj < A_pos[i+1]; jj++)
199 y_ptr[i] = DG_FMA( alpha*A_val[jj], x_ptr[j], y_ptr[i]);
206 for(
int i = 0; i < (int)A_num_rows; i++)
209 for (
int jj = A_pos[i]; jj < A_pos[i+1]; jj++)
212 temp = DG_FMA( alpha*A_val[jj], x_ptr[j], temp);
215 y_ptr[i] = DG_FMA( beta, y_ptr[i], temp);
void spadd_cpu_kernel(size_t B_num_rows, size_t B_num_cols, const I &B_pos, const I &B_idx, const V &B_val, const I &C_pos, const I &C_idx, const V &C_val, I &A_pos, I &A_idx, V &A_val)
Definition sparsematrix_cpu.h:101
void spmv_cpu_kernel(CSRCache_cpu &cache, size_t A_num_rows, size_t A_num_cols, size_t A_nnz, const I *RESTRICT A_pos, const I *RESTRICT A_idx, const V *RESTRICT A_val, value_type alpha, value_type beta, const C1 *RESTRICT x_ptr, C2 *RESTRICT y_ptr)
Definition sparsematrix_cpu.h:184
void spgemm_cpu_kernel(size_t B_num_rows, size_t B_num_cols, size_t C_num_cols, const I &B_pos, const I &B_idx, const V &B_val, const I &C_pos, const I &C_idx, const V &C_val, I &A_pos, I &A_idx, V &A_val)
Definition sparsematrix_cpu.h:20
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin li...
Definition sparsematrix_cpu.h:179
void forget()
Definition sparsematrix_cpu.h:180