dg/html/sparseblockmat_8h_source.html

#pragma once


#include <cmath>

#include <numeric>

#include <thrust/host_vector.h>

#include "exblas/exdot_serial.h"

#include "config.h"

#include "exceptions.h"

#include "tensor_traits.h"

#include "sparsematrix.h"


namespace dg

{


/**
 * @brief Ell Sparse Block Matrix format.
 *
 * Every block row stores exactly blocks_per_line entries. An entry consists
 * of a block-column index (cols_idx) and the number of an n x n block inside
 * the data array (data_idx). Entries whose column index equals
 * invalid_index are skipped when the matrix is converted to a sparse matrix.
 * The full matrix has total_num_rows() x total_num_cols() entries, i.e. the
 * block matrix is extended by the factors left_size and right_size.
 *
 * @tparam real_type type of the block entries
 * @tparam Vector container template used for the data and index arrays
 */
template<class real_type, template <class> class Vector>
struct EllSparseBlockMat
{
    /// Column index value that marks an entry of cols_idx as "no block here"
    static constexpr int invalid_index = -1;

    /// Default constructor: empty containers, all sizes are zero
    EllSparseBlockMat() = default;

    /**
     * @brief Allocate storage.
     *
     * Sizes data to num_different_blocks*n*n, cols_idx and data_idx to
     * num_block_rows*num_blocks_per_line, and right_range to 2 elements.
     * left_size and right_size are set to 1 and right_range to [0,1).
     *
     * @param num_block_rows number of block rows
     * @param num_block_cols number of block columns
     * @param num_blocks_per_line number of blocks stored in each block row
     * @param num_different_blocks number of n x n blocks held in data
     * @param n each block has size n*n
     */
    EllSparseBlockMat( int num_block_rows, int num_block_cols,
                  int num_blocks_per_line, int num_different_blocks, int n):
        data(num_different_blocks*n*n),
        cols_idx( num_block_rows*num_blocks_per_line),
        data_idx(cols_idx.size()), right_range(2),
        num_rows(num_block_rows),
        num_cols(num_block_cols),
        blocks_per_line(num_blocks_per_line),
        n(n), left_size(1), right_size(1)
        {
            right_range[0]=0;
            right_range[1]=1;
        }

    template< class other_real_type, template<class> class Other_Vector>
    friend struct EllSparseBlockMat; // enable copy

    /**
     * @brief Copy from a matrix with another value type and/or container.
     *
     * All containers are assigned element-wise by the container's own
     * assignment operator; all sizes are copied.
     *
     * @param src the matrix to copy from
     */
    template< class other_real_type, template<class> class Other_Vector>
    EllSparseBlockMat( const EllSparseBlockMat<other_real_type, Other_Vector>& src)
    {
        data = src.data;
        cols_idx = src.cols_idx;
        data_idx = src.data_idx;
        num_rows = src.num_rows;
        num_cols = src.num_cols;
        blocks_per_line = src.blocks_per_line;
        n = src.n;
        left_size = src.left_size;
        right_size = src.right_size;
        right_range = src.right_range;
    }

    /// @return total number of rows, i.e. num_rows*n*left_size*right_size
    int total_num_rows()const{
        return num_rows*n*left_size*right_size;
    }

    /// @return total number of columns, i.e. num_cols*n*left_size*right_size
    int total_num_cols()const{
        return num_cols*n*left_size*right_size;
    }

    /**
     * @brief Convert to a sparse matrix.
     * @return the matrix as a dg::SparseMatrix living on the host
     */
    dg::SparseMatrix<int, real_type, thrust::host_vector> asCuspMatrix() const;

    /**
     * @brief Apply the matrix to a vector (serial execution).
     *
     * Forwards all arguments to the SerialTag multiplication kernel.
     * NOTE(review): presumably computes y = alpha*M*x + beta*y as in the
     * commented-out reference implementation - confirm in the kernel header.
     *
     * @param alpha scalar multiplied with the matrix-vector product
     * @param x input vector (must not alias y)
     * @param beta scalar multiplied with y
     * @param y input/output vector (must not alias x)
     */
    template<class value_type>
    void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const
    {
        launch_multiply_kernel( SerialTag(), alpha, x, beta, y);
    }

    /// Same as the serial symv but forwards to the CudaTag kernel
    template<class value_type>
    void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const
    {
        launch_multiply_kernel( CudaTag(), alpha, x, beta, y);
    }

#ifdef _OPENMP
    /**
     * @brief Same as the serial symv but forwards to the OmpTag kernel.
     *
     * If called outside of a parallel region a new parallel region is opened
     * around the kernel call; inside an existing parallel region the kernel
     * is called directly.
     */
    template<class value_type>
    void symv(SharedVectorTag, OmpTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const
    {
        if( !omp_in_parallel())
        {
            #pragma omp parallel
            {
                launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
            }
            return;
        }
        launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
    }
#endif //_OPENMP

    /**
     * @brief Set right_range[0] = 0, right_range[1] = right_size
     * @note right_range must already hold two elements (this is the case
     * after the allocating constructor, not after the default constructor)
     */
    void set_default_range(){
        right_range[0]=0;
        right_range[1]=right_size;
    }

    /// Set right_size = new_right_size and reset the range via set_default_range()
    void set_right_size( int new_right_size ){
        right_size = new_right_size;
        set_default_range();
    }

    /// Set left_size = new_left_size
    void set_left_size( int new_left_size ){
        left_size = new_left_size;
    }

    /**
     * @brief Display internal data to a stream.
     * @param os the output stream
     * @param show_data if true, the content of the data array is printed as well
     */
    void display( std::ostream& os = std::cout, bool show_data = false) const;

    Vector<real_type> data; //!< of size n*n*num_different_blocks, contains the blocks one after the other
    Vector<int> cols_idx; //!< of size num_rows*blocks_per_line, contains the block column indices (or invalid_index)
    Vector<int> data_idx; //!< same size as cols_idx, contains the block number in the data array
    Vector<int> right_range; //!< two elements [begin, end): range of right indices the matrix is applied to
    // The sizes below are zero-initialized so that a default constructed
    // matrix never exposes indeterminate values
    int num_rows = 0; //!< number of block rows
    int num_cols = 0; //!< number of block columns
    int blocks_per_line = 0; //!< number of blocks in each block row
    int n = 0; //!< each block has size n*n
    int left_size = 0; //!< size of the left Kronecker delta
    int right_size = 0; //!< size of the right Kronecker delta
    private:
    // One multiplication kernel per execution policy; the definitions live in
    // the kernel headers included at the end of this file
    template<class value_type>
    void launch_multiply_kernel(SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const;
    template<class value_type>
    void launch_multiply_kernel(CudaTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const;
#ifdef _OPENMP
    template<class value_type>
    void launch_multiply_kernel(OmpTag, value_type alpha, const value_type* x, value_type beta, value_type* y) const;
#endif //_OPENMP

};


// TODO not sure this should be public...


//four classes/files play together in mpi distributed EllSparseBlockMat

//CooSparseBlockMat and kernels, NearestNeighborComm, RowColDistMat

//and the creation functions in mpi_derivatives.h

/**
 * @brief Coo Sparse Block Matrix format.
 *
 * The matrix is a list of num_entries triplets (rows_idx, cols_idx, data_idx).
 * Each triplet places the n x n block with number data_idx from the data
 * array at the given block row and block column.
 *
 * @tparam real_type type of the block entries
 * @tparam Vector container template used for the data and index arrays
 */
template<class real_type, template <class > class Vector>
struct CooSparseBlockMat
{
    /// Default constructor: empty containers, all sizes are zero
    CooSparseBlockMat() = default;

    /**
     * @brief Store the sizes; the containers stay empty (num_entries = 0).
     *
     * @param num_block_rows number of block rows
     * @param num_block_cols number of block columns
     * @param n each block has size n*n
     * @param left_size size of the left Kronecker delta
     * @param right_size size of the right Kronecker delta
     */
    CooSparseBlockMat( int num_block_rows, int num_block_cols, int n, int left_size, int right_size):
        num_rows(num_block_rows), num_cols(num_block_cols), num_entries(0),
        n(n),left_size(left_size), right_size(right_size){}

    template< class other_real_type, template<class> class Other_Vector>
    friend struct CooSparseBlockMat; // enable copy

    /**
     * @brief Copy from a matrix with another value type and/or container.
     *
     * All containers are assigned by the container's own assignment
     * operator; all sizes are copied.
     *
     * @param src the matrix to copy from
     */
    template< class other_real_type, template<class> class Other_Vector>
    CooSparseBlockMat( const CooSparseBlockMat<other_real_type, Other_Vector>& src)
    {
        data = src.data;
        rows_idx = src.rows_idx;
        cols_idx = src.cols_idx;
        data_idx = src.data_idx;
        num_rows = src.num_rows;
        num_cols = src.num_cols;
        num_entries = src.num_entries;
        n = src.n;
        left_size = src.left_size;
        right_size = src.right_size;
    }

    /**
     * @brief Convenience function to assemble the matrix.
     *
     * Appends the block to the data array and registers an entry that refers
     * to it.
     *
     * @param row block row index
     * @param col block column index
     * @param element the new block, must contain n*n values (checked by assert)
     */
    void add_value( int row, int col, const Vector<real_type>& element)
    {
        assert( static_cast<int>(element.size()) == n*n);
        // the new block gets the next free block number
        const int index = static_cast<int>(data.size()/n/n);
        data.insert( data.end(), element.begin(), element.end());
        add_value( row, col, index);
    }

    /**
     * @brief Convenience function to assemble the matrix.
     *
     * Registers an entry that refers to a block already present in the data
     * array and increments num_entries.
     *
     * @param row block row index
     * @param col block column index
     * @param data block number in the data array (the parameter shadows the
     * member of the same name inside this function)
     */
    void add_value( int row, int col, int data)
    {
        rows_idx.push_back(row);
        cols_idx.push_back(col);
        data_idx.push_back( data );
        num_entries++;
    }

    /// @return total number of rows, i.e. num_rows*n*left_size*right_size
    int total_num_rows()const{
        return num_rows*n*left_size*right_size;
    }

    /// @return total number of columns, i.e. num_cols*n*left_size*right_size
    int total_num_cols()const{
        return num_cols*n*left_size*right_size;
    }

    /**
     * @brief Apply the matrix to a vector (serial execution).
     *
     * Forwards all arguments to the SerialTag multiplication kernel.
     * NOTE(review): the commented-out reference implementation in this file
     * requires beta == 1 - confirm whether the kernels share that restriction.
     *
     * @param alpha scalar multiplied with the matrix-vector product
     * @param x array of pointers to the input data
     * @param beta scalar multiplied with y
     * @param y input/output vector
     */
    template<class value_type>
    void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type** x, value_type beta, value_type* RESTRICT y) const
    {
        launch_multiply_kernel( SerialTag(), alpha, x, beta, y);
    }

    /// Same as the serial symv but forwards to the CudaTag kernel
    template<class value_type>
    void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const
    {
        launch_multiply_kernel( CudaTag(), alpha, x, beta, y);
    }

#ifdef _OPENMP
    /**
     * @brief Same as the serial symv but forwards to the OmpTag kernel.
     *
     * If called outside of a parallel region a new parallel region is opened
     * around the kernel call; inside an existing parallel region the kernel
     * is called directly.
     */
    template<class value_type>
    void symv(SharedVectorTag, OmpTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const
    {
        if( !omp_in_parallel())
        {
            #pragma omp parallel
            {
                launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
            }
            return;
        }
        launch_multiply_kernel( OmpTag(), alpha, x, beta, y);
    }
#endif //_OPENMP

    /**
     * @brief Display internal data to a stream.
     * @param os the output stream
     * @param show_data if true, the content of the data array is printed as well
     */
    void display(std::ostream& os = std::cout, bool show_data = false) const;

    Vector<real_type> data; //!< of size n*n*num_different_blocks, contains the blocks one after the other
    Vector<int> cols_idx; //!< of size num_entries, contains the column indices
    Vector<int> rows_idx; //!< of size num_entries, contains the row indices
    Vector<int> data_idx; //!< of size num_entries, contains the block numbers in the data array
    // The sizes below are zero-initialized so that a default constructed
    // matrix never exposes indeterminate values
    int num_rows = 0; //!< number of rows
    int num_cols = 0; //!< number of columns
    int num_entries = 0; //!< number of entries in the matrix
    int n = 0; //!< each block has size n*n
    int left_size = 0; //!< size of the left Kronecker delta
    int right_size = 0; //!< size of the right Kronecker delta
    private:
    // One multiplication kernel per execution policy; the definitions live in
    // the kernel headers included at the end of this file
    template<class value_type>
    void launch_multiply_kernel(SerialTag, value_type alpha, const value_type** x, value_type beta, value_type* RESTRICT y) const;
    template<class value_type>
    void launch_multiply_kernel(CudaTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const;
#ifdef _OPENMP
    template<class value_type>
    void launch_multiply_kernel(OmpTag, value_type alpha, const value_type** x, value_type beta, value_type* y) const;
#endif //_OPENMP
};


//template<class real_type, template<class> class Vector>

//template<class value_type>

//void EllSparseBlockMat<real_type, Vector>::symv(SharedVectorTag, SerialTag, value_type alpha, const value_type* RESTRICT x, value_type beta, value_type* RESTRICT y) const

//{

//    //simplest implementation (all optimization must respect the order of operations)

//    for( int s=0; s<left_size; s++)

//    for( int i=0; i<num_rows; i++)

//    for( int k=0; k<n; k++)

//    for( int j=right_range[0]; j<right_range[1]; j++)

//    {

//        int I = ((s*num_rows + i)*n+k)*right_size+j;

//        // if y[I] isnan then even beta = 0 does not make it 0

//        y[I] = beta == 0 ? (value_type)0 : y[I]*beta;

//        for( int d=0; d<blocks_per_line; d++)

//        {

//            value_type temp = 0;

//            int J = cols_idx[i*blocks_per_line+d];

//            if ( J == invalid_index)

//                continue;

//

//            for( int q=0; q<n; q++) //multiplication-loop

//                temp = DG_FMA( data[ (data_idx[i*blocks_per_line+d]*n + k)*n+q],

//                            x[((s*num_cols + J)*n+q)*right_size+j],

//                            temp);

//            y[I] = DG_FMA( alpha,temp, y[I]);

//        }

//    }

//}

/**
 * @brief Convert to a sparse matrix.
 *
 * Expands every stored block into scalar (row, column, value) triplets and
 * hands them to dg::SparseMatrix::setFromCoo. Only right indices inside
 * [right_range[0], right_range[1]) are generated and entries whose column
 * index equals invalid_index are skipped. The triplets are emitted row by
 * row, within a row block by block, and within a block column by column.
 *
 * @return a sparse matrix of size total_num_rows() x total_num_cols()
 */
template<class real_type, template<class> class Vector>
dg::SparseMatrix<int, real_type, thrust::host_vector> EllSparseBlockMat<real_type, Vector>::asCuspMatrix() const
{
    using size_type = typename thrust::host_vector<real_type>::size_type;
    // The range does not change during the conversion: read it only once
    const int j_begin = right_range[0];
    const int j_end   = right_range[1];

    thrust::host_vector<real_type > values;
    thrust::host_vector<int> row_indices;
    thrust::host_vector<int> column_indices;
    // Reserve an upper bound on the number of triplets (it is reached if no
    // entry is invalid) so that the vectors never have to reallocate
    if( left_size > 0 && num_rows > 0 && n > 0 && blocks_per_line > 0
            && j_end > j_begin)
    {
        size_type max_entries = static_cast<size_type>( left_size);
        max_entries *= static_cast<size_type>( num_rows);
        max_entries *= static_cast<size_type>( n);
        max_entries *= static_cast<size_type>( j_end - j_begin);
        max_entries *= static_cast<size_type>( blocks_per_line);
        max_entries *= static_cast<size_type>( n);
        values.reserve( max_entries);
        row_indices.reserve( max_entries);
        column_indices.reserve( max_entries);
    }
    for( int s=0; s<left_size; s++)
    for( int i=0; i<num_rows; i++)
    for( int k=0; k<n; k++)
    for( int j=j_begin; j<j_end; j++)
    {
        const int I = ((s*num_rows + i)*n+k)*right_size+j;
        for( int d=0; d<blocks_per_line; d++)
        {
            // block column and block number are the same for all q
            const int J = cols_idx[i*blocks_per_line+d];
            if ( J == invalid_index)
                continue;
            const int block = data_idx[i*blocks_per_line+d];
            for( int q=0; q<n; q++) //multiplication-loop
            {
                row_indices.push_back(I);
                column_indices.push_back(
                    ((s*num_cols + J)*n+q)*right_size+j);
                values.push_back(data[ (block*n + k)*n+q]);
            }
        }
    }
    dg::SparseMatrix<int,real_type, thrust::host_vector> A;
    A.setFromCoo( total_num_rows(), total_num_cols(), row_indices, column_indices, values);
    return A;
}


//template<class real_type, template<class> class Vector>

//template<class value_type>

//void CooSparseBlockMat<real_type, Vector>::symv( SharedVectorTag, SerialTag, value_type alpha, const value_type** x, value_type beta, value_type* RESTRICT y) const

//{

//    if( num_entries==0)

//        return;

//    if( beta!= 1 )

//        std::cerr << "Beta != 1 yields wrong results in CooSparseBlockMat!! Beta = "<<beta<<"\n";

//    assert( beta == 1 && "Beta != 1 yields wrong results in CooSparseBlockMat!!");

//    // In fact, Beta is ignored in the following code

//    // beta == 1 avoids the need to access all values in y, just the cols we want

//    // This makes symv a sparse vector = sparse matrix x sparse vector operation

//

//    //simplest implementation (sums block by block)

//    for( int s=0; s<left_size; s++)

//    for( int k=0; k<n; k++)

//    for( int j=0; j<right_size; j++)

//    for( int i=0; i<num_entries; i++)

//    {

//        value_type temp = 0;

//        for( int q=0; q<n; q++) //multiplication-loop

//            temp = DG_FMA( data[ (data_idx[i]*n + k)*n+q],

//                    //x[((s*num_cols + cols_idx[i])*n+q)*right_size+j],

//                    x[cols_idx[i]][(q*left_size +s )*right_size+j],

//                    temp);

//        int I = ((s*num_rows + rows_idx[i])*n+k)*right_size+j;

//        y[I] = DG_FMA( alpha,temp, y[I]);

//    }

//}


/**
 * @brief Display internal data to a stream.
 *
 * Prints the sizes, the range and the column and data index tables (one line
 * per block row). With show_data every value of the data array is printed as
 * well, both as a floating point number and as the integer member of a
 * dg::exblas::udouble holding that number.
 *
 * @param os the output stream
 * @param show_data if true, the content of the data array is printed as well
 */
template<class T, template<class> class Vector>
void EllSparseBlockMat<T, Vector>::display( std::ostream& os, bool show_data ) const
{
    // writes one "label value" line
    auto print_field = [&os]( const char* label, int value)
    {
        os << label << value << "\n";
    };
    // writes an index table with one line per block row
    auto print_table = [&os, this]( const Vector<int>& table)
    {
        for( int row=0; row<num_rows; row++)
        {
            const int first = row*blocks_per_line;
            for( int slot=0; slot<blocks_per_line; slot++)
                os << table[first + slot] <<" ";
            os << "\n";
        }
    };

    os << "Data array has   "<<data.size()/n/n<<" blocks of size "<<n<<"x"<<n<<"\n";
    print_field( "num_rows         ", num_rows);
    print_field( "num_cols         ", num_cols);
    print_field( "blocks_per_line  ", blocks_per_line);
    print_field( "n                ", n);
    print_field( "left_size             ", left_size);
    print_field( "right_size            ", right_size);
    print_field( "right_range_0         ", right_range[0]);
    print_field( "right_range_1         ", right_range[1]);

    os << "Column indices: \n";
    print_table( cols_idx);
    os << "\n Data indices: \n";
    print_table( data_idx);

    if( show_data)
    {
        os << "\n Data: \n";
        const auto num_blocks = data.size()/n/n;
        const unsigned block_len = n*n;
        for( unsigned blk=0; blk<num_blocks; blk++)
        {
            for( unsigned entry=0; entry<block_len; entry++)
            {
                // the union exposes the bit pattern of the value
                dg::exblas::udouble bits;
                bits.d = data[blk*block_len + entry];
                os << "idx "<<blk<<" "<<bits.d <<"\t"<<bits.i<<"\n";
            }
        }
    }
    os << std::endl;
}


/**
 * @brief Display internal data to a stream.
 *
 * Prints the sizes followed by one "row column data" line per entry. With
 * show_data every value of the data array is printed as well, both as a
 * floating point number and as the integer member of a dg::exblas::udouble
 * holding that number.
 *
 * @param os the output stream
 * @param show_data if true, the content of the data array is printed as well
 */
template<class real_type, template<class> class Vector>
void CooSparseBlockMat<real_type, Vector>::display( std::ostream& os, bool show_data) const
{
    // writes one "label value" line
    auto print_field = [&os]( const char* label, int value)
    {
        os << label << value << "\n";
    };

    os << "Data array has   "<<data.size()/n/n<<" blocks of size "<<n<<"x"<<n<<"\n";
    print_field( "num_rows         ", num_rows);
    print_field( "num_cols         ", num_cols);
    print_field( "num_entries      ", num_entries);
    print_field( "n                ", n);
    print_field( "left_size             ", left_size);
    print_field( "right_size            ", right_size);

    os << "row\tcolumn\tdata:\n";
    for( int entry=0; entry<num_entries; entry++)
    {
        os << rows_idx[entry]<<"\t"<<cols_idx[entry] <<"\t"<<data_idx[entry]<<"\n";
    }

    if( show_data)
    {
        os << "\n Data: \n";
        const auto num_blocks = data.size()/n/n;
        const unsigned block_len = n*n;
        for( unsigned blk=0; blk<num_blocks; blk++)
        {
            for( unsigned value=0; value<block_len; value++)
            {
                // the union exposes the bit pattern of the value
                dg::exblas::udouble bits;
                bits.d = data[blk*block_len + value];
                os << "idx "<<blk<<" "<<bits.d <<"\t"<<bits.i<<"\n";
            }
        }
    }
    os << std::endl;
}


/// Traits of the Ell sparse block matrix: a SparseBlockMatrixTag tensor whose
/// execution policy is the one of its container
template <class Value, template<class> class Container>
struct TensorTraits<EllSparseBlockMat<Value, Container> >
{
    using tensor_category = SparseBlockMatrixTag;
    using execution_policy = dg::get_execution_policy<Container<Value>>;
    using value_type = Value;
};


/// Traits of the Coo sparse block matrix: a SparseBlockMatrixTag tensor whose
/// execution policy is the one of its container
template <class Value, template<class> class Container>
struct TensorTraits<CooSparseBlockMat<Value, Container> >
{
    using tensor_category = SparseBlockMatrixTag;
    using execution_policy = dg::get_execution_policy<Container<Value>>;
    using value_type = Value;
};


} //namespace dg


#include "sparseblockmat_cpu_kernels.h"

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA

#include "sparseblockmat_gpu_kernels.cuh"

#elif THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_OMP

#include "sparseblockmat_omp_kernels.h"

#endif

exceptions.h
Error classes of the dg library.

dg::coo2d::y
@ y
y direction

dg::coo2d::x
@ x
x direction

dg::get_execution_policy
typename TensorTraits< std::decay_t< Vector > >::execution_policy get_execution_policy
Definition tensor_traits.h:49

alpha
const double alpha

n
const double n

dg
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin li...

beta
const double beta

sparseblockmat_cpu_kernels.h

sparsematrix.h

dg::CooSparseBlockMat
Coo Sparse Block Matrix format.
Definition sparseblockmat.h:228

dg::CooSparseBlockMat::display
void display(std::ostream &os=std::cout, bool show_data=false) const
Display internal data to a stream.

dg::CooSparseBlockMat::CooSparseBlockMat
CooSparseBlockMat(int num_block_rows, int num_block_cols, int n, int left_size, int right_size)
Allocate storage.
Definition sparseblockmat.h:240

dg::CooSparseBlockMat::num_entries
int num_entries
number of entries in the matrix
Definition sparseblockmat.h:345

dg::CooSparseBlockMat::symv
void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type **x, value_type beta, value_type *y) const
Definition sparseblockmat.h:311

dg::CooSparseBlockMat::total_num_cols
int total_num_cols() const
total number of columns is num_cols*n*left_size*right_size
Definition sparseblockmat.h:290

dg::CooSparseBlockMat::left_size
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:347

dg::CooSparseBlockMat::rows_idx
Vector< int > rows_idx
is of size num_entries and contains the row indices
Definition sparseblockmat.h:341

dg::CooSparseBlockMat::data
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks.
Definition sparseblockmat.h:339

dg::CooSparseBlockMat::CooSparseBlockMat
CooSparseBlockMat()=default
default constructor does nothing

dg::CooSparseBlockMat::right_size
int right_size
size of the right Kronecker delta (e.g. 1 for an x-derivative)
Definition sparseblockmat.h:348

dg::CooSparseBlockMat::add_value
void add_value(int row, int col, int data)
Convenience function to assemble the matrix.
Definition sparseblockmat.h:277

dg::CooSparseBlockMat::data_idx
Vector< int > data_idx
is of size num_entries and contains indices into the data array
Definition sparseblockmat.h:342

dg::CooSparseBlockMat::num_cols
int num_cols
number of columns (never actually used with the pointer approach)
Definition sparseblockmat.h:344

dg::CooSparseBlockMat::n
int n
each block has size n*n
Definition sparseblockmat.h:346

dg::CooSparseBlockMat::CooSparseBlockMat
CooSparseBlockMat(const CooSparseBlockMat< other_real_type, Other_Vector > &src)
Definition sparseblockmat.h:248

dg::CooSparseBlockMat::add_value
void add_value(int row, int col, const Vector< real_type > &element)
Convenience function to assemble the matrix.
Definition sparseblockmat.h:263

dg::CooSparseBlockMat::num_rows
int num_rows
number of rows
Definition sparseblockmat.h:343

dg::CooSparseBlockMat::total_num_rows
int total_num_rows() const
total number of rows is num_rows*n*left_size*right_size
Definition sparseblockmat.h:286

dg::CooSparseBlockMat::cols_idx
Vector< int > cols_idx
is of size num_entries and contains the column indices
Definition sparseblockmat.h:340

dg::CooSparseBlockMat::symv
void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type **x, value_type beta, value_type *RESTRICT y) const
Apply the matrix to a vector.
Definition sparseblockmat.h:306

dg::CudaTag
CUDA implementation.
Definition execution_policy.h:27

dg::EllSparseBlockMat
Ell Sparse Block Matrix format.
Definition sparseblockmat.h:46

dg::EllSparseBlockMat::right_size
int right_size
size of the right Kronecker delta (e.g. 1 for an x-derivative)
Definition sparseblockmat.h:177

dg::EllSparseBlockMat::set_left_size
void set_left_size(int new_left_size)
Set  left_size = new_left_size;
Definition sparseblockmat.h:157

dg::EllSparseBlockMat::invalid_index
static constexpr int invalid_index
Definition sparseblockmat.h:49

dg::EllSparseBlockMat::set_default_range
void set_default_range()
Set  right_range[0] = 0, right_range[1] = right_size
Definition sparseblockmat.h:147

dg::EllSparseBlockMat::num_rows
int num_rows
number of block rows, each row contains blocks
Definition sparseblockmat.h:172

dg::EllSparseBlockMat::data
Vector< real_type > data
The data array is of size n*n*num_different_blocks and contains the blocks. The first block is contai...
Definition sparseblockmat.h:168

dg::EllSparseBlockMat::blocks_per_line
int blocks_per_line
number of blocks in each line
Definition sparseblockmat.h:174

dg::EllSparseBlockMat::num_cols
int num_cols
number of block columns
Definition sparseblockmat.h:173

dg::EllSparseBlockMat::EllSparseBlockMat
EllSparseBlockMat(int num_block_rows, int num_block_cols, int num_blocks_per_line, int num_different_blocks, int n)
Allocate storage.
Definition sparseblockmat.h:67

dg::EllSparseBlockMat::asCuspMatrix
dg::SparseMatrix< int, real_type, thrust::host_vector > asCuspMatrix() const
Convert to a sparse matrix.

dg::EllSparseBlockMat::EllSparseBlockMat
EllSparseBlockMat(const EllSparseBlockMat< other_real_type, Other_Vector > &src)
Definition sparseblockmat.h:85

dg::EllSparseBlockMat::display
void display(std::ostream &os=std::cout, bool show_data=false) const
Display internal data to a stream.

dg::EllSparseBlockMat::data_idx
Vector< int > data_idx
has the same size as cols_idx and contains indices into the data array, i.e. the block number
Definition sparseblockmat.h:170

dg::EllSparseBlockMat::EllSparseBlockMat
EllSparseBlockMat()=default
default constructor does nothing

dg::EllSparseBlockMat::total_num_cols
int total_num_cols() const
total number of columns is num_cols*n*left_size*right_size
Definition sparseblockmat.h:99

dg::EllSparseBlockMat::set_right_size
void set_right_size(int new_right_size)
Set  right_size = new_right_size; set_default_range();
Definition sparseblockmat.h:152

dg::EllSparseBlockMat::cols_idx
Vector< int > cols_idx
is of size num_rows*num_blocks_per_line and contains the column indices % n into the vector
Definition sparseblockmat.h:169

dg::EllSparseBlockMat::total_num_rows
int total_num_rows() const
total number of rows is num_rows*n*left_size*right_size
Definition sparseblockmat.h:95

dg::EllSparseBlockMat::right_range
Vector< int > right_range
range (can be used to apply the matrix to only part of the right rows)
Definition sparseblockmat.h:171

dg::EllSparseBlockMat::symv
void symv(SharedVectorTag, SerialTag, value_type alpha, const value_type *RESTRICT x, value_type beta, value_type *RESTRICT y) const
Apply the matrix to a vector.
Definition sparseblockmat.h:121

dg::EllSparseBlockMat::left_size
int left_size
size of the left Kronecker delta
Definition sparseblockmat.h:176

dg::EllSparseBlockMat::n
int n
each block has size n*n
Definition sparseblockmat.h:175

dg::EllSparseBlockMat::symv
void symv(SharedVectorTag, CudaTag, value_type alpha, const value_type *x, value_type beta, value_type *y) const
Definition sparseblockmat.h:126

dg::OmpTag
OpenMP parallel execution.
Definition execution_policy.h:28

dg::SerialTag
Indicate sequential execution.
Definition execution_policy.h:26

dg::SharedVectorTag
Indicate a contiguous chunk of shared memory.
Definition vector_categories.h:41

dg::SparseBlockMatrixTag
indicate our sparse block matrix format
Definition matrix_categories.h:33

dg::SparseMatrix
A CSR formatted sparse matrix.
Definition sparsematrix.h:96

dg::SparseMatrix::setFromCoo
void setFromCoo(size_t num_rows, size_t num_cols, const Vector< Index > &row_indices, const Vector< Index > &column_indices, const Vector< Value > &values, bool sort=false)
Set csr values from coo formatted sparse matrix.
Definition sparsematrix.h:171

dg::TensorTraits< CooSparseBlockMat< T, V > >::execution_policy
dg::get_execution_policy< V< T > > execution_policy
Definition sparseblockmat.h:531

dg::TensorTraits< CooSparseBlockMat< T, V > >::value_type
T value_type
Definition sparseblockmat.h:529

dg::TensorTraits< EllSparseBlockMat< T, V > >::execution_policy
dg::get_execution_policy< V< T > > execution_policy
Definition sparseblockmat.h:524

dg::TensorTraits< EllSparseBlockMat< T, V > >::value_type
T value_type
Definition sparseblockmat.h:522

dg::TensorTraits
The vector traits.
Definition tensor_traits.h:38

tensor_traits.h

value_type
double value_type

dg::exblas::udouble

dg::exblas::udouble::i
int64_t i

dg::exblas::udouble::d
double d