dg/html/multiply_8h_source.html

#pragma once


#include "operator.h"

#include "dg/functors.h"

#include "dg/blas1.h"

#include "tensor.h"


namespace dg

{

// nvcc does not like local classes so we need to define these globally:

/**
 * @brief Point-wise kernel \f$ y_i \leftarrow \lambda T_{ij} x_j + \mu y_i\f$ for a 2x2 tensor
 *
 * The functor works on one set of scalar values; it is meant to be handed
 * to dg::blas1::subroutine.
 */
struct TensorMultiply2d
{
    /**
     * @param lambda factor applied to the tensor-vector product
     * @param t00,t01,t10,t11 tensor entries; \c tIJ multiplies \c inJ in the result for \c outI
     * @param in0,in1 components of the input vector
     * @param mu factor applied to the previous content of the output
     * @param out0,out1 components of the output; read (times \c mu) and then overwritten
     */
    template<class VL, class V0, class V1, class V2, class VM, class V3, class V4>
    DG_DEVICE
    void operator() ( VL lambda, V0 t00, V0 t01, V0 t10, V0 t11,
                      V1 in0, V2 in1, VM mu, V3& out0, V4& out1) const
    {
        // rows of T applied to (in0, in1)
        const auto prod0 = DG_FMA(t00,in0 , t01*in1);
        const auto prod1 = DG_FMA(t10,in0 , t11*in1);
        // Every output gets its own temporary: V3 and V4 are independent
        // types, so a shared variable typed after out1*mu would silently
        // convert out0*mu to the wrong type.
        const auto scaled1 = out1*mu;
        out1 = DG_FMA( lambda, prod1, scaled1);
        const auto scaled0 = out0*mu;
        out0 = DG_FMA( lambda, prod0, scaled0);
    }
};

/**
 * @brief Point-wise kernel \f$ y_i \leftarrow \lambda T_{ij} x_j + \mu y_i\f$ for a 3x3 tensor
 *
 * The functor works on one set of scalar values; it is meant to be handed
 * to dg::blas1::subroutine.
 */
struct TensorMultiply3d
{
    /**
     * @param lambda factor applied to the tensor-vector product
     * @param t00,t01,t02,t10,t11,t12,t20,t21,t22 tensor entries; \c tIJ multiplies \c inJ in the result for \c outI
     * @param in0,in1,in2 components of the input vector
     * @param mu factor applied to the previous content of the output
     * @param out0,out1,out2 components of the output; read (times \c mu) and then overwritten
     */
    template<class VL, class V0, class V1, class V2, class V3, class VM, class V4, class V5, class V6>
    DG_DEVICE
    void operator() ( VL lambda,
                      V0 t00, V0 t01, V0 t02,
                      V0 t10, V0 t11, V0 t12,
                      V0 t20, V0 t21, V0 t22,
                      V1 in0, V2 in1, V3 in2,
                      VM mu,
                      V4& out0, V5& out1, V6& out2) const
    {
        // rows of T applied to (in0, in1, in2)
        const auto prod0 = DG_FMA( t00,in0 , (DG_FMA( t01,in1 , t02*in2)));
        const auto prod1 = DG_FMA( t10,in0 , (DG_FMA( t11,in1 , t12*in2)));
        const auto prod2 = DG_FMA( t20,in0 , (DG_FMA( t21,in1 , t22*in2)));
        // Every output gets its own temporary: V4, V5 and V6 are independent
        // types, so a shared variable typed after out2*mu would silently
        // convert the other two products to the wrong type.
        const auto scaled2 = out2*mu;
        out2 = DG_FMA( lambda, prod2, scaled2);
        const auto scaled1 = out1*mu;
        out1 = DG_FMA( lambda, prod1, scaled1);
        const auto scaled0 = out0*mu;
        out0 = DG_FMA( lambda, prod0, scaled0);
    }
};

/**
 * @brief Point-wise kernel \f$ y_i \leftarrow \lambda T^{-1}_{ij} x_j + \mu y_i\f$ for a 2x2 tensor
 *
 * The inverse is applied through the explicit 2x2 formula (adjugate divided
 * by the determinant). The determinant is not checked against zero.
 */
struct InverseTensorMultiply2d
{
    /**
     * @param lambda factor applied to the inverse-tensor-vector product
     * @param t00,t01,t10,t11 entries of the tensor T (not of its inverse)
     * @param in0,in1 components of the input vector
     * @param mu factor applied to the previous content of the output
     * @param out0,out1 components of the output; read (times \c mu) and then overwritten
     */
    template<class VL, class V0, class V1, class V2, class VM, class V3, class V4>
    DG_DEVICE
    void operator() ( VL lambda, V0 t00, V0 t01, V0 t10, V0 t11,
                      V1 in0, V2 in1, VM mu, V3& out0, V4& out1) const
    {
        const auto dett = DG_FMA( t00,t11 , (-t10*t01));
        // adjugate(T) applied to (in0, in1)
        const auto num0 = DG_FMA( in0,t11 , (-in1*t01));
        const auto num1 = DG_FMA( t00,in1 , (-t10*in0));
        // Every output gets its own temporary: V3 and V4 are independent
        // types, so a shared variable typed after out1*mu would silently
        // convert out0*mu to the wrong type.
        const auto scaled1 = out1*mu;
        out1 = DG_FMA( lambda, num1/dett, scaled1);
        const auto scaled0 = out0*mu;
        out0 = DG_FMA( lambda, num0/dett, scaled0);
    }
};

/**
 * @brief Point-wise kernel \f$ y_i \leftarrow \lambda T^{-1}_{ij} x_j + \mu y_i\f$ for a 3x3 tensor
 *
 * The inverse is applied with Cramer's rule: component i of \f$T^{-1}x\f$ is
 * the determinant of T with column i replaced by x, divided by det(T).
 * The determinant is not checked against zero.
 */
struct InverseTensorMultiply3d
{
    /**
     * @param lambda factor applied to the inverse-tensor-vector product
     * @param t00,t01,t02,t10,t11,t12,t20,t21,t22 entries of the tensor T (not of its inverse)
     * @param in0,in1,in2 components of the input vector
     * @param mu factor applied to the previous content of the output
     * @param out0,out1,out2 components of the output; read (times \c mu) and then overwritten
     */
    template<class VL, class V0, class V1, class V2, class V3, class VM, class V4, class V5, class V6>
    DG_DEVICE
    void operator() ( VL lambda,
                  V0 t00, V0 t01, V0 t02,
                  V0 t10, V0 t11, V0 t12,
                  V0 t20, V0 t21, V0 t22,
                  V1 in0, V2 in1, V3 in2,
                  VM mu,
                  V4& out0, V5& out1, V6& out2) const
    {
        // 2x2 minors of the first-row expansion of det(T); each one is also
        // needed once more in the numerators below
        const auto minor00 = DG_FMA(t11, t22, (-t12*t21));
        const auto minor01 = DG_FMA(t10, t22, (-t20*t12));
        const auto minor02 = DG_FMA(t10, t21, (-t20*t11));
        const auto dett = t00*minor00
                         -t01*minor01
                         +t02*minor02;

        // det(T) with column 0 replaced by (in0, in1, in2)
        const auto num0 = in0*minor00
                         -t01*DG_FMA(in1, t22, (-in2*t12))
                         +t02*DG_FMA(in1, t21, (-in2*t11));
        // det(T) with column 1 replaced by (in0, in1, in2)
        const auto num1 = t00*DG_FMA(in1, t22, (-t12*in2))
                         -in0*minor01
                         +t02*DG_FMA(t10, in2, (-t20*in1));
        // det(T) with column 2 replaced by (in0, in1, in2)
        const auto num2 = t00*DG_FMA(t11, in2, (-in1*t21))
                         -t01*DG_FMA(t10, in2, (-t20*in1))
                         +in0*minor02;
        // Every output gets its own temporary: V4, V5 and V6 are independent
        // types, so a shared variable typed after out2*mu would silently
        // convert the other two products to the wrong type.
        const auto scaled2 = out2*mu;
        out2 = DG_FMA( lambda, num2/dett, scaled2);
        const auto scaled1 = out1*mu;
        out1 = DG_FMA( lambda, num1/dett, scaled1);
        const auto scaled0 = out0*mu;
        out0 = DG_FMA( lambda, num0/dett, scaled0);
    }
};

/// Point-wise determinant of a 2x2 tensor: \f$ y = t_{00} t_{11} - t_{10}t_{01} \f$
struct TensorDeterminant2d
{
    /**
     * @param t00,t01,t10,t11 the four tensor entries
     * @return \c t00*t11-t10*t01
     */
    template<class value_type>
    DG_DEVICE
    value_type operator() ( value_type t00, value_type t01,
                            value_type t10, value_type t11) const
    {
        // negated product of the off-diagonal entries
        const auto off_diagonal = -t10*t01;
        return DG_FMA( t00, t11, off_diagonal);
    }
};

/**
 * @brief Point-wise determinant of a 3x3 tensor
 *
 * \f$ y = t_{00} t_{11}t_{22} + t_{01}t_{12}t_{20} + t_{02}t_{10}t_{21} - t_{02}t_{11}t_{20} - t_{01}t_{10}t_{22} - t_{00}t_{12}t_{21} \f$,
 * evaluated as an expansion along the first row.
 */
struct TensorDeterminant3d
{
    /**
     * @param t00,t01,t02,t10,t11,t12,t20,t21,t22 the nine tensor entries
     * @return the determinant
     */
    template<class value_type>
    DG_DEVICE
    value_type operator() ( value_type t00, value_type t01, value_type t02,
                            value_type t10, value_type t11, value_type t12,
                            value_type t20, value_type t21, value_type t22) const
    {
        // 2x2 minors belonging to t00, t01 and t02
        const auto minor00 = DG_FMA( t11,t22 , (-t21*t12));
        const auto minor01 = DG_FMA( t10,t22 , (-t20*t12));
        const auto minor02 = DG_FMA( t10,t21 , (-t20*t11));
        return t00*minor00 - t01*minor01 + t02*minor02;
    }
};


/// Point-wise scaled bilinear form \f$ y = \lambda\mu v_i T_{ij} w_j \f$ for a 2x2 tensor
struct TensorDot2d
{
    /**
     * @param lambda,mu scalar factors multiplied onto the result
     * @param v0,v1 components of the left vector
     * @param t00,t01,t10,t11 tensor entries
     * @param w0,w1 components of the right vector
     * @return \c lambda*mu*(v . (T w))
     */
    template<class VL, class V0, class V1, class V2, class VM, class V3, class V4>
    DG_DEVICE
    auto operator() (
              VL lambda, V0 v0, V1 v1,
              V2 t00, V2 t01,
              V2 t10, V2 t11,
              VM mu,     V3 w0, V4 w1
              ) const
    {
        // first T*w, then contract with v
        auto Tw0 = DG_FMA(t00,w0 , t01*w1);
        auto Tw1 = DG_FMA(t10,w0 , t11*w1);
        return lambda*mu*DG_FMA(v0,Tw0 , v1*Tw1);
    }
};

/// Point-wise scaled bilinear form \f$ y = \lambda \mu v_i T_{ij} w_j \f$ for a 3x3 tensor
struct TensorDot3d
{
    /**
     * @param lambda,mu scalar factors multiplied onto the result
     * @param v0,v1,v2 components of the left vector
     * @param t00,t01,t02,t10,t11,t12,t20,t21,t22 tensor entries
     * @param w0,w1,w2 components of the right vector
     * @return \c lambda*mu*(v . (T w))
     */
    template<class VL, class V0, class V1, class V2, class V3, class VM, class V4, class V5, class V6>
    DG_DEVICE
    auto operator() (
              VL lambda,
              V0 v0,  V1 v1,  V2 v2,
              V3 t00, V3 t01, V3 t02,
              V3 t10, V3 t11, V3 t12,
              V3 t20, V3 t21, V3 t22,
              VM mu,
              V4 w0, V5 w1, V6 w2) const
    {
        // first T*w, then contract with v
        auto Tw0 = DG_FMA( t00,w0 , (DG_FMA( t01,w1 , t02*w2)));
        auto Tw1 = DG_FMA( t10,w0 , (DG_FMA( t11,w1 , t12*w2)));
        auto Tw2 = DG_FMA( t20,w0 , (DG_FMA( t21,w1 , t22*w2)));
        return lambda*mu*DG_FMA(v0,Tw0 , DG_FMA(v1,Tw1 , v2*Tw2));
    }
};


namespace tensor

{


/**
 * @brief Multiply every stored value of a tensor point-wise with \c mu, in place
 *
 * Each container in \c t.values() is replaced by its
 * dg::blas1::pointwiseDot product with \c mu.
 * @param t tensor to scale (modified)
 * @param mu the factor
 */
template<class ContainerType0, class ContainerType1>
void scal( SparseTensor<ContainerType0>& t, const ContainerType1& mu)
{
    for( auto& component : t.values())
        dg::blas1::pointwiseDot( mu, component, component);
}


/**
 * @brief \f$ y_i \leftarrow \lambda T_{ij} x_j + \mu y_i\f$ with the upper-left 2x2 part of \c t
 *
 * Runs dg::TensorMultiply2d through dg::blas1::subroutine.
 * @param lambda factor for the tensor-vector product
 * @param t the tensor; entries (0,0), (0,1), (1,0), (1,1) are used
 * @param in0,in1 components of the input vector
 * @param mu factor for the previous content of the output
 * @param out0,out1 components of the output (read and written)
 */
template<class ContainerTypeL, class ContainerType0, class ContainerType1,
         class ContainerType2, class ContainerTypeM, class ContainerType3,
         class ContainerType4>
void multiply2d( const ContainerTypeL& lambda, const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, const ContainerTypeM& mu, ContainerType3& out0, ContainerType4& out1)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    dg::blas1::subroutine( dg::TensorMultiply2d(), lambda,
                           t00, t01, t10, t11,
                           in0, in1, mu, out0, out1);
}


/**
 * @brief \f$ y_i \leftarrow \lambda T_{ij} x_j + \mu y_i\f$ with the full 3x3 tensor \c t
 *
 * Runs dg::TensorMultiply3d through dg::blas1::subroutine.
 * @param lambda factor for the tensor-vector product
 * @param t the tensor; all nine entries are used
 * @param in0,in1,in2 components of the input vector
 * @param mu factor for the previous content of the output
 * @param out0,out1,out2 components of the output (read and written)
 */
template<class ContainerTypeL, class ContainerType0, class ContainerType1,
         class ContainerType2, class ContainerType3, class ContainerTypeM,
         class ContainerType4, class ContainerType5, class ContainerType6>
void multiply3d( const ContainerTypeL& lambda, const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, const ContainerType3& in2, const ContainerTypeM& mu, ContainerType4& out0, ContainerType5& out1, ContainerType6& out2)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t02 = t.value(0,2);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    const auto& t12 = t.value(1,2);
    const auto& t20 = t.value(2,0);
    const auto& t21 = t.value(2,1);
    const auto& t22 = t.value(2,2);
    dg::blas1::subroutine( dg::TensorMultiply3d(), lambda,
                           t00, t01, t02,
                           t10, t11, t12,
                           t20, t21, t22,
                           in0, in1, in2,
                           mu, out0, out1, out2);
}


/**
 * @brief \f$ y_i \leftarrow \lambda T^{-1}_{ij} x_j + \mu y_i\f$ with the upper-left 2x2 part of \c t
 *
 * Runs dg::InverseTensorMultiply2d through dg::blas1::subroutine; the tensor
 * is inverted point-wise on the fly, \c t itself is not modified.
 * @param lambda factor for the inverse-tensor-vector product
 * @param t the tensor T (not its inverse); entries (0,0), (0,1), (1,0), (1,1) are used
 * @param in0,in1 components of the input vector
 * @param mu factor for the previous content of the output
 * @param out0,out1 components of the output (read and written)
 */
template<class ContainerTypeL, class ContainerType0, class ContainerType1,
         class ContainerType2, class ContainerTypeM, class ContainerType3,
         class ContainerType4>
void inv_multiply2d( const ContainerTypeL& lambda, const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, const ContainerTypeM& mu, ContainerType3& out0, ContainerType4& out1)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    dg::blas1::subroutine( dg::InverseTensorMultiply2d(), lambda,
                           t00, t01, t10, t11,
                           in0, in1, mu, out0, out1);
}


/**
 * @brief \f$ y_i \leftarrow \lambda T^{-1}_{ij} x_j + \mu y_i\f$ with the full 3x3 tensor \c t
 *
 * Runs dg::InverseTensorMultiply3d through dg::blas1::subroutine; the tensor
 * is inverted point-wise on the fly, \c t itself is not modified.
 * @param lambda factor for the inverse-tensor-vector product
 * @param t the tensor T (not its inverse); all nine entries are used
 * @param in0,in1,in2 components of the input vector
 * @param mu factor for the previous content of the output
 * @param out0,out1,out2 components of the output (read and written)
 */
template<class ContainerTypeL, class ContainerType0, class ContainerType1,
         class ContainerType2, class ContainerType3, class ContainerTypeM,
         class ContainerType4, class ContainerType5, class ContainerType6>
void inv_multiply3d( const ContainerTypeL& lambda, const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, const ContainerType3& in2, const ContainerTypeM& mu, ContainerType4& out0, ContainerType5& out1, ContainerType6& out2)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t02 = t.value(0,2);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    const auto& t12 = t.value(1,2);
    const auto& t20 = t.value(2,0);
    const auto& t21 = t.value(2,1);
    const auto& t22 = t.value(2,2);
    dg::blas1::subroutine( dg::InverseTensorMultiply3d(), lambda,
                           t00, t01, t02,
                           t10, t11, t12,
                           t20, t21, t22,
                           in0, in1, in2,
                           mu, out0, out1, out2);
}


/**
 * @brief Point-wise determinant of the upper-left 2x2 part of a tensor
 *
 * Evaluates dg::TensorDeterminant2d on the entries (0,0), (0,1), (1,0), (1,1)
 * via dg::blas1::evaluate with dg::equals.
 * @param t the tensor
 * @return a new container, created as a copy of \c t.value(0,0) and then
 *   filled with the determinant
 */
template<class ContainerType>
ContainerType determinant2d( const SparseTensor<ContainerType>& t)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    // the copy only provides a container of matching size
    ContainerType result = t.value(0,0);
    dg::blas1::evaluate( result, dg::equals(), dg::TensorDeterminant2d(),
                         t00, t01, t10, t11);
    return result;
}


/**
 * @brief Point-wise determinant of the full 3x3 tensor
 *
 * Evaluates dg::TensorDeterminant3d on all nine entries via
 * dg::blas1::evaluate with dg::equals.
 * @param t the tensor
 * @return a new container, created as a copy of \c t.value(0,0) and then
 *   filled with the determinant
 */
template<class ContainerType>
ContainerType determinant( const SparseTensor<ContainerType>& t)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t02 = t.value(0,2);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    const auto& t12 = t.value(1,2);
    const auto& t20 = t.value(2,0);
    const auto& t21 = t.value(2,1);
    const auto& t22 = t.value(2,2);
    // the copy only provides a container of matching size
    ContainerType result = t.value(0,0);
    dg::blas1::evaluate( result, dg::equals(), dg::TensorDeterminant3d(),
                         t00, t01, t02,
                         t10, t11, t12,
                         t20, t21, t22);
    return result;
}


template<class ContainerType>


ContainerType volume2d( const SparseTensor<ContainerType>& t)

{

    ContainerType vol=determinant2d(t);

    dg::blas1::transform(vol, vol, dg::InvSqrt<get_value_type<ContainerType>>());

    return vol;

}


template<class ContainerType>


ContainerType volume( const SparseTensor<ContainerType>& t)

{

    ContainerType vol=determinant(t);

    dg::blas1::transform(vol, vol, dg::InvSqrt<get_value_type<ContainerType>>());

    return vol;

}


//For convenience

/**
 * @brief Convenience overload: forwards to the seven-argument multiply2d
 *   with \c lambda = 1. and \c mu = 0.
 *
 * @param t the tensor; its upper-left 2x2 part is used
 * @param in0,in1 components of the input vector
 * @param out0,out1 components of the output
 * @note The previous content of the output is still multiplied by \c mu = 0.
 *   inside the kernel rather than skipped.
 */
template<class ContainerType0, class ContainerType1, class ContainerType2,
         class ContainerType3, class ContainerType4>
void multiply2d( const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, ContainerType3& out0, ContainerType4& out1)
{
    // floating-point literals for both factors, like the sibling overloads
    multiply2d( 1., t, in0, in1, 0., out0, out1);
}


/**
 * @brief Convenience overload: forwards to the nine-argument multiply3d
 *   with \c lambda = 1. and \c mu = 0.
 *
 * @param t the tensor
 * @param in0,in1,in2 components of the input vector
 * @param out0,out1,out2 components of the output
 * @note The previous content of the output is still multiplied by \c mu = 0.
 *   inside the kernel rather than skipped.
 */
template<class ContainerType0, class ContainerType1, class ContainerType2,
         class ContainerType3, class ContainerType4, class ContainerType5,
         class ContainerType6>
void multiply3d( const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, const ContainerType3& in2, ContainerType4& out0, ContainerType5& out1, ContainerType6& out2)
{
    multiply3d( 1., t,
                in0, in1, in2,
                0.,
                out0, out1, out2);
}


/**
 * @brief Convenience overload: forwards to the seven-argument inv_multiply2d
 *   with \c lambda = 1. and \c mu = 0.
 *
 * @param t the tensor T (not its inverse); its upper-left 2x2 part is used
 * @param in0,in1 components of the input vector
 * @param out0,out1 components of the output
 * @note The previous content of the output is still multiplied by \c mu = 0.
 *   inside the kernel rather than skipped.
 */
template<class ContainerType0, class ContainerType1, class ContainerType2,
         class ContainerType3, class ContainerType4>
void inv_multiply2d( const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, ContainerType3& out0, ContainerType4& out1)
{
    // mu = 0. must be passed explicitly: without it the call has six
    // arguments and matches neither the seven- nor the five-argument overload
    inv_multiply2d( 1., t, in0, in1, 0., out0, out1);
}


/**
 * @brief Convenience overload: forwards to the nine-argument inv_multiply3d
 *   with \c lambda = 1. and \c mu = 0.
 *
 * @param t the tensor T (not its inverse)
 * @param in0,in1,in2 components of the input vector
 * @param out0,out1,out2 components of the output
 * @note The previous content of the output is still multiplied by \c mu = 0.
 *   inside the kernel rather than skipped.
 */
template<class ContainerType0, class ContainerType1, class ContainerType2,
         class ContainerType3, class ContainerType4, class ContainerType5,
         class ContainerType6>
void inv_multiply3d( const SparseTensor<ContainerType0>& t, const ContainerType1& in0, const ContainerType2& in1, const ContainerType3& in2, ContainerType4& out0, ContainerType5& out1, ContainerType6& out2)
{
    inv_multiply3d( 1., t,
                    in0, in1, in2,
                    0.,
                    out0, out1, out2);
}


/**
 * @brief Combine the point-wise 2d product \f$ \lambda\mu v_i T_{ij} w_j \f$ with \c y
 *
 * dg::TensorDot2d is evaluated point-wise and handed, together with \c y,
 * to \c dg::Axpby(alpha,beta) through dg::blas1::evaluate.
 * NOTE(review): presumably this yields
 * \f$ y \leftarrow \alpha\lambda\mu v_i T_{ij} w_j + \beta y\f$ - confirm
 * against dg::Axpby.
 * @param alpha first argument of dg::Axpby
 * @param lambda,mu factors inside the product
 * @param v0,v1 components of the left vector
 * @param t the tensor; entries (0,0), (0,1), (1,0), (1,1) are used
 * @param w0,w1 components of the right vector
 * @param beta second argument of dg::Axpby
 * @param y the result container
 */
template<class ContainerTypeL, class ContainerType0, class ContainerType1,
         class ContainerType2, class ContainerType3, class ContainerTypeM,
         class ContainerType4, class ContainerType5,
         class value_type0, class value_type1>
void scalar_product2d(
        value_type0 alpha,
        const ContainerTypeL& lambda,
        const ContainerType0& v0,
        const ContainerType1& v1,
        const SparseTensor<ContainerType2>& t,
        const ContainerTypeM& mu,
        const ContainerType3& w0,
        const ContainerType4& w1,
        value_type1 beta,
        ContainerType5& y)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    dg::blas1::evaluate( y, dg::Axpby( alpha, beta), dg::TensorDot2d(),
                         lambda, v0, v1,
                         t00, t01, t10, t11,
                         mu, w0, w1);
}


/**
 * @brief Combine the point-wise 3d product \f$ \lambda\mu v_i T_{ij} w_j \f$ with \c y
 *
 * dg::TensorDot3d is evaluated point-wise and handed, together with \c y,
 * to \c dg::Axpby(alpha,beta) through dg::blas1::evaluate.
 * NOTE(review): presumably this yields
 * \f$ y \leftarrow \alpha\lambda\mu v_i T_{ij} w_j + \beta y\f$ - confirm
 * against dg::Axpby.
 * @param alpha first argument of dg::Axpby
 * @param lambda,mu factors inside the product
 * @param v0,v1,v2 components of the left vector
 * @param t the tensor; all nine entries are used
 * @param w0,w1,w2 components of the right vector
 * @param beta second argument of dg::Axpby
 * @param y the result container
 */
template<class ContainerTypeL, class ContainerType0, class ContainerType1,
         class ContainerType2, class ContainerType3, class ContainerTypeM,
         class ContainerType4, class ContainerType5, class ContainerType6,
         class ContainerType7, class value_type0, class value_type1>
void scalar_product3d(
        value_type0 alpha,
        const ContainerTypeL& lambda,
        const ContainerType0& v0,
        const ContainerType1& v1,
        const ContainerType2& v2,
        const SparseTensor<ContainerType3>& t,
        const ContainerTypeM& mu,
        const ContainerType4& w0,
        const ContainerType5& w1,
        const ContainerType6& w2,
        value_type1 beta,
        ContainerType7& y)
{
    const auto& t00 = t.value(0,0);
    const auto& t01 = t.value(0,1);
    const auto& t02 = t.value(0,2);
    const auto& t10 = t.value(1,0);
    const auto& t11 = t.value(1,1);
    const auto& t12 = t.value(1,2);
    const auto& t20 = t.value(2,0);
    const auto& t21 = t.value(2,1);
    const auto& t22 = t.value(2,2);
    dg::blas1::evaluate( y, dg::Axpby( alpha, beta), dg::TensorDot3d(),
                         lambda, v0, v1, v2,
                         t00, t01, t02,
                         t10, t11, t12,
                         t20, t21, t22,
                         mu, w0, w1, w2);
}


}//namespace tensor

}//namespace dg

blas1.h

functors.h

dg::blas1::pointwiseDot
void pointwiseDot(value_type alpha, const ContainerType1 &x1, const ContainerType2 &x2, value_type1 beta, ContainerType &y)
Definition blas1.h:406

dg::blas1::transform
void transform(const ContainerType1 &x, ContainerType &y, UnaryOp op)
Definition blas1.h:585

dg::blas1::subroutine
void subroutine(Subroutine f, ContainerType &&x, ContainerTypes &&... xs)
$f(x_0, x_1, \dots)$; Customizable and generic blas1 function
Definition blas1.h:677

dg::blas1::evaluate
void evaluate(ContainerType &y, BinarySubroutine f, Functor g, const ContainerType0 &x0, const ContainerTypes &...xs)
Definition blas1.h:612

dg::coo2d::y
@ y
y direction

dg::get_value_type
typename TensorTraits< std::decay_t< Vector > >::value_type get_value_type
Definition tensor_traits.h:45

dg::tensor::inv_multiply3d
void inv_multiply3d(const ContainerTypeL &lambda, const SparseTensor< ContainerType0 > &t, const ContainerType1 &in0, const ContainerType2 &in1, const ContainerType3 &in2, const ContainerTypeM &mu, ContainerType4 &out0, ContainerType5 &out1, ContainerType6 &out2)
$y_i \leftarrow \lambda T^{-1}_{ij} x_j + \mu y_i$
Definition multiply.h:292

dg::tensor::scalar_product2d
void scalar_product2d(value_type0 alpha, const ContainerTypeL &lambda, const ContainerType0 &v0, const ContainerType1 &v1, const SparseTensor< ContainerType2 > &t, const ContainerTypeM &mu, const ContainerType3 &w0, const ContainerType4 &w1, value_type1 beta, ContainerType5 &y)
Definition multiply.h:493

dg::tensor::determinant2d
ContainerType determinant2d(const SparseTensor< ContainerType > &t)
Definition multiply.h:312

dg::tensor::inv_multiply2d
void inv_multiply2d(const ContainerTypeL &lambda, const SparseTensor< ContainerType0 > &t, const ContainerType1 &in0, const ContainerType2 &in1, const ContainerTypeM &mu, ContainerType3 &out0, ContainerType4 &out1)
Definition multiply.h:266

dg::tensor::scalar_product3d
void scalar_product3d(value_type0 alpha, const ContainerTypeL &lambda, const ContainerType0 &v0, const ContainerType1 &v1, const ContainerType2 &v2, const SparseTensor< ContainerType3 > &t, const ContainerTypeM &mu, const ContainerType4 &w0, const ContainerType5 &w1, const ContainerType6 &w2, value_type1 beta, ContainerType7 &y)
Definition multiply.h:533

dg::tensor::multiply2d
void multiply2d(const ContainerTypeL &lambda, const SparseTensor< ContainerType0 > &t, const ContainerType1 &in0, const ContainerType2 &in1, const ContainerTypeM &mu, ContainerType3 &out0, ContainerType4 &out1)
Definition multiply.h:215

dg::tensor::determinant
ContainerType determinant(const SparseTensor< ContainerType > &t)
Definition multiply.h:332

dg::tensor::volume
ContainerType volume(const SparseTensor< ContainerType > &t)
Definition multiply.h:389

dg::tensor::multiply3d
void multiply3d(const ContainerTypeL &lambda, const SparseTensor< ContainerType0 > &t, const ContainerType1 &in0, const ContainerType2 &in1, const ContainerType3 &in2, const ContainerTypeM &mu, ContainerType4 &out0, ContainerType5 &out1, ContainerType6 &out2)
Definition multiply.h:240

dg::tensor::scal
void scal(SparseTensor< ContainerType0 > &t, const ContainerType1 &mu)
Definition multiply.h:192

dg::tensor::volume2d
ContainerType volume2d(const SparseTensor< ContainerType > &t)
Definition multiply.h:362

DG_DEVICE
#define DG_DEVICE
Expands to __host__ __device__ if compiled with nvcc else is empty.
Definition dg_doc.h:378

alpha
const double alpha

dg
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin li...

operator.h

beta
const double beta

dg::InvSqrt
Definition functors.h:83

dg::SparseTensor
Class for 2x2 and 3x3 matrices sharing elements.
Definition tensor.h:51

dg::SparseTensor::values
std::vector< container > & values()
Return write access to the values array.
Definition tensor.h:151

dg::SparseTensor::value
const container & value(size_t i, size_t j) const
Read access the underlying container.
Definition tensor.h:141

dg::equals
Definition subroutines.h:22

tensor.h

value_type
double value_type

mu
double mu(double s, unsigned i, unsigned n)