matrix/html/matrixprod_8h_source.html

#pragma once


#include "dg/algorithm.h"

#include "lanczos.h"

#include "contours.h"

#include "optimise.h"


namespace dg{

namespace mat{


template<class ContainerType>


struct ProductMatrixFunction

{

    using container_type = ContainerType;

    using value_type = dg::get_value_type<ContainerType>;

    ProductMatrixFunction() = default;


    ProductMatrixFunction( const ContainerType& copyable, unsigned max_iterations)

    {

        m_lanczos.construct( copyable, max_iterations);

        m_v = m_vp = m_vm = m_f = copyable;

    }


    template<class ...Params>


    void construct( Params&& ...ps)

    {

        //construct and swap

        *this = ProductMatrixFunction( std::forward<Params>( ps)...);

    }


    void set_benchmark( bool benchmark, std::string message = "ProductFunction"){

        m_benchmark = benchmark;

        m_message = message;

    }


    template<class ContainerType0, class BinaryOp, class ContainerType1,

        class MatrixType, class ContainerType2, class ContainerType3>


    unsigned apply(

            ContainerType0& x,

            BinaryOp op,

            const ContainerType1& diag,

            MatrixType&& A,

            const ContainerType2& b,

            const ContainerType3& weights,

            value_type eps,

            value_type nrmb_correction = 1.)

    {

#ifdef MPI_VERSION

        int rank;

        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

#endif //MPI

        dg::Timer t;

        t.tic();

        auto func = make_FuncEigen_Te1( [&](value_type x) {return op(1., x);});

        auto T = m_lanczos.tridiag( func, std::forward<MatrixType>(A),

                b, weights, eps, nrmb_correction,

                "universal", 1.0, 2);

        compute_vlcl( op, diag, std::forward<MatrixType>(A), T, x, b,

                    m_lanczos.get_bnorm());

        t.toc();

        if( m_benchmark)

            DG_RANK0 std::cout << "# `"<<m_message<<"` solve with {"<<T.num_rows<<"} iterations took "<<t.diff()<<"s\n";

        return T.num_rows;

    }


    template<class ContainerType0, class BinaryOp, class MatrixType,

        class ContainerType1, class ContainerType2, class ContainerType3>


    unsigned apply_adjoint(

            ContainerType0& x,

            BinaryOp op,

            MatrixType&& A,

            const ContainerType1& diag,

            const ContainerType2& b,

            const ContainerType3& weights,

            value_type eps,

            value_type nrmb_correction = 1.)

    {

        // Should this be another class?

        // if A does not change Lanczos iterations could be reused from apply function!?

#ifdef MPI_VERSION

        int rank;

        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

#endif //MPI

        dg::Timer t;

        t.tic();

        auto func = make_FuncEigen_Te1( [&](value_type x) {return op( x, 1.);});

        auto T = m_lanczos.tridiag( func, std::forward<MatrixType>(A),

                b, weights, eps, nrmb_correction,

                "universal", 1.0, 2);

        compute_vlcl_adjoint( op, std::forward<MatrixType>(A), diag, T, x, b,

                weights, m_lanczos.get_bnorm());


        t.toc();

        if( m_benchmark)

            DG_RANK0 std::cout << "# `"<<m_message<<"` solve with {"<<T.num_rows<<"} iterations took "<<t.diff()<<"s\n";

        return T.num_rows;

    }


    template< class BinaryOp, class ContainerType0, class MatrixType,

        class ContainerType1, class ContainerType2>


    void compute_vlcl( BinaryOp op, const ContainerType0& diag,

            MatrixType&& A,

            const TriDiagonal<thrust::host_vector<value_type>>& T,

            ContainerType1& x,

            const ContainerType2& b,

            value_type bnorm)

    {

        dg::blas1::copy(0., x);

        if( 0 == bnorm )

        {

            return;

        }

        unsigned iter = T.O.size();

        thrust::host_vector<value_type> evals = T.O, plus  = T.P;

        thrust::host_vector<value_type> work (2*iter-2);

        dg::SquareMatrix<value_type> EHt(iter);

        //Compute Eigendecomposition

        lapack::stev('V', evals, plus, EHt.data(), work);

        dg::blas1::axpby(1./bnorm, b, 0.0, m_v); //m_v[1] = b/||b||

        dg::blas1::copy(0., m_vm);

        // compute c_1 v_1

        for ( unsigned k=0; k<iter; k++)

        {

            dg::blas1::evaluate( m_f, dg::equals(), op, diag, evals[k]);

            dg::blas1::pointwiseDot( bnorm*EHt(k, 0)*EHt(k,0), m_f, m_v, 1.,

                    x);

        }

        for ( unsigned i=0; i<iter-1; i++)

        {

            dg::blas2::symv( std::forward<MatrixType>(A), m_v, m_vp);

            dg::blas1::axpbypgz(

                    -T.M[i]/T.P[i], m_vm,

                    -T.O[i]/T.P[i], m_v,

                        1.0/T.P[i], m_vp);

            m_vm.swap( m_v);

            m_v.swap( m_vp);

            // compute c_l v_l

            for ( unsigned k=0; k<iter; k++)

            {

                dg::blas1::evaluate( m_f, dg::equals(), op, diag, evals[k]);

                dg::blas1::pointwiseDot( bnorm*EHt(k,0)*EHt(k,i+1), m_f, m_v,

                        1., x);

            }

        }

    }


    template< class BinaryOp, class MatrixType, class ContainerType0,

        class ContainerType1, class ContainerType2, class ContainerType3>


    void compute_vlcl_adjoint( BinaryOp op,

            MatrixType&& A,

            const ContainerType0& diag,

            const TriDiagonal<thrust::host_vector<value_type>>& T,

            ContainerType1& x,

            const ContainerType2& b,

            const ContainerType3& weights,

            value_type bnorm)

    {

        dg::blas1::copy(0., x);

        if( 0 == bnorm )

        {

            return;

        }

        unsigned iter = T.O.size();

        thrust::host_vector<value_type> evals = T.O, plus  = T.P;

        thrust::host_vector<value_type> work (2*iter-2);

        dg::SquareMatrix<value_type> EHt(iter);

        //Compute Eigendecomposition

        lapack::stev('V', evals, plus, EHt.data(), work);

        dg::blas1::axpby(1./bnorm, b, 0.0, m_v); //m_v[1] = b/||b||

        dg::blas1::copy(0., m_vm);

        // compute alpha_i1

        dg::SquareMatrix<value_type> alpha(iter);

        for ( unsigned k=0; k<iter; k++)

        {

            dg::blas1::evaluate( m_f, dg::equals(), op, evals[k], diag);

            dg::blas1::pointwiseDot( m_f, m_v, m_f);

            alpha( k,0) = dg::blas2::dot( m_f, weights, b);

        }

        for ( unsigned i=0; i<iter-1; i++)

        {

            dg::blas2::symv( std::forward<MatrixType>(A), m_v, m_vp);

            dg::blas1::axpbypgz(

                    -T.M[i]/T.P[i], m_vm,

                    -T.O[i]/T.P[i], m_v,

                        1.0/T.P[i], m_vp);

            m_vm.swap( m_v);

            m_v.swap( m_vp);

            for ( unsigned k=0; k<iter; k++)

            {

                dg::blas1::evaluate( m_f, dg::equals(), op, evals[k], diag);

                dg::blas1::pointwiseDot( m_f, m_v, m_f);

                alpha( k,i+1) = dg::blas2::dot( m_f, weights, b);

            }

        }

        // Observation: With an exponential function the lines of alpha get extremely small (because exp(lambda) gets very small ... so maybe one can save a few scalar products

        // compute E_li E_ki alpha_ik v_l

        std::vector<double> cl( iter, 0.0);

        for( unsigned l=0; l<iter; l++)

            for( unsigned i=0; i<iter; i++)

                for( unsigned k=0; k<iter; k++)

                    cl[l] += EHt(k,i)*alpha(k,i)*EHt(k,l);

        // 3rd Lanczos iteration

        dg::blas1::axpby(1./bnorm, b, 0.0, m_v); //m_v[1] = b/||b||

        dg::blas1::copy(0., m_vm);

        dg::blas1::axpby( cl[0], m_v, 1., x);

        for ( unsigned i=0; i<iter-1; i++)

        {

            dg::blas2::symv( std::forward<MatrixType>(A), m_v, m_vp);

            dg::blas1::axpbypgz(

                    -T.M[i]/T.P[i], m_vm,

                    -T.O[i]/T.P[i], m_v,

                        1.0/T.P[i], m_vp);

            m_vm.swap( m_v);

            m_v.swap( m_vp);

            dg::blas1::axpby( cl[i+1], m_v, 1., x);

        }

    }


    UniversalLanczos<ContainerType>& lanczos() { return m_lanczos;}

    private:


    UniversalLanczos<ContainerType> m_lanczos;

    bool m_benchmark = true;

    std::string m_message = "ProductFunction";

    ContainerType  m_v, m_vp, m_vm, m_f;

};


/**
 * @brief Bivariate matrix function product via a sum over complex nodes
 *
 * For every node \c zk with weight \c wk delivered by the internal
 * \c CauchyOptimizer a shifted system <tt>(zk - A) z = rhs</tt> is solved with
 * the internal complex symmetric multigrid solver and twice the real part of
 * the result is added to the solution.
 *
 * @tparam Geometry grid type of the multigrid solver
 * @tparam Matrix matrix type of the multigrid solver
 * @tparam ComplexContainer complex vector type
 */
template<class Geometry, class Matrix, class ComplexContainer>
struct CauchyMatrixProduct
{
    /// Construct empty
    CauchyMatrixProduct() = default;

    /**
     * @brief Allocate the multigrid solver and the complex work vectors
     *
     * @param lm_eps accuracy handed to the node optimizer in \c solve
     * @param grid grid the multigrid solver is constructed on
     * @param stages number of multigrid stages
     * @param adjoint initial value of the adjoint parameter, see \c set_adjoint
     */
    CauchyMatrixProduct( double lm_eps, const Geometry& grid, unsigned stages, bool adjoint = true )
    : m_eps( lm_eps),
    m_multi( grid, stages),
    m_previous( 2, {1, m_multi.copyable()}),
    m_z( m_multi.copyable()),
    m_rhs( m_multi.copyable()),
    m_grid_points(grid.size()),
    m_cauchy_opt(),
    m_adjoint(adjoint)
    {
    }

    /// Access the internal multigrid method to be able to construct matrices
    const dg::MultigridCG2d<Geometry, Matrix, ComplexContainer,
        dg::complex_symmetric>& multigrid() const { return m_multi;}

    /// Clear the cached Eigenvalues of the matrix in the solve method
    void clear_cache(){
        m_EV_up2date = false;
    }

    /// Verbose output to std::cout (also switches the node optimizer to verbose)
    void set_verbose( bool verbose) {
        m_verbose = verbose;
        m_cauchy_opt.set_verbose(verbose);
    }

    /// Number of (complex) nodes used in the latest call to solve
    unsigned num_nodes() const { return m_cauchy_opt.num_nodes();}

    /**
     * @brief Determine if adjoint or direct bivariate matrix function is computed
     *
     * @param adjoint new value; if it differs from the current one the stored
     * previous solutions are discarded
     */
    void set_adjoint( bool adjoint) {
        if( m_adjoint == adjoint)
            return;
        // reset solution cache
        m_previous.assign( m_previous.size(), {1, m_multi.copyable()});
        m_adjoint = adjoint;
    }

    /// Current value of the adjoint parameter
    bool get_adjoint() const { return m_adjoint;}

    /**
     * @brief Compute the bivariate matrix function
     *
     * @param x (write-only) real result
     * @param func function that is applied to <tt>zk*d</tt> for every complex node \c zk
     * @param dxfunc handed to the node optimizer together with \c func
     * @param ops one operator per multigrid stage; <tt>ops[0]</tt> is used to
     * estimate the extreme Eigenvalues. Each operator must provide
     * \c weights() and \c precond()
     * @param d the diagonal
     * @param b the right hand side
     * @param eps accuracies handed to the multigrid solver
     */
    template<class MatrixType, class UnaryFunc, class UnaryFuncD,
        class ContainerType0, class ContainerType1, class ContainerType2>
    void solve( ContainerType0& x, UnaryFunc func, UnaryFuncD dxfunc, std::vector<MatrixType>& ops,
        const ContainerType1& d, const ContainerType2& b, std::vector<double> eps)
    {
#ifdef MPI_VERSION
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif //MPI
        // 1. (re)compute extreme Eigenvalues, then current nodes and weights
        if( !m_EV_up2date)
            update_extremeEVs( ops);
        double dmin = dg::blas1::reduce( d, +1e300, thrust::minimum());
        double dmax = dg::blas1::reduce( d, -1e300, thrust::maximum());
        bool changed = false;
        if( m_verbose )
        {
            DG_RANK0 std::cout << "# "<<dmin<<" < D < "<<dmax<<"\n";
            DG_RANK0 std::cout << "# "<<m_lmin<<" < Lambda < "<<m_lmax<<"\n";
        }
        const auto& zkwk = m_cauchy_opt.update_zkwk( changed, func, dxfunc,
            m_lmin, m_lmax, dmin, dmax, m_with_zero, m_eps);
        if( changed) // if the nodes change we need to re-allocate solution space.
            m_previous.assign( m_cauchy_opt.num_nodes(), {1, m_multi.copyable()});

        if( m_verbose )
        {
            DG_RANK0 std::cout << "# Current number of nodes "<<m_cauchy_opt.num_nodes()<<"\n";
        }

        // 2. Shifted operators y = zk x - A x; they hold a reference to zk so
        // that assigning zk in the loop below updates all stages at once
        thrust::complex<double> zk, wk;
        struct ShiftedOp
        {
            ShiftedOp( MatrixType& mat, const thrust::complex<double>& zk)
            : m_zk(zk), m_mat(mat){}
            void operator()( const ComplexContainer& x, ComplexContainer& y)
            {
                // Question: does COCG not care if matrix is positive/negative definite?
                // maybe not, since matrix does not have real EV anyways?
                dg::blas2::symv( m_mat, x, y);
                dg::blas1::axpby( m_zk, x, -1., y);
            }
            auto weights() const { return m_mat.weights();}
            auto precond() const { return m_mat.precond();}
            private:
            const thrust::complex<double>& m_zk;
            MatrixType& m_mat;
        };
        std::vector<ShiftedOp > shifted_ops;
        shifted_ops.reserve( m_multi.stages());
        for( unsigned u=0; u<m_multi.stages(); u++)
            shifted_ops.push_back( ShiftedOp{ ops[u], zk});

        // 3. Sum over all nodes
        dg::blas1::copy( 0., x);
        for( unsigned k=0; k<m_cauchy_opt.num_nodes(); k++)
        {
            zk = zkwk.first[k];
            wk = zkwk.second[k];
            // The very first m_z is zero: this should work in COCG as an allowed initial guess
            m_previous[k].extrapolate( m_z);

            if( m_adjoint)
            {
                // rhs = wk func( zk d) b
                dg::blas1::axpby( zk, d, 0., m_rhs);
                dg::blas1::transform ( m_rhs, m_rhs, func);
                dg::blas1::pointwiseDot( wk, m_rhs, b, 0., m_rhs);
            }
            else
            {
                // rhs = wk b
                dg::blas1::axpby( wk, b, 0., m_rhs);
            }
            m_multi.solve( shifted_ops, m_z, m_rhs, eps);
            m_previous[k].update( m_z);
            if( !m_adjoint)
            {
                // the function is applied after the solve: z = func( zk d) z
                dg::blas1::axpby( zk, d, 0., m_rhs);
                dg::blas1::transform ( m_rhs, m_rhs, func);
                dg::blas1::pointwiseDot( m_rhs, m_z, m_z);
            }
            dg::blas1::subroutine([]DG_DEVICE( thrust::complex<double> z, double& sum) {
                sum += 2*z.real();}, m_z, x );
        }
    }

    private:

    /**
     * @brief Estimate the extreme Eigenvalues of <tt>ops[0]</tt> with Lanczos
     *
     * Sets \c m_lmin, \c m_lmax (with a 10% safety range), \c m_with_zero and
     * marks the cache as up to date.
     */
    template<class MatrixType>
    void update_extremeEVs(std::vector<MatrixType>& ops )
    {
#ifdef MPI_VERSION
        // every other DG_RANK0 site in this file declares rank like this;
        // presumably the macro tests it in MPI builds
        int rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif //MPI
        dg::mat::UniversalLanczos<ComplexContainer> lanczos( ops[0].weights(), 2000);
        if( m_verbose)
            lanczos.set_verbose(true);
        auto rnd = ops[0].weights();
        dg::blas1::transform( rnd, rnd, dg::RandomNumbers<double>(0.0,1.0));
        auto T = lanczos.tridiag( ops[0], rnd, ops[0].weights(), 1e-4, 1., "compute_extreme_EV");
        auto EVs = dg::mat::compute_extreme_EV( T);
        // Let's use 10% safety range here (maybe test more but initial test show a slight improvement)
        m_lmax = 1.1*EVs[1];
        m_lmin = 0.9*EVs[0];
        m_with_zero = false;
        if( m_lmin < 1e-10*m_lmax)
        {
            if( m_verbose) DG_RANK0 std::cout << "# Found zero EV!\n";
            // replace the (numerically) zero Eigenvalue by a small positive value
            m_lmin = m_lmax/ m_grid_points;
            m_with_zero = true;
        }
        m_EV_up2date = true;
    }

    double m_eps;
    MultigridCG2d<Geometry, Matrix, ComplexContainer, dg::complex_symmetric> m_multi; // does not remember any solutions
    std::vector<dg::Extrapolation<ComplexContainer, double>> m_previous; // previous solutions for every zk
    ComplexContainer m_z, m_rhs; // complex vectors
    unsigned m_grid_points;
    CauchyOptimizer m_cauchy_opt;
    bool m_adjoint = true;

    bool m_with_zero = false;
    double m_lmin = 0, m_lmax = 0;
    bool m_EV_up2date = false;
    bool m_verbose = false;
};


}//namespace mat

}//namespace dg

dg::SquareMatrix

dg::SquareMatrix::data
const std::vector< value_type > & data() const

dg::mat::UniversalLanczos
Tridiagonalize $A$ and approximate $f(A)\vec b$ via Lanczos algorithm. $A$ is self-adjoint in the weights $W$.
Definition lanczos.h:154

contours.h

dg::blas1::copy
void copy(const ContainerTypeIn &source, ContainerTypeOut &target)

dg::blas1::axpbypgz
void axpbypgz(value_type alpha, const ContainerType1 &x, value_type1 beta, const ContainerType2 &y, value_type2 gamma, ContainerType &z)

dg::blas1::axpby
void axpby(value_type alpha, const ContainerType1 &x, value_type1 beta, ContainerType &y)

dg::blas1::pointwiseDot
void pointwiseDot(value_type alpha, const ContainerType1 &x1, const ContainerType2 &x2, value_type1 beta, ContainerType &y)

dg::blas1::transform
void transform(const ContainerType1 &x, ContainerType &y, UnaryOp op)

dg::blas1::reduce
OutputType reduce(const ContainerType &x, OutputType zero, BinaryOp binary_op, UnaryOp unary_op=UnaryOp())

dg::blas1::subroutine
void subroutine(Subroutine f, ContainerType &&x, ContainerTypes &&... xs)

dg::blas1::evaluate
void evaluate(ContainerType &y, BinarySubroutine f, Functor g, const ContainerType0 &x0, const ContainerTypes &...xs)

dg::blas2::dot
auto dot(const ContainerType1 &x, const MatrixType &m, const ContainerType2 &y)

dg::blas2::symv
void symv(MatrixType &&M, const ContainerType1 &x, ContainerType2 &y)

dg::coo3d::z
@ z

dg::coo2d::y
@ y

dg::coo2d::x
@ x

dg::get_value_type
typename TensorTraits< std::decay_t< Vector > >::value_type get_value_type

dg::complex_symmetric
complex_symmetric

dg::mat::compute_extreme_EV
std::array< value_type, 2 > compute_extreme_EV(const dg::TriDiagonal< thrust::host_vector< value_type > > &T)
Compute extreme Eigenvalues of a symmetric tridiagonal matrix.
Definition tridiaginv.h:727

dg::mat::make_FuncEigen_Te1
auto make_FuncEigen_Te1(UnaryOp f)
Create a functor that uses Eigenvalue decomposition to compute $f(T)\vec e_1$ for symmetric tridiagonal $T$.
Definition matrixfunction.h:29

DG_DEVICE
#define DG_DEVICE

lanczos.h

Matrix
dg::DMatrix Matrix
Definition lanczos_b.cpp:18

alpha
const double alpha
Definition lanczos_b.cpp:11

dg
Functions for optimizing Contours.

optimise.h

dg::MultigridCG2d

dg::Timer

dg::Timer::diff
double diff() const

dg::Timer::toc
void toc()

dg::Timer::tic
void tic()

dg::TriDiagonal

dg::UniformRealDistribution

dg::equals

dg::mat::CauchyMatrixProduct
Computation of $\vec x = f(\vec d, A)\,\vec b$ or $\vec x = f(A, \vec d)\,\vec b$ where $A$ is a positive (semi)-definite matrix self-adjoint in the weights $W$.
Definition matrixprod.h:428

dg::mat::CauchyMatrixProduct::solve
void solve(ContainerType0 &x, UnaryFunc func, UnaryFuncD dxfunc, std::vector< MatrixType > &ops, const ContainerType1 &d, const ContainerType2 &b, std::vector< double > eps)
Compute the bivariate matrix function.
Definition matrixprod.h:513

dg::mat::CauchyMatrixProduct::clear_cache
void clear_cache()
Clear the cached Eigenvalues of the matrix in the solve method.
Definition matrixprod.h:453

dg::mat::CauchyMatrixProduct::set_adjoint
void set_adjoint(bool adjoint)
Determine if adjoint or direct bivariate matrix function is computed.
Definition matrixprod.h:478

dg::mat::CauchyMatrixProduct::num_nodes
unsigned num_nodes() const
Number of (complex) nodes used in the latest call to solve.
Definition matrixprod.h:470

dg::mat::CauchyMatrixProduct::multigrid
const dg::MultigridCG2d< Geometry, Matrix, ComplexContainer, dg::complex_symmetric > & multigrid() const
Access the internal multigrid method to be able to construct matrices.
Definition matrixprod.h:444

dg::mat::CauchyMatrixProduct::set_verbose
void set_verbose(bool verbose)
Verbose output to std::cout.
Definition matrixprod.h:461

dg::mat::CauchyMatrixProduct::CauchyMatrixProduct
CauchyMatrixProduct()=default

dg::mat::CauchyMatrixProduct::CauchyMatrixProduct
CauchyMatrixProduct(double lm_eps, const Geometry &grid, unsigned stages, bool adjoint=true)
Definition matrixprod.h:430

dg::mat::CauchyMatrixProduct::get_adjoint
bool get_adjoint() const
Current value of the adjoint parameter.
Definition matrixprod.h:488

dg::mat::ProductMatrixFunction
Computation of $\vec x = f(\vec d, A)\,\vec b$ and $\vec x = f(A, \vec d)\,\vec b$ where $A$ is a positive definite matrix self-adjoint in the weights $W$.
Definition matrixprod.h:48

dg::mat::ProductMatrixFunction::compute_vlcl_adjoint
void compute_vlcl_adjoint(BinaryOp op, MatrixType &&A, const ContainerType0 &diag, const TriDiagonal< thrust::host_vector< value_type > > &T, ContainerType1 &x, const ContainerType2 &b, const ContainerType3 &weights, value_type bnorm)
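Compute $\vec x = \sum_{l} c_l \vec v_l$ with $c_l = \sum_{i,k} E_{ki}\,\alpha_{ki}\,E_{kl}$ and $\alpha_{ki} = \langle f(\lambda_k, \vec d)\odot\vec v_i, \vec b\rangle_W$.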
Definition matrixprod.h:312

dg::mat::ProductMatrixFunction::compute_vlcl
void compute_vlcl(BinaryOp op, const ContainerType0 &diag, MatrixType &&A, const TriDiagonal< thrust::host_vector< value_type > > &T, ContainerType1 &x, const ContainerType2 &b, value_type bnorm)
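Compute $\vec x = \sum_{l} \vec c_l \odot \vec v_l$ with $\vec c_l = \|\vec b\| \sum_{k} E_{k1} E_{kl}\, f(\vec d, \lambda_k)$.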
Definition matrixprod.h:237

dg::mat::ProductMatrixFunction::ProductMatrixFunction
ProductMatrixFunction(const ContainerType &copyable, unsigned max_iterations)
Allocate memory for the method.
Definition matrixprod.h:60

dg::mat::ProductMatrixFunction::set_benchmark
void set_benchmark(bool benchmark, std::string message="ProductFunction")
Set or unset performance timings during iterations.
Definition matrixprod.h:74

dg::mat::ProductMatrixFunction::lanczos
UniversalLanczos< ContainerType > & lanczos()
Access the Lanczos class that is constructed with the constructor parameters.
Definition matrixprod.h:385

dg::mat::ProductMatrixFunction::container_type
ContainerType container_type
Definition matrixprod.h:49

dg::mat::ProductMatrixFunction::apply
unsigned apply(ContainerType0 &x, BinaryOp op, const ContainerType1 &diag, MatrixType &&A, const ContainerType2 &b, const ContainerType3 &weights, value_type eps, value_type nrmb_correction=1.)
Compute $\vec x = f(\vec d, A)\,\vec b$.
Definition matrixprod.h:108

dg::mat::ProductMatrixFunction::construct
void construct(Params &&...ps)
Perfect forward parameters to one of the constructors.
Definition matrixprod.h:67

dg::mat::ProductMatrixFunction::value_type
dg::get_value_type< ContainerType > value_type
Definition matrixprod.h:50

dg::mat::ProductMatrixFunction::ProductMatrixFunction
ProductMatrixFunction()=default
Construct empty.

dg::mat::ProductMatrixFunction::apply_adjoint
unsigned apply_adjoint(ContainerType0 &x, BinaryOp op, MatrixType &&A, const ContainerType1 &diag, const ContainerType2 &b, const ContainerType3 &weights, value_type eps, value_type nrmb_correction=1.)
Compute $\vec x = f(A, \vec d)\,\vec b$.
Definition matrixprod.h:171

DG_RANK0
#define DG_RANK0