Discontinuous Galerkin Library
#include "dg/algorithm.h"
mpi_init.h
#pragma once

#include <cstdlib> // for exit
#include <iostream>
#include <cassert>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h> //declare THRUST_DEVICE_SYSTEM
#include "../enums.h"
// Note: this header calls MPI_* functions (and omp_get_max_threads in OpenMP
// builds) directly, so mpi.h (and omp.h) are expected to be included beforehand.

namespace dg
{

static inline void mpi_init( int argc, char* argv[])
{
#ifdef _OPENMP
    int provided, error;
    // request FUNNELED threading: only the master thread will make MPI calls
    error = MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    assert( error == MPI_SUCCESS && "Threaded MPI lib required!\n");
#else
    MPI_Init(&argc, &argv);
#endif
}

static inline void mpi_init2d( dg::bc bcx, dg::bc bcy, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank);
    MPI_Comm_size( MPI_COMM_WORLD, &size);
    if(rank==0 && verbose)std::cout << "# MPI v"<<MPI_VERSION<<"."<<MPI_SUBVERSION<<std::endl;
    int periods[2] = {false,false};
    if( bcx == dg::PER) periods[0] = true;
    if( bcy == dg::PER) periods[1] = true;
    int np[2];
    if( rank == 0)
    {
        int num_threads = 1;
#ifdef _OPENMP
        num_threads = omp_get_max_threads( );
#endif //omp
        if(verbose)std::cout << "# Type npx and npy\n";
        is >> np[0] >> np[1];
        if(verbose)std::cout << "# Computing with "
            << np[0]<<" x "<<np[1]<<" processes x "
            << num_threads<<" threads = "
            <<size*num_threads<<" total"<<std::endl;
        if( size != np[0]*np[1])
        {
            std::cerr << "ERROR: Process partition needs to match total number of processes!"<<std::endl;
            MPI_Abort(MPI_COMM_WORLD, -1);
            exit(-1);
        }
    }
    MPI_Bcast( np, 2, MPI_INT, 0, MPI_COMM_WORLD);
    // reorder = true: the implementation may renumber ranks in the Cartesian communicator
    MPI_Cart_create( MPI_COMM_WORLD, 2, np, periods, true, &comm);

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
    int num_devices=0;
    cudaGetDeviceCount(&num_devices);
    if(num_devices == 0)
    {
        std::cerr << "# No CUDA capable devices found on rank "<<rank<<std::endl;
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(-1);
    }
    int device = rank % num_devices; //assume # of gpus/node is fixed
    if(verbose)std::cout << "# Rank "<<rank<<" computes with device "<<device<<" !"<<std::endl;
    cudaSetDevice( device);
#endif //THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
}

static inline void mpi_init2d(unsigned& n, unsigned& Nx, unsigned& Ny, MPI_Comm comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank;
    MPI_Comm_rank( comm, &rank);
    if( rank == 0)
    {
        if(verbose)std::cout << "# Type n, Nx and Ny\n";
        is >> n >> Nx >> Ny;
        if(verbose)std::cout<< "# On the grid "<<n <<" x "<<Nx<<" x "<<Ny<<std::endl;
    }
    MPI_Bcast( &n, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Nx, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Ny, 1, MPI_UNSIGNED, 0, comm);
}

static inline void mpi_init2d( dg::bc bcx, dg::bc bcy, unsigned& n, unsigned& Nx, unsigned& Ny, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    mpi_init2d( bcx, bcy, comm, is, verbose);
    mpi_init2d( n, Nx, Ny, comm, is, verbose);
}

static inline void mpi_init3d( dg::bc bcx, dg::bc bcy, dg::bc bcz, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank);
    MPI_Comm_size( MPI_COMM_WORLD, &size);
    int periods[3] = {false, false, false};
    if( bcx == dg::PER) periods[0] = true;
    if( bcy == dg::PER) periods[1] = true;
    if( bcz == dg::PER) periods[2] = true;
    int np[3];
    if( rank == 0)
    {
        int num_threads = 1;
#ifdef _OPENMP
        num_threads = omp_get_max_threads( );
#endif //omp
        if(verbose) std::cout << "# Type npx and npy and npz\n";
        is >> np[0] >> np[1] >> np[2];
        if(verbose) std::cout << "# Computing with "
            << np[0]<<" x "<<np[1]<<" x "<<np[2] << " processes x "
            << num_threads<<" threads = "
            <<size*num_threads<<" total"<<std::endl;
        if( size != np[0]*np[1]*np[2])
        {
            std::cerr << "ERROR: Process partition needs to match total number of processes!"<<std::endl;
            MPI_Abort(MPI_COMM_WORLD, -1);
            exit(-1);
        }
    }
    MPI_Bcast( np, 3, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Cart_create( MPI_COMM_WORLD, 3, np, periods, true, &comm);
#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
    int num_devices=0;
    cudaGetDeviceCount(&num_devices);
    if(num_devices == 0)
    {
        std::cerr << "# No CUDA capable devices found on rank "<<rank<<std::endl;
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(-1);
    }
    int device = rank % num_devices; //assume # of gpus/node is fixed
    if(verbose)std::cout << "# Rank "<<rank<<" computes with device "<<device<<" !"<<std::endl;
    cudaSetDevice( device);
#endif //THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
}

static inline void mpi_init3d(unsigned& n, unsigned& Nx, unsigned& Ny, unsigned& Nz, MPI_Comm comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank;
    MPI_Comm_rank( comm, &rank);
    if( rank == 0)
    {
        if(verbose)std::cout << "# Type n, Nx and Ny and Nz\n";
        is >> n >> Nx >> Ny >> Nz;
        if(verbose)std::cout<< "# On the grid "<<n <<" x "<<Nx<<" x "<<Ny<<" x "<<Nz<<std::endl;
    }
    // broadcast on the given communicator, consistent with the rank determined above
    MPI_Bcast( &n, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Nx, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Ny, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Nz, 1, MPI_UNSIGNED, 0, comm);
}

static inline void mpi_init3d( dg::bc bcx, dg::bc bcy, dg::bc bcz, unsigned& n, unsigned& Nx, unsigned& Ny, unsigned& Nz, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    mpi_init3d( bcx, bcy, bcz, comm, is, verbose);
    mpi_init3d( n, Nx, Ny, Nz, comm, is, verbose);
}
} //namespace dg
bc
Switch between boundary conditions.
Definition: enums.h:15
PER
periodic boundaries
Definition: enums.h:16
static void mpi_init2d(dg::bc bcx, dg::bc bcy, MPI_Comm &comm, std::istream &is=std::cin, bool verbose=true)
Read in number of processes and create Cartesian MPI communicator.
Definition: mpi_init.h:61
static void mpi_init3d(dg::bc bcx, dg::bc bcy, dg::bc bcz, MPI_Comm &comm, std::istream &is=std::cin, bool verbose=true)
Read in number of processes and create Cartesian MPI communicator.
Definition: mpi_init.h:168
static void mpi_init(int argc, char *argv[])
Convenience shortcut: calls MPI_Init or MPI_Init_thread.
Definition: mpi_init.h:35
dg
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.
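
For orientation, a minimal usage sketch follows (not part of the header itself). It assumes an MPI-enabled build in which mpi.h is included before dg/algorithm.h, and that the process decomposition npx, npy and the grid parameters n, Nx, Ny are supplied on the standard input read by rank 0; error handling is omitted.

#include <mpi.h>              // assumed to be included before dg/algorithm.h in MPI builds
#include "dg/algorithm.h"

int main( int argc, char* argv[])
{
    dg::mpi_init( argc, argv);          // MPI_Init or MPI_Init_thread
    MPI_Comm comm;
    unsigned n, Nx, Ny;
    // rank 0 reads npx, npy and n, Nx, Ny from std::cin; all values are broadcast
    dg::mpi_init2d( dg::PER, dg::PER, n, Nx, Ny, comm);
    // ... build grids and vectors on the Cartesian communicator comm ...
    MPI_Finalize();
    return 0;
}

Such a program would typically be launched with something like mpirun -n 4 ./program and given the input 2 2 (for npx, npy) followed by 3 20 20 (for n, Nx, Ny), so that npx*npy matches the number of MPI processes.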