5#include <thrust/host_vector.h>
6#include <thrust/device_vector.h>
// Convenience shortcut to initialize MPI from main(argc, argv).
// NOTE(review): this extract shows BOTH MPI_Init_thread and MPI_Init being
// called, which MPI forbids (initialize exactly once). In the upstream header
// these two calls are presumably separated by an #ifdef _OPENMP guard that was
// lost in extraction — confirm against the original mpi_init.h. The `error`
// and `provided` locals are also declared in lines missing from this view.
35static inline void mpi_init(
int argc,
char* argv[])
// OpenMP path: request MPI_THREAD_FUNNELED (only the master thread calls MPI).
39 error = MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
// Hard-fail if the MPI library does not support threaded operation.
40 assert( error == MPI_SUCCESS &&
"Threaded MPI lib required!\n");
// Non-threaded path: plain initialization.
42 MPI_Init(&argc, &argv);
// Read the 2d process partition (npx, npy) from `is` and create a 2d Cartesian
// communicator in `comm`, with periodicity taken from the boundary conditions.
// NOTE(review): the declarations of rank, size, np, num_threads, num_devices,
// the `is >> np[0] >> np[1];` read, braces and several #ifdef guards appear to
// have been lost in extraction — code below is kept byte-identical regardless.
61static inline void mpi_init2d(
dg::bc bcx,
dg::bc bcy, MPI_Comm& comm, std::istream& is = std::cin,
bool verbose =
true )
64 MPI_Comm_rank( MPI_COMM_WORLD, &rank);
65 MPI_Comm_size( MPI_COMM_WORLD, &size);
// Announce the MPI standard version once (rank 0 only).
66 if(rank==0 && verbose)std::cout <<
"# MPI v"<<MPI_VERSION<<
"."<<MPI_SUBVERSION<<std::endl;
// A dimension is periodic in the Cartesian topology iff its bc is dg::PER.
67 int periods[2] = {
false,
false};
68 if( bcx ==
dg::PER) periods[0] =
true;
69 if( bcy ==
dg::PER) periods[1] =
true;
// Presumably inside an #ifdef _OPENMP region in the original — TODO confirm.
75 num_threads = omp_get_max_threads( );
// Prompt for the process partition; the `is >> np[0] >> np[1];` read that
// fills np is missing from this extract.
77 if(verbose)std::cout <<
"# Type npx and npy\n";
79 if(verbose)std::cout <<
"# Computing with "
80 << np[0]<<
" x "<<np[1]<<
" processes x "
81 << num_threads<<
" threads = "
82 <<size*num_threads<<
" total"<<std::endl;
// Abort the whole job if npx*npy does not match the launched process count.
83 if( size != np[0]*np[1])
85 std::cerr <<
"ERROR: Process partition needs to match total number of processes!"<<std::endl;
86 MPI_Abort(MPI_COMM_WORLD, -1);
// Broadcast the partition read on rank 0, then build the Cartesian topology
// (reorder = true lets MPI renumber ranks for locality).
90 MPI_Bcast( np, 2, MPI_INT, 0, MPI_COMM_WORLD);
91 MPI_Cart_create( MPI_COMM_WORLD, 2, np, periods,
true, &comm);
// CUDA backend: bind each rank to a device, round-robin over visible devices.
93#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
95 cudaGetDeviceCount(&num_devices);
// Presumably guarded by `if(num_devices == 0)` in the original — TODO confirm.
98 std::cerr <<
"# No CUDA capable devices found on rank "<<rank<<std::endl;
99 MPI_Abort(MPI_COMM_WORLD, -1);
102 int device = rank % num_devices;
103 if(verbose)std::cout <<
"# Rank "<<rank<<
" computes with device "<<device<<
" !"<<std::endl;
104 cudaSetDevice( device);
// Read the 2d grid parameters n (polynomial coefficients), Nx, Ny from `is`
// on rank 0 and broadcast them over `comm` so all ranks agree.
// NOTE(review): the `int rank;` declaration and the `is >> n >> Nx >> Ny;`
// read appear to be missing from this extract — compare the 3d overload below,
// which still shows its corresponding `is >>` line.
118static inline void mpi_init2d(
unsigned& n,
unsigned& Nx,
unsigned& Ny, MPI_Comm comm, std::istream& is = std::cin,
bool verbose =
true )
121 MPI_Comm_rank( comm, &rank);
// Prompt (rank 0 presumably guards the read; the guard is not visible here).
124 if(verbose)std::cout <<
"# Type n, Nx and Ny\n";
// Echo the grid so the run is reproducible from the log.
126 if(verbose)std::cout<<
"# On the grid "<<n <<
" x "<<Nx<<
" x "<<Ny<<std::endl;
// Broadcast the values read on rank 0 to every rank in comm.
128 MPI_Bcast( &n,1 , MPI_UNSIGNED, 0, comm);
129 MPI_Bcast( &Nx,1 , MPI_UNSIGNED, 0, comm);
130 MPI_Bcast( &Ny,1 , MPI_UNSIGNED, 0, comm);
// NOTE(review): the signature below says mpi_init2d with (bcx, bcy, n, Nx, Ny)
// but the body uses bcz, periods[3], np[2] and a 3d Cart_create — the text
// extraction has apparently spliced the body of the 3d overload
// mpi_init3d(bcx, bcy, bcz, comm, ...) under the 2d signature. In the upstream
// header this 2d overload is a two-line wrapper delegating to the other two
// mpi_init2d overloads (mirroring the mpi_init3d wrapper at the end of this
// file) — verify against the original mpi_init.h before relying on this text.
148static inline void mpi_init2d(
dg::bc bcx,
dg::bc bcy,
unsigned& n,
unsigned& Nx,
unsigned& Ny, MPI_Comm& comm, std::istream& is = std::cin,
bool verbose =
true )
171 MPI_Comm_rank( MPI_COMM_WORLD, &rank);
172 MPI_Comm_size( MPI_COMM_WORLD, &size);
// Periodicity per dimension follows the boundary conditions (dg::PER).
173 int periods[3] = {
false,
false,
false};
174 if( bcx ==
dg::PER) periods[0] =
true;
175 if( bcy ==
dg::PER) periods[1] =
true;
// bcz is not a parameter of the signature above — see the splice note.
176 if( bcz ==
dg::PER) periods[2] =
true;
// Presumably inside an #ifdef _OPENMP region in the original — TODO confirm.
182 num_threads = omp_get_max_threads( );
// Read the 3d process partition npx x npy x npz from `is`.
184 if(verbose) std::cout <<
"# Type npx and npy and npz\n";
185 is >> np[0] >> np[1]>>np[2];
186 if(verbose) std::cout <<
"# Computing with "
187 << np[0]<<
" x "<<np[1]<<
" x "<<np[2] <<
" processes x "
188 << num_threads<<
" threads = "
189 <<size*num_threads<<
" total"<<std::endl;
// Abort the whole job if the partition does not match the process count.
190 if( size != np[0]*np[1]*np[2])
192 std::cerr <<
"ERROR: Process partition needs to match total number of processes!"<<std::endl;
193 MPI_Abort(MPI_COMM_WORLD, -1);
// Broadcast the partition, then build the 3d Cartesian topology
// (reorder = true lets MPI renumber ranks for locality).
197 MPI_Bcast( np, 3, MPI_INT, 0, MPI_COMM_WORLD);
198 MPI_Cart_create( MPI_COMM_WORLD, 3, np, periods,
true, &comm);
// CUDA backend: bind each rank to a device, round-robin over visible devices.
199#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
201 cudaGetDeviceCount(&num_devices);
// Presumably guarded by `if(num_devices == 0)` in the original — TODO confirm.
204 std::cerr <<
"# No CUDA capable devices found on rank "<<rank<<std::endl;
205 MPI_Abort(MPI_COMM_WORLD, -1);
208 int device = rank % num_devices;
209 if(verbose)std::cout <<
"# Rank "<<rank<<
" computes with device "<<device<<
" !"<<std::endl;
210 cudaSetDevice( device);
// Read the 3d grid parameters n (polynomial coefficients), Nx, Ny, Nz from
// `is` on rank 0 and broadcast them over MPI_COMM_WORLD so all ranks agree.
// NOTE(review): the `int rank;` declaration and the `if(rank==0)` guard around
// the read are not visible in this extract — TODO confirm against upstream.
// Also note the broadcasts use MPI_COMM_WORLD while rank is taken from `comm`,
// unlike the 2d overload which broadcasts over `comm` — verify intentional.
225static inline void mpi_init3d(
unsigned& n,
unsigned& Nx,
unsigned& Ny,
unsigned& Nz, MPI_Comm comm, std::istream& is = std::cin,
bool verbose =
true )
228 MPI_Comm_rank( comm, &rank);
// Prompt for and read the grid sizes.
231 if(verbose)std::cout <<
"# Type n, Nx and Ny and Nz\n";
232 is >> n >> Nx >> Ny >> Nz;
// Echo the grid so the run is reproducible from the log.
233 if(verbose)std::cout<<
"# On the grid "<<n <<
" x "<<Nx<<
" x "<<Ny<<
" x "<<Nz<<std::endl;
// Broadcast the values read on rank 0 to every rank.
235 MPI_Bcast( &n,1 , MPI_UNSIGNED, 0, MPI_COMM_WORLD);
236 MPI_Bcast( &Nx,1 , MPI_UNSIGNED, 0, MPI_COMM_WORLD);
237 MPI_Bcast( &Ny,1 , MPI_UNSIGNED, 0, MPI_COMM_WORLD);
238 MPI_Bcast( &Nz,1 , MPI_UNSIGNED, 0, MPI_COMM_WORLD);
// All-in-one 3d setup: first create the Cartesian communicator from the
// boundary conditions, then read and broadcast the grid parameters.
// Simply delegates to the two more specialized mpi_init3d overloads.
258static inline void mpi_init3d(
dg::bc bcx,
dg::bc bcy,
dg::bc bcz,
unsigned& n,
unsigned& Nx,
unsigned& Ny,
unsigned& Nz, MPI_Comm& comm, std::istream& is = std::cin,
bool verbose =
true )
// Build comm from the periodicity implied by bcx/bcy/bcz.
260 mpi_init3d( bcx, bcy, bcz, comm, is, verbose);
// Read n, Nx, Ny, Nz on rank 0 and broadcast them.
261 mpi_init3d( n, Nx, Ny, Nz, comm, is, verbose);
bc
Switch between boundary conditions.
Definition: enums.h:15
@ PER
periodic boundaries
Definition: enums.h:16
static void mpi_init2d(dg::bc bcx, dg::bc bcy, MPI_Comm &comm, std::istream &is=std::cin, bool verbose=true)
Read in number of processes and create a Cartesian MPI communicator.
Definition: mpi_init.h:61
static void mpi_init3d(dg::bc bcx, dg::bc bcy, dg::bc bcz, MPI_Comm &comm, std::istream &is=std::cin, bool verbose=true)
Read in number of processes and create a Cartesian MPI communicator.
Definition: mpi_init.h:168
static void mpi_init(int argc, char *argv[])
Convenience shortcut: Calls MPI_Init or MPI_Init_thread.
Definition: mpi_init.h:35
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.