Discontinuous Galerkin Library
#include "dg/algorithm.h"
mpi_init.h
#pragma once

#include <cstdlib> // for exit
#include <iostream>
#include <cassert>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h> //declare THRUST_DEVICE_SYSTEM
#include "../enums.h"
// Note: this header calls MPI_* functions (and omp_get_max_threads in OpenMP
// builds) directly, so mpi.h (and omp.h) are expected to be included beforehand.

namespace dg
{

static inline void mpi_init( int argc, char* argv[])
{
#ifdef _OPENMP
    int provided, error;
    // request FUNNELED threading: only the master thread will make MPI calls
    error = MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
    assert( error == MPI_SUCCESS && "Threaded MPI lib required!\n");
#else
    MPI_Init(&argc, &argv);
#endif
}

static inline void mpi_init2d( dg::bc bcx, dg::bc bcy, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank);
    MPI_Comm_size( MPI_COMM_WORLD, &size);
    if(rank==0 && verbose)std::cout << "# MPI v"<<MPI_VERSION<<"."<<MPI_SUBVERSION<<std::endl;
    int periods[2] = {false,false};
    if( bcx == dg::PER) periods[0] = true;
    if( bcy == dg::PER) periods[1] = true;
    int np[2];
    if( rank == 0)
    {
        int num_threads = 1;
#ifdef _OPENMP
        num_threads = omp_get_max_threads( );
#endif //omp
        if(verbose)std::cout << "# Type npx and npy\n";
        is >> np[0] >> np[1];
        if(verbose)std::cout << "# Computing with "
            << np[0]<<" x "<<np[1]<<" processes x "
            << num_threads<<" threads = "
            <<size*num_threads<<" total"<<std::endl;
        if( size != np[0]*np[1])
        {
            std::cerr << "ERROR: Process partition needs to match total number of processes!"<<std::endl;
            MPI_Abort(MPI_COMM_WORLD, -1);
            exit(-1);
        }
    }
    MPI_Bcast( np, 2, MPI_INT, 0, MPI_COMM_WORLD);
    // reorder = true: the implementation may renumber ranks in the Cartesian communicator
    MPI_Cart_create( MPI_COMM_WORLD, 2, np, periods, true, &comm);

#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
    int num_devices=0;
    cudaGetDeviceCount(&num_devices);
    if(num_devices == 0)
    {
        std::cerr << "# No CUDA capable devices found on rank "<<rank<<std::endl;
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(-1);
    }
    int device = rank % num_devices; //assume # of gpus/node is fixed
    if(verbose)std::cout << "# Rank "<<rank<<" computes with device "<<device<<" !"<<std::endl;
    cudaSetDevice( device);
#endif //THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
}

static inline void mpi_init2d(unsigned& n, unsigned& Nx, unsigned& Ny, MPI_Comm comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank;
    MPI_Comm_rank( comm, &rank);
    if( rank == 0)
    {
        if(verbose)std::cout << "# Type n, Nx and Ny\n";
        is >> n >> Nx >> Ny;
        if(verbose)std::cout<< "# On the grid "<<n <<" x "<<Nx<<" x "<<Ny<<std::endl;
    }
    MPI_Bcast( &n, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Nx, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Ny, 1, MPI_UNSIGNED, 0, comm);
}

static inline void mpi_init2d( dg::bc bcx, dg::bc bcy, unsigned& n, unsigned& Nx, unsigned& Ny, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    mpi_init2d( bcx, bcy, comm, is, verbose);
    mpi_init2d( n, Nx, Ny, comm, is, verbose);
}

static inline void mpi_init3d( dg::bc bcx, dg::bc bcy, dg::bc bcz, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank);
    MPI_Comm_size( MPI_COMM_WORLD, &size);
    int periods[3] = {false, false, false};
    if( bcx == dg::PER) periods[0] = true;
    if( bcy == dg::PER) periods[1] = true;
    if( bcz == dg::PER) periods[2] = true;
    int np[3];
    if( rank == 0)
    {
        int num_threads = 1;
#ifdef _OPENMP
        num_threads = omp_get_max_threads( );
#endif //omp
        if(verbose) std::cout << "# Type npx and npy and npz\n";
        is >> np[0] >> np[1] >> np[2];
        if(verbose) std::cout << "# Computing with "
            << np[0]<<" x "<<np[1]<<" x "<<np[2] << " processes x "
            << num_threads<<" threads = "
            <<size*num_threads<<" total"<<std::endl;
        if( size != np[0]*np[1]*np[2])
        {
            std::cerr << "ERROR: Process partition needs to match total number of processes!"<<std::endl;
            MPI_Abort(MPI_COMM_WORLD, -1);
            exit(-1);
        }
    }
    MPI_Bcast( np, 3, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Cart_create( MPI_COMM_WORLD, 3, np, periods, true, &comm);
#if THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
    int num_devices=0;
    cudaGetDeviceCount(&num_devices);
    if(num_devices == 0)
    {
        std::cerr << "# No CUDA capable devices found on rank "<<rank<<std::endl;
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(-1);
    }
    int device = rank % num_devices; //assume # of gpus/node is fixed
    if(verbose)std::cout << "# Rank "<<rank<<" computes with device "<<device<<" !"<<std::endl;
    cudaSetDevice( device);
#endif //THRUST_DEVICE_SYSTEM==THRUST_DEVICE_SYSTEM_CUDA
}

static inline void mpi_init3d(unsigned& n, unsigned& Nx, unsigned& Ny, unsigned& Nz, MPI_Comm comm, std::istream& is = std::cin, bool verbose = true )
{
    int rank;
    MPI_Comm_rank( comm, &rank);
    if( rank == 0)
    {
        if(verbose)std::cout << "# Type n, Nx and Ny and Nz\n";
        is >> n >> Nx >> Ny >> Nz;
        if(verbose)std::cout<< "# On the grid "<<n <<" x "<<Nx<<" x "<<Ny<<" x "<<Nz<<std::endl;
    }
    // broadcast on the given communicator, consistent with the rank determined above
    MPI_Bcast( &n, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Nx, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Ny, 1, MPI_UNSIGNED, 0, comm);
    MPI_Bcast( &Nz, 1, MPI_UNSIGNED, 0, comm);
}

static inline void mpi_init3d( dg::bc bcx, dg::bc bcy, dg::bc bcz, unsigned& n, unsigned& Nx, unsigned& Ny, unsigned& Nz, MPI_Comm& comm, std::istream& is = std::cin, bool verbose = true )
{
    mpi_init3d( bcx, bcy, bcz, comm, is, verbose);
    mpi_init3d( n, Nx, Ny, Nz, comm, is, verbose);
}
} //namespace dg
bc
Switch between boundary conditions.
Definition: enums.h:15
PER
periodic boundaries
Definition: enums.h:16
static void mpi_init2d(dg::bc bcx, dg::bc bcy, MPI_Comm &comm, std::istream &is=std::cin, bool verbose=true)
Read in number of processes and create Cartesian MPI communicator.
Definition: mpi_init.h:61
static void mpi_init3d(dg::bc bcx, dg::bc bcy, dg::bc bcz, MPI_Comm &comm, std::istream &is=std::cin, bool verbose=true)
Read in number of processes and create Cartesian MPI communicator.
Definition: mpi_init.h:168
static void mpi_init(int argc, char *argv[])
Convenience shortcut: calls MPI_Init or MPI_Init_thread.
Definition: mpi_init.h:35
dg
This is the namespace for all functions and classes defined and used by the discontinuous Galerkin library.
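
For orientation, a minimal usage sketch follows (not part of the header itself). It assumes an MPI-enabled build in which mpi.h is included before dg/algorithm.h, and that the process decomposition npx, npy and the grid parameters n, Nx, Ny are supplied on the standard input read by rank 0; error handling is omitted.

#include <mpi.h>              // assumed to be included before dg/algorithm.h in MPI builds
#include "dg/algorithm.h"

int main( int argc, char* argv[])
{
    dg::mpi_init( argc, argv);          // MPI_Init or MPI_Init_thread
    MPI_Comm comm;
    unsigned n, Nx, Ny;
    // rank 0 reads npx, npy and n, Nx, Ny from std::cin; all values are broadcast
    dg::mpi_init2d( dg::PER, dg::PER, n, Nx, Ny, comm);
    // ... build grids and vectors on the Cartesian communicator comm ...
    MPI_Finalize();
    return 0;
}

Such a program would typically be launched with something like mpirun -n 4 ./program and given the input 2 2 (for npx, npy) followed by 3 20 20 (for n, Nx, Ny), so that npx*npy matches the number of MPI processes.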