Extension: ExBLAS
#include "dg/algorithm.h" (or as a standalone library as "dg/exblas/exblas.h")
Loading...
Searching...
No Matches
mpi_accumulate.h
Go to the documentation of this file.
1
8#pragma once
9#include <mpi.h>
10#include <array>
11#include <vector>
12#include <map>
13#include "accumulate.h"
14
15namespace dg
16{
17namespace exblas {
18
42inline void mpi_reduce_communicator(MPI_Comm comm,
43 MPI_Comm* comm_mod, MPI_Comm* comm_mod_reduce){
44 //we keep track of communicators that were created in the past
45 // (static function variables are constructed the program
46 // encounters them and destruced at program termination)
47 static std::map<MPI_Comm, std::array<MPI_Comm, 2>> comm_mods;
48 assert( comm != MPI_COMM_NULL);
49 if( comm_mods.count(comm) == 1 )
50 {
51 *comm_mod = comm_mods[comm][0];
52 *comm_mod_reduce = comm_mods[comm][1];
53 return;
54 }
55 int rank, size;
56 MPI_Comm_rank( comm, &rank);
57 MPI_Comm_size( comm, &size);
58 // For example
59 // xxxxxxxxxxxxxxxx // 16 (comm_mod)
60 // xxxxxxxxxxxxxxxx // 16
61 // xxxxxx // 6
62 // 3333332222222222 // (comm_mod_reduce)
63 int mod = 128;
64 // 0-127, 128-255, ...
65 MPI_Comm_split( comm, rank/mod, rank%mod, comm_mod); //collective call
66 // Here we split rank%mod instead of returning MPI_COMM_NULL for != 0
67 // https://github.com/open-mpi/ompi/issues/13081
68 MPI_Comm_split( comm, rank%mod, rank, comm_mod_reduce);
69
70 comm_mods[comm] = {*comm_mod, *comm_mod_reduce};
71}
72
94inline void reduce_mpi_cpu( unsigned num_superacc, int64_t* in, int64_t* out,
95MPI_Comm comm, MPI_Comm comm_mod, MPI_Comm comm_mod_reduce )
96{
97 for( unsigned i=0; i<num_superacc; i++)
98 {
99 int imin=exblas::IMIN, imax=exblas::IMAX;
100 cpu::Normalize(&in[i*exblas::BIN_COUNT], imin, imax);
101 }
102
103 MPI_Reduce(in, out, num_superacc*exblas::BIN_COUNT, MPI_LONG, MPI_SUM, 0,
104 comm_mod);
105 int rank;
106 MPI_Comm_rank( comm_mod, &rank);
107 if(rank == 0)
108 {
109 int size;
110 MPI_Comm_size( comm_mod_reduce, &size);
111 if( size > 1)
112 {
113 for( unsigned i=0; i<num_superacc; i++)
114 {
115 int imin=exblas::IMIN, imax=exblas::IMAX;
116 cpu::Normalize(&out[i*exblas::BIN_COUNT], imin, imax);
117 for( int k=0; k<exblas::BIN_COUNT; k++)
118 in[i*BIN_COUNT+k] = out[i*BIN_COUNT+k];
119 }
120 MPI_Reduce(in, out, num_superacc*exblas::BIN_COUNT, MPI_LONG,
121 MPI_SUM, 0, comm_mod_reduce);
122 }
123 }
124 MPI_Bcast( out, num_superacc*exblas::BIN_COUNT, MPI_LONG, 0, comm);
125}
126
127}//namespace exblas
128} //namespace dg
Primitives for accumulation into superaccumulator.
bool Normalize(int64_t *accumulator, int &imin, int &imax)
Normalize a superaccumulator.
Definition accumulate.h:267
void mpi_reduce_communicator(MPI_Comm comm, MPI_Comm *comm_mod, MPI_Comm *comm_mod_reduce)
This function can be used to partition communicators for the exblas::reduce_mpi_cpu function.
Definition mpi_accumulate.h:42
void reduce_mpi_cpu(unsigned num_superacc, int64_t *in, int64_t *out, MPI_Comm comm, MPI_Comm comm_mod, MPI_Comm comm_mod_reduce)
Reduce a number of superaccumulators distributed among MPI processes.
Definition mpi_accumulate.h:94