Extension: ExBLAS
#include "dg/algorithm.h" (or as a standalone library as "dg/exblas/exblas.h")
mpi_accumulate.h
#pragma once
#include <mpi.h>
#include <array>
#include <vector>
#include <map>
#include <cmath>   //for ceil
#include <cassert> //for assert
#include "accumulate.h" //primitives for accumulation into superaccumulators

namespace dg
{
namespace exblas {

namespace detail{
//we keep track of communicators that were created in the past
static std::map<MPI_Comm, std::array<MPI_Comm, 2>> comm_mods;
}

//Partition a communicator for the two-stage reduction in
//exblas::reduce_mpi_cpu; the created communicators are cached and
//re-used in subsequent calls with the same comm
static void mpi_reduce_communicator(MPI_Comm comm, MPI_Comm* comm_mod, MPI_Comm* comm_mod_reduce){
    assert( comm != MPI_COMM_NULL);
    if( detail::comm_mods.count(comm) == 1 )
    {
        *comm_mod = detail::comm_mods[comm][0];
        *comm_mod_reduce = detail::comm_mods[comm][1];
        return;
    }
    else
    {
        int mod = 128;
        int rank, size;
        MPI_Comm_rank( comm, &rank);
        MPI_Comm_size( comm, &size);
        //split comm into groups of mod consecutive ranks
        MPI_Comm_split( comm, rank/mod, rank%mod, comm_mod); //collective call
        MPI_Group group, reduce_group;
        MPI_Comm_group( comm, &group); //local call
        //collect the first rank of every group in a second communicator
        int reduce_size=(int)ceil((double)size/(double)mod);
        std::vector<int> reduce_ranks(reduce_size);
        for( int i=0; i<reduce_size; i++)
            reduce_ranks[i] = i*mod;
        MPI_Group_incl( group, reduce_size, reduce_ranks.data(), &reduce_group); //local
        //returns MPI_COMM_NULL to processes that are not in reduce_group
        MPI_Comm_create( comm, reduce_group, comm_mod_reduce); //collective
        MPI_Group_free( &group);
        MPI_Group_free( &reduce_group);
        detail::comm_mods[comm] = {*comm_mod, *comm_mod_reduce};
    }
}

//Reduce num_superacc superaccumulators distributed among the processes
//of comm; comm_mod and comm_mod_reduce must be obtained through
//mpi_reduce_communicator( comm, ...). On return every process of comm
//holds the result in out
static void reduce_mpi_cpu( unsigned num_superacc, int64_t* in, int64_t* out, MPI_Comm comm, MPI_Comm comm_mod, MPI_Comm comm_mod_reduce )
{
    //normalize the local superaccumulators to prevent overflow in the sum
    for( unsigned i=0; i<num_superacc; i++)
    {
        int imin=exblas::IMIN, imax=exblas::IMAX;
        cpu::Normalize(&in[i*exblas::BIN_COUNT], imin, imax);
    }
    //first stage: reduce onto the first rank of each group
    MPI_Reduce(in, out, num_superacc*exblas::BIN_COUNT, MPI_LONG, MPI_SUM, 0, comm_mod);
    //second stage: the group roots reduce among themselves
    if(comm_mod_reduce != MPI_COMM_NULL)
    {
        for( unsigned i=0; i<num_superacc; i++)
        {
            int imin=exblas::IMIN, imax=exblas::IMAX;
            cpu::Normalize(&out[i*exblas::BIN_COUNT], imin, imax);
            for( int k=0; k<exblas::BIN_COUNT; k++)
                in[i*exblas::BIN_COUNT+k] = out[i*exblas::BIN_COUNT+k];
        }
        MPI_Reduce(in, out, num_superacc*exblas::BIN_COUNT, MPI_LONG, MPI_SUM, 0, comm_mod_reduce);
    }
    //distribute the result to all processes in comm
    MPI_Bcast( out, num_superacc*exblas::BIN_COUNT, MPI_LONG, 0, comm);
}

}//namespace exblas
} //namespace dg
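
mpi_reduce_communicator partitions a communicator into groups of (at most) 128 consecutive ranks that share one comm_mod each; the first rank of every group additionally joins comm_mod_reduce, while all other processes receive MPI_COMM_NULL for it. The following minimal sketch, which is not part of the file above, prints this partition; it assumes the standalone include "dg/exblas/exblas.h" and an MPI compiler wrapper such as mpic++:

#include <cstdio>
#include <mpi.h>
#include "dg/exblas/exblas.h"

int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    //create (and cache) the two sub-communicators for MPI_COMM_WORLD
    MPI_Comm comm_mod, comm_mod_reduce;
    dg::exblas::mpi_reduce_communicator(MPI_COMM_WORLD, &comm_mod, &comm_mod_reduce);

    int group_rank;
    MPI_Comm_rank(comm_mod, &group_rank);
    //only the first rank of every group of 128 is a member of comm_mod_reduce
    printf("world rank %3d: rank %3d within its group, group root: %s\n",
        world_rank, group_rank,
        comm_mod_reduce != MPI_COMM_NULL ? "yes" : "no");

    MPI_Finalize();
    return 0;
}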
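
Together the two functions yield a reproducible, exactly rounded MPI dot product: each process accumulates its local part into a superaccumulator, reduce_mpi_cpu sums the superaccumulators exactly across all processes, and the rounding to double happens only once. The following sketch shows this pipeline under two assumptions that may differ between ExBLAS versions: that the serial accumulation routine has the overload exblas::exdot_cpu( size, x_ptr, y_ptr, superacc), and that exblas::cpu::Round( superacc) from accumulate.h rounds a superaccumulator to the nearest double:

#include <cstdint>
#include <iostream>
#include <vector>
#include <mpi.h>
#include "dg/exblas/exblas.h"

int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    //every process holds a chunk of the two vectors
    unsigned N = 1000;
    std::vector<double> x(N, 1.5), y(N, 2.0);

    //accumulate the local dot product into a superaccumulator
    std::vector<int64_t> local( dg::exblas::BIN_COUNT), result( dg::exblas::BIN_COUNT);
    dg::exblas::exdot_cpu( N, x.data(), y.data(), local.data());

    //partition MPI_COMM_WORLD for the two-stage reduction
    MPI_Comm comm_mod, comm_mod_reduce;
    dg::exblas::mpi_reduce_communicator( MPI_COMM_WORLD, &comm_mod, &comm_mod_reduce);

    //sum the superaccumulators exactly; afterwards every process holds the result
    dg::exblas::reduce_mpi_cpu( 1, local.data(), result.data(), MPI_COMM_WORLD,
        comm_mod, comm_mod_reduce);

    //round to double only once, after the exact reduction
    double dot = dg::exblas::cpu::Round( result.data());
    if( rank == 0)
        std::cout << "exact dot product: "<<dot<<"\n";

    MPI_Finalize();
    return 0;
}

Because reduce_mpi_cpu broadcasts the reduced superaccumulator to all processes in comm, every rank rounds the same bit pattern and obtains the identical double, independent of the number of processes.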