// Copyright (C) 2010-2013 von Karman Institute for Fluid Dynamics, Belgium
//
// This software is distributed under the terms of the
// GNU Lesser General Public License version 3 (LGPLv3).
// See doc/lgpl.txt and doc/gpl.txt for the license text.

#ifndef cf3_common_PE_all_to_all_hpp
#define cf3_common_PE_all_to_all_hpp

////////////////////////////////////////////////////////////////////////////////

#include <cstring> // memcpy
#include <vector>

#include "common/Assertions.hpp"
#include "common/Foreach.hpp"
#include "common/BasicExceptions.hpp" // NotEnoughMemory, BadValue, ParallelError

#include "common/PE/types.hpp"
#include "common/PE/datatype.hpp"

// #include "common/PE/debug.hpp"

////////////////////////////////////////////////////////////////////////////////
namespace cf3 {
 namespace common {
  namespace PE {

////////////////////////////////////////////////////////////////////////////////

namespace detail {

////////////////////////////////////////////////////////////////////////////////

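/// Implementation of all-to-all communication with a constant number of
/// elements per process pair. Wraps MPI_Alltoall: every process sends
/// in_n*stride items of type T to each process and receives the same amount
/// from each. In-place communication (in_values==out_values) is routed
/// through a temporary buffer.
/// @param comm       mpi communicator
/// @param in_values  pointer to the data to be sent
/// @param in_n       number of elements sent to each process
/// @param out_values pointer to the buffer receiving the data
/// @param stride     number of items of type T forming one element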
template<typename T>
inline void
all_to_allc_impl(const Communicator& comm, const T* in_values, const int in_n, T* out_values, const int stride )
{
  // get data type and number of processors
  Datatype type = PE::get_mpi_datatype(*in_values);
  int nproc;
  MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));

  // stride must be at least one
  cf3_assert( stride>0 );

  // set up out_buf: in-place communication needs a temporary receive buffer
  T* out_buf=0;
  if (in_values==out_values) {
    if ( (out_buf=new T[nproc*in_n*stride+1]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer."); // +1 for avoiding possible zero allocation
  } else {
    out_buf=out_values;
  }

  // do the communication
  MPI_CHECK_RESULT(MPI_Alltoall, (const_cast<T*>(in_values), in_n*stride, type, out_buf, in_n*stride, type, comm));

  // if a temporary buffer was used, copy the result back and release it
  if (in_values==out_values) {
    memcpy(out_values,out_buf,nproc*in_n*stride*sizeof(T));
    delete[] out_buf;
  }
}

////////////////////////////////////////////////////////////////////////////////

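/// Implementation of all-to-all communication with per-process element counts
/// and optional index maps. Wraps MPI_Alltoallv: displacements are computed
/// from in_n and out_n, non-null maps gather and scatter the elements through
/// temporary buffers, and in-place communication (in_values==out_values) is
/// supported.
/// @param comm       mpi communicator
/// @param in_values  pointer to the data to be sent
/// @param in_n       array of nproc send counts (elements per destination process)
/// @param in_map     index map for gathering the elements to send, or null
/// @param out_values pointer to the buffer receiving the data
/// @param out_n      array of nproc receive counts (elements per source process)
/// @param out_map    index map for scattering the received elements, or null
/// @param stride     number of items of type T forming one element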
template<typename T>
inline void
all_to_allvm_impl(const Communicator& comm, const T* in_values, const int *in_n, const int *in_map, T* out_values, const int *out_n, const int *out_map, const int stride )
{
  // get data type and number of processors
  Datatype type = PE::get_mpi_datatype(*in_values);
  int nproc;
  MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));

  // stride must be at least one
  cf3_assert( stride>0 );

  // compute displacements on both the send and receive side
  // also compute stride-multiplied send and receive counts
  int *in_nstride=new int[nproc];
  int *out_nstride=new int[nproc];
  int *in_disp=new int[nproc];
  int *out_disp=new int[nproc];
  in_disp[0]=0;
  out_disp[0]=0;
  for(int i=0; i<nproc-1; i++) {
    in_nstride[i]=stride*in_n[i];
    out_nstride[i]=stride*out_n[i];
    in_disp[i+1]=in_disp[i]+in_nstride[i];
    out_disp[i+1]=out_disp[i]+out_nstride[i];
  }
  in_nstride[nproc-1]=in_n[nproc-1]*stride;
  out_nstride[nproc-1]=out_n[nproc-1]*stride;

  // compute total number of send and receive items
  const int in_sum=in_disp[nproc-1]+stride*in_n[nproc-1];
  const int out_sum=out_disp[nproc-1]+stride*out_n[nproc-1];

  // set up in_buf: with a send map, gather the mapped elements into a contiguous buffer
  T *in_buf=0;
  if (in_map!=0) {
    if ( (in_buf=new T[in_sum+1]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer."); // +1 for avoiding possible zero allocation
    if (stride==1) { for(int i=0; i<in_sum; i++) in_buf[i]=in_values[in_map[i]]; }
    else { for(int i=0; i<in_sum/stride; i++) memcpy(&in_buf[stride*i],&in_values[stride*in_map[i]],stride*sizeof(T)); }
  } else {
    in_buf=(T*)in_values;
  }

  // set up out_buf: a temporary buffer is needed with a receive map or for in-place communication
  T *out_buf=0;
  if ((out_map!=0)||(in_values==out_values)) {
    if ( (out_buf=new T[out_sum+1]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer."); // +1 for avoiding possible zero allocation
  } else {
    out_buf=out_values;
  }

  // do the communication
  MPI_CHECK_RESULT(MPI_Alltoallv, (in_buf, in_nstride, in_disp, type, out_buf, out_nstride, out_disp, type, comm));

  // re-populate out_values
  if (out_map!=0) {
    if (stride==1) { for(int i=0; i<out_sum; i++) out_values[out_map[i]]=out_buf[i]; }
    else { for(int i=0; i<out_sum/stride; i++) memcpy(&out_values[stride*out_map[i]],&out_buf[stride*i],stride*sizeof(T)); }
    delete[] out_buf;
  } else if (in_values==out_values) {
    memcpy(out_values,out_buf,out_sum*sizeof(T));
    delete[] out_buf;
  }

  // free internal memory
  if (in_map!=0) delete[] in_buf;
  delete[] in_disp;
  delete[] out_disp;
  delete[] in_nstride;
  delete[] out_nstride;
}

////////////////////////////////////////////////////////////////////////////////

} // end namespace detail

////////////////////////////////////////////////////////////////////////////////

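/// All-to-all communication with a constant number of elements per process
/// pair, pointer interface. If out_values is null, a receive buffer of the
/// required size is allocated and returned; the caller is responsible for
/// freeing it with delete[]. In-place communication (out_values==in_values)
/// is supported.
/// @param comm       mpi communicator
/// @param in_values  pointer to the data to be sent (nproc*in_n*stride items)
/// @param in_n       number of elements sent to each process
/// @param out_values receive buffer, or null to have one allocated
/// @param stride     number of items of type T forming one element
/// @return pointer to the receive buffer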
template<typename T>
inline T*
all_to_all(const Communicator& comm, const T* in_values, const int in_n, T* out_values, const int stride=1)
{
  // get nproc
  int nproc;
  MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));

  // allocate out_buf if incoming pointer is null
  T* out_buf=out_values;
  if (out_values==0) {
    const int size=stride*nproc*in_n>1?stride*nproc*in_n:1;
    if ( (out_buf=new T[size]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer.");
  }

  // call c_impl
  detail::all_to_allc_impl(comm, in_values, in_n, out_buf, stride);
  return out_buf;
}
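// Example usage (an illustrative sketch, not part of the original interface;
// it assumes `comm` is a valid Communicator of an initialized parallel
// environment). Each process sends one int to every peer and lets the call
// allocate the receive buffer:
//
//   int nproc;
//   MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));
//   std::vector<int> send(nproc);
//   for (int i=0; i<nproc; ++i) send[i]=100*i;     // value destined for rank i
//   int* recv=all_to_all(comm,&send[0],1,(int*)0); // null out_values: buffer gets allocated
//   // recv[i] now holds the value sent by rank i; the caller owns the buffer
//   delete[] recv;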

////////////////////////////////////////////////////////////////////////////////

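/// All-to-all communication with a constant number of elements per process
/// pair, std::vector interface. The size of in_values must be divisible by
/// nproc*stride; out_values is resized to the same size.
/// @param comm       mpi communicator
/// @param in_values  data to be sent, one equal chunk per destination process
/// @param out_values vector receiving one chunk from each process
/// @param stride     number of items of type T forming one element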
template<typename T>
inline void
all_to_all(const Communicator& comm, const std::vector<T>& in_values, std::vector<T>& out_values, const int stride=1)
{
  // get number of processors
  int nproc;
  MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));

  // set out_values's size
  cf3_assert( in_values.size() % (nproc*stride) == 0 );
  out_values.resize(in_values.size());
  out_values.reserve(in_values.size());

  // call c_impl
  detail::all_to_allc_impl(comm, (T*)(&in_values[0]), in_values.size()/(nproc*stride), (T*)(&out_values[0]), stride);
}
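// Example usage (an illustrative sketch; assumes `comm` is a valid
// Communicator). The vector interface sizes the output automatically:
//
//   int nproc;
//   MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));
//   std::vector<double> send(nproc,1.), recv;
//   all_to_all(comm,send,recv); // recv[i] holds the value sent by rank i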

////////////////////////////////////////////////////////////////////////////////

// forward declaration, needed by the count-only overload below
template<typename T>
inline T*
all_to_all(const Communicator& comm, const T* in_values, const int *in_n, const int *in_map, T* out_values, int *out_n, const int *out_map, const int stride=1);

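/// All-to-all communication with individual send and receive counts per
/// process, pointer interface without index maps. Delegates to the mapped
/// variant with null maps. If out_n is filled with -1, the receive counts are
/// first determined by an all-to-all exchange of in_n. If out_values is null,
/// a receive buffer is allocated and returned.
/// @param comm       mpi communicator
/// @param in_values  pointer to the data to be sent
/// @param in_n       array of nproc send counts
/// @param out_values receive buffer, or null to have one allocated
/// @param out_n      array of nproc receive counts, or all -1 to have them communicated
/// @param stride     number of items of type T forming one element
/// @return pointer to the receive buffer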
template<typename T>
inline T*
all_to_all(const Communicator& comm, const T* in_values, const int *in_n, T* out_values, int *out_n, const int stride=1)
{
  // delegate to the mapped variant, using null maps
  return all_to_all(comm,in_values,in_n,0,out_values,out_n,0,stride);
}

////////////////////////////////////////////////////////////////////////////////

// forward declaration, needed by the count-only overload below
template<typename T>
inline void
all_to_all(const Communicator& comm, const std::vector<T>& in_values, const std::vector<int>& in_n, const std::vector<int>& in_map, std::vector<T>& out_values, std::vector<int>& out_n, const std::vector<int>& out_map, const int stride=1);

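/// All-to-all communication with individual send and receive counts per
/// process, std::vector interface without index maps. Delegates to the mapped
/// variant with empty maps; when in_values and out_values are the same vector,
/// the exchange goes through a temporary copy. If out_n is filled with -1, the
/// receive counts are first determined by an all-to-all exchange of in_n.
/// @param comm       mpi communicator
/// @param in_values  data to be sent
/// @param in_n       vector of nproc send counts
/// @param out_values vector receiving the data, resized if it arrives empty
/// @param out_n      vector of nproc receive counts, or all -1 to have them communicated
/// @param stride     number of items of type T forming one element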
template<typename T>
inline void
all_to_all(const Communicator& comm, const std::vector<T>& in_values, const std::vector<int>& in_n, std::vector<T>& out_values, std::vector<int>& out_n, const int stride=1)
{
  // delegate to the mapped variant, using empty maps
  std::vector<int> in_map(0);
  std::vector<int> out_map(0);
  if (&in_values[0]==&out_values[0])
  {
    std::vector<T> out_tmp(0);
    all_to_all(comm,in_values,in_n,in_map,out_tmp,out_n,out_map,stride);
    out_values.assign(out_tmp.begin(),out_tmp.end());
  }
  else
  {
    all_to_all(comm,in_values,in_n,in_map,out_values,out_n,out_map,stride);
  }
}
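// Example usage (an illustrative sketch; assumes `comm` is a valid
// Communicator). Each process sends a different number of items to each peer;
// the receive counts are unknown, so out_n is filled with -1 and determined
// by the call:
//
//   int nproc;
//   MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));
//   std::vector<int> in_n(nproc), out_n(nproc,-1);
//   std::vector<double> send, recv;
//   // fill in_n[i] with the number of items for rank i and append those
//   // items to send, in rank order ...
//   all_to_all(comm,send,in_n,recv,out_n);
//   // out_n[i] now holds the number of items received from rank i,
//   // stored consecutively in recv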

////////////////////////////////////////////////////////////////////////////////

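/// All-to-all communication with individual send and receive counts and index
/// maps, pointer interface. A -1-filled out_n triggers an exchange of the
/// receive counts, which is not allowed in combination with out_map. If
/// out_values is null, a buffer large enough for the received (mapped)
/// elements is allocated and returned; the caller frees it with delete[].
/// @param comm       mpi communicator
/// @param in_values  pointer to the data to be sent
/// @param in_n       array of nproc send counts
/// @param in_map     index map for gathering the elements to send, or null
/// @param out_values receive buffer, or null to have one allocated
/// @param out_n      array of nproc receive counts, or all -1 to have them communicated
/// @param out_map    index map for scattering the received elements, or null
/// @param stride     number of items of type T forming one element
/// @return pointer to the receive buffer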
template<typename T>
inline T*
all_to_all(const Communicator& comm, const T* in_values, const int *in_n, const int *in_map, T* out_values, int *out_n, const int *out_map, const int stride)
{
  // number of processes
  int nproc;
  MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));

  // if out_n consists of -1s, communicate to obtain the receive counts
  int out_sum=0;
  for (int i=0; i<nproc; i++) out_sum+=out_n[i];
  if (out_sum==-nproc) {
    if (out_map!=0) throw cf3::common::ParallelError(FromHere(),"Trying to perform communication with receive map while receive counts are unknown, this is bad usage of parallel environment.");
    detail::all_to_allc_impl(comm,in_n,1,out_n,1);
    out_sum=0;
    for (int i=0; i<nproc; i++) out_sum+=out_n[i];
  }

  // allocate out_buf if incoming pointer is null
  T* out_buf=out_values;
  if (out_values==0) {
    if (out_map!=0){
      // with a receive map, the buffer must span up to the largest mapped index
      int out_sum_tmp=0;
      for (int i=0; i<out_sum; i++) out_sum_tmp=out_map[i]>out_sum_tmp?out_map[i]:out_sum_tmp;
      out_sum=out_sum_tmp+1;
    }
    if ( (out_buf=new T[stride*out_sum]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer.");
  }

  // call vm_impl
  detail::all_to_allvm_impl(comm, in_values, in_n, in_map, out_buf, out_n, out_map, stride);
  return out_buf;
}

////////////////////////////////////////////////////////////////////////////////

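/// All-to-all communication with individual send and receive counts and index
/// maps, std::vector interface. A -1-filled out_n triggers an exchange of the
/// receive counts, which is not allowed in combination with a non-empty
/// out_map. If out_values arrives empty, it is resized to hold the received
/// (mapped) elements.
/// @param comm       mpi communicator
/// @param in_values  data to be sent
/// @param in_n       vector of nproc send counts
/// @param in_map     index map for gathering the elements to send (may be empty)
/// @param out_values vector receiving the data, resized if it arrives empty
/// @param out_n      vector of nproc receive counts, or all -1 to have them communicated
/// @param out_map    index map for scattering the received elements (may be empty)
/// @param stride     number of items of type T forming one element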
template<typename T>
inline void
all_to_all(const Communicator& comm, const std::vector<T>& in_values, const std::vector<int>& in_n, const std::vector<int>& in_map, std::vector<T>& out_values, std::vector<int>& out_n, const std::vector<int>& out_map, const int stride)
{
  // get number of processes and check in_n and out_n
  // (out_n deliberately throws an exception rather than asserting, because the vector can arrive from arbitrary previous usage)
  int nproc;
  MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));
  cf3_assert( (int)in_n.size() == nproc );
  if ((int)out_n.size()!=nproc) throw cf3::common::BadValue(FromHere(),"Size of vector for number of items to be received does not match the number of processes.");

  // compute total number of items to send and receive
  int in_sum=0;
  int out_sum=0;
  boost_foreach( int i, in_n ) in_sum+=i;
  boost_foreach( int i, out_n ) out_sum+=i;

  // if necessary, do communication for out_n
  if (out_sum == -nproc){
    if (out_map.size()!=0) throw cf3::common::ParallelError(FromHere(),"Trying to perform communication with receive map while receive counts are unknown, this is bad usage of parallel environment.");
    detail::all_to_allc_impl(comm,&in_n[0],1,&out_n[0],1);
    out_sum=0;
    boost_foreach( int & i, out_n ) out_sum+=i;
  }

  // resize out_values if it arrives empty
  if (out_values.size() == 0 ){
    if (out_map.size()!=0) {
      // with a receive map, the buffer must span up to the largest mapped index
      out_sum=0;
      boost_foreach( int i, out_map ) out_sum=i>out_sum?i:out_sum;
      out_sum++;
    }
    out_values.resize(stride*out_sum);
    out_values.reserve(stride*out_sum);
  }

  // call vm_impl
  detail::all_to_allvm_impl(comm, (T*)(&in_values[0]), &in_n[0], (in_map.empty() ? nullptr : &in_map[0]), (T*)(&out_values[0]), &out_n[0], (out_map.empty() ? nullptr : &out_map[0]), stride);
}

////////////////////////////////////////////////////////////////////////////////

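/// All-to-all communication of a vector of vectors: send[i] holds the elements
/// destined for process i, and on return recv[i] holds the elements received
/// from process i. The per-process counts are exchanged internally, so none
/// need to be supplied.
/// @param comm mpi communicator
/// @param send per-destination data to be sent (one vector per process)
/// @param recv per-source received data, resized as needed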
template <typename T>
void all_to_all(const Communicator& comm, const std::vector<std::vector<T> >& send, std::vector<std::vector<T> >& recv)
{
  // per-process send counts and displacements
  std::vector<int> send_strides(send.size());
  std::vector<int> send_displs(send.size());
  for (Uint i=0; i<send.size(); ++i)
    send_strides[i] = send[i].size();

  send_displs[0] = 0;
  for (Uint i=1; i<send.size(); ++i)
    send_displs[i] = send_displs[i-1] + send_strides[i-1];

  // flatten the send data into one contiguous buffer
  std::vector<T> send_linear(send_displs.back()+send_strides.back());
  for (Uint i=0; i<send.size(); ++i)
    for (Uint j=0; j<send[i].size(); ++j)
      send_linear[send_displs[i]+j] = send[i][j];

  // exchange the counts, then compute the receive displacements
  std::vector<int> recv_strides(send.size());
  std::vector<int> recv_displs(send.size());
  all_to_all(comm,send_strides,recv_strides);
  recv_displs[0] = 0;
  for (Uint i=1; i<send.size(); ++i)
    recv_displs[i] = recv_displs[i-1] + recv_strides[i-1];

  // do the communication
  std::vector<T> recv_linear(recv_displs.back()+recv_strides.back());
  MPI_CHECK_RESULT(MPI_Alltoallv, (&send_linear[0], &send_strides[0], &send_displs[0], PE::get_mpi_datatype<T>(), &recv_linear[0], &recv_strides[0], &recv_displs[0], PE::get_mpi_datatype<T>(), comm));

  // unpack the received items per source process
  recv.resize(recv_strides.size());
  for (Uint i=0; i<recv_strides.size(); ++i)
  {
    recv[i].resize(recv_strides[i]);
    for (int j=0; j<recv_strides[i]; ++j)
    {
      recv[i][j]=recv_linear[recv_displs[i]+j];
    }
  }
}
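// Example usage (an illustrative sketch; assumes `comm` is a valid
// Communicator). Arbitrary, per-peer amounts of data, no counts needed:
//
//   int nproc;
//   MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc));
//   std::vector< std::vector<int> > send(nproc), recv;
//   // fill send[i] with any number of items destined for rank i ...
//   all_to_all(comm,send,recv);
//   // recv[i] now holds the items sent to this process by rank i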

////////////////////////////////////////////////////////////////////////////////

} // namespace PE
} // namespace common
} // namespace cf3

////////////////////////////////////////////////////////////////////////////////

#endif // cf3_common_PE_all_to_all_hpp