26#include "ExSUM.FPE.hpp"
48 template<typename CACHE, typename PointerOrValue1, typename PointerOrValue2>
49 void ExDOTFPE_cpu(int N, PointerOrValue1 a, PointerOrValue2 b, int64_t* acc, bool* error) {
52 int r = (( int64_t(N) ) & ~7ul);
53 for(
int i = 0; i < r; i+=8) {
60 vcl::Vec8d x = make_vcl_vec8d(a,i)* make_vcl_vec8d(b,i);
61 vcl::Vec8db finite = vcl::is_finite( x);
62 if( !vcl::horizontal_and( finite) ) *error =
true;
71 vcl::Vec8d x = make_vcl_vec8d(a,r,N-r)*make_vcl_vec8d(b,r,N-r);
72 vcl::Vec8db finite = vcl::is_finite( x);
73 if( !vcl::horizontal_and( finite) ) *error =
true;
78 for(
int i = 0; i < N; i++) {
81 double x = get_element(a,i)*get_element(b,i);
82 if( !std::isfinite(x) ) *error =
true;
90 template<typename CACHE, typename PointerOrValue1, typename PointerOrValue2, typename PointerOrValue3>
91 void ExDOTFPE_cpu(int N, PointerOrValue1 a, PointerOrValue2 b, PointerOrValue3 c, int64_t* acc, bool* error) {
94 int r = (( int64_t(N)) & ~7ul);
95 for(
int i = 0; i < r; i+=8) {
104 vcl::Vec8d x1 = vcl::mul_add(make_vcl_vec8d(a,i),make_vcl_vec8d(b,i), 0);
105 vcl::Vec8d x2 = vcl::mul_add( x1 ,make_vcl_vec8d(c,i), 0);
106 vcl::Vec8db finite = vcl::is_finite( x2);
107 if( !vcl::horizontal_and( finite) ) *error =
true;
108 cache.Accumulate(x2);
119 vcl::Vec8d x1 = vcl::mul_add(make_vcl_vec8d(a,r,N-r),make_vcl_vec8d(b,r,N-r), 0);
120 vcl::Vec8d x2 = vcl::mul_add( x1 ,make_vcl_vec8d(c,r,N-r), 0);
121 vcl::Vec8db finite = vcl::is_finite( x2);
122 if( !vcl::horizontal_and( finite) ) *error =
true;
123 cache.Accumulate(x2);
130 for(
int i = 0; i < N; i++) {
131 double x1 = get_element(a,i)*get_element(b,i);
132 double x2 = x1*get_element(c,i);
133 if( !std::isfinite(x2) ) *error =
true;
134 cache.Accumulate(x2);
199 template<class PointerOrValue1, class PointerOrValue2, size_t NBFPE=8>
200 void exdot_cpu(unsigned size, PointerOrValue1 x1_ptr, PointerOrValue2 x2_ptr, int64_t* h_superacc, int* status){
201 static_assert( has_floating_value<PointerOrValue1>::value,
"PointerOrValue1 needs to be T or T* with T one of (const) float or (const) double");
202 static_assert( has_floating_value<PointerOrValue2>::value,
"PointerOrValue2 needs to be T or T* with T one of (const) float or (const) double");
203 for(
int i=0; i<exblas::BIN_COUNT; i++)
207 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<vcl::Vec8d, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, h_superacc, &error);
209 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<double, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, h_superacc, &error);
212 if( error ) *status = 1;
218 template<class PointerOrValue1, class PointerOrValue2, class PointerOrValue3, size_t NBFPE=8>
219 void exdot_cpu(unsigned size, PointerOrValue1 x1_ptr, PointerOrValue2 x2_ptr, PointerOrValue3 x3_ptr, int64_t* h_superacc, int* status) {
220 static_assert( has_floating_value<PointerOrValue1>::value,
"PointerOrValue1 needs to be T or T* with T one of (const) float or (const) double");
221 static_assert( has_floating_value<PointerOrValue2>::value,
"PointerOrValue2 needs to be T or T* with T one of (const) float or (const) double");
222 static_assert( has_floating_value<PointerOrValue3>::value,
"PointerOrValue3 needs to be T or T* with T one of (const) float or (const) double");
223 for(
int i=0; i<exblas::BIN_COUNT; i++)
227 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<vcl::Vec8d, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, x3_ptr, h_superacc, &error);
229 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<double, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, x3_ptr, h_superacc, &error);
232 if( error ) *status = 1;
Primitives for accumulation into superaccumulator.
void exdot_cpu(unsigned size, PointerOrValue1 x1_ptr, PointerOrValue2 x2_ptr, int64_t *h_superacc, int *status)
Serial version of exact dot product.
Definition: exdot_serial.h:200
Utility union to display all bits of a double (using type-punning)
Definition: exdot_serial.h:40
int64_t i
a 64 bit integer
Definition: exdot_serial.h:42
double d
a double
Definition: exdot_serial.h:41