26#include "ExSUM.FPE.hpp"
// Two-operand kernel: forms the element-wise product of `a` and `b` and raises
// `*error` whenever a product is not finite.
//
// NOTE(review): the leading numbers on these lines appear to be line numbers
// from a rendered source listing, and that numbering has gaps, so statements
// are missing from this view (no closing braces, no use of CACHE or `acc` is
// visible here). Confirm against the original header before relying on this.
//
// Template parameters:
//   CACHE            - not referenced in the visible lines; presumably the
//                      accumulator type used by the hidden statements.
//   PointerOrValue1/2 - types of `a` and `b`, read through make_vcl_vec8d /
//                      get_element.
// Parameters:
//   N     - number of elements to process.
//   a, b  - the two operands.
//   acc   - not referenced in the visible lines.
//   error - set to true when a non-finite product is seen; never reset to
//           false in the visible lines.
61template<
typename CACHE,
typename Po
interOrValue1,
typename Po
interOrValue2>
62void ExDOTFPE_cpu(
int N, PointerOrValue1 a, PointerOrValue2 b, int64_t* acc,
bool* error) {
// r is N with its three lowest bits cleared: for non-negative N, the largest
// multiple of 8 that does not exceed N.
65 int r = (( int64_t(N) ) & ~7ul);
// Main loop: advances 8 elements per iteration over [0, r).
66 for(
int i = 0; i < r; i+=8) {
// Lane-wise product of the 8 values taken from a and b at offset i.
73 vcl::Vec8d x = make_vcl_vec8d(a,i)* make_vcl_vec8d(b,i);
// Flag an error unless every one of the 8 lanes is finite.
74 vcl::Vec8db finite = vcl::is_finite( x);
75 if( !vcl::horizontal_and( finite) ) *error =
true;
// Tail: same product and finiteness check starting at r; the third argument
// N-r is presumably the count of remaining elements to load - TODO confirm
// against make_vcl_vec8d. The condition guarding this tail is not visible.
84 vcl::Vec8d x = make_vcl_vec8d(a,r,N-r)*make_vcl_vec8d(b,r,N-r);
85 vcl::Vec8db finite = vcl::is_finite( x);
86 if( !vcl::horizontal_and( finite) ) *error =
true;
// Scalar loop over all N elements, one at a time. NOTE(review): this looks like
// an alternative to the 8-wide path above; whatever selects between the two is
// not visible here.
91 for(
int i = 0; i < N; i++) {
// Both operands are converted to double before multiplying.
94 double x = (double)get_element(a,i)*(double)get_element(b,i);
95 if( !std::isfinite(x) ) *error =
true;
// Three-operand kernel: forms the element-wise triple product a*b*c, raises
// `*error` whenever a result is not finite, and passes each result to
// cache.Accumulate.
//
// NOTE(review): the leading numbers on these lines appear to be line numbers
// from a rendered source listing, and that numbering has gaps, so statements
// are missing from this view (the declaration of `cache`, closing braces and
// any use of `acc` are not visible). Confirm against the original header.
//
// Template parameters:
//   CACHE              - presumably the type of `cache`; its declaration is
//                        not visible here.
//   PointerOrValue1/2/3 - types of `a`, `b` and `c`, read through
//                        make_vcl_vec8d / get_element.
// Parameters:
//   N       - number of elements to process.
//   a, b, c - the three operands.
//   acc     - not referenced in the visible lines.
//   error   - set to true when a non-finite result is seen; never reset to
//             false in the visible lines.
103template<
typename CACHE,
typename Po
interOrValue1,
typename Po
interOrValue2,
typename Po
interOrValue3>
104void ExDOTFPE_cpu(
int N, PointerOrValue1 a, PointerOrValue2 b, PointerOrValue3 c, int64_t* acc,
bool* error) {
// r is N with its three lowest bits cleared: for non-negative N, the largest
// multiple of 8 that does not exceed N.
107 int r = (( int64_t(N)) & ~7ul);
// Main loop: advances 8 elements per iteration over [0, r).
108 for(
int i = 0; i < r; i+=8) {
// The product is built in two steps, each through vcl::mul_add with a zero
// third argument (presumably a*b+0 - TODO confirm against the vector library):
// first a*b, then (a*b)*c.
117 vcl::Vec8d x1 = vcl::mul_add(make_vcl_vec8d(a,i),make_vcl_vec8d(b,i), 0);
118 vcl::Vec8d x2 = vcl::mul_add( x1 ,make_vcl_vec8d(c,i), 0);
// Only the final product x2 is checked; the intermediate x1 is not.
119 vcl::Vec8db finite = vcl::is_finite( x2);
120 if( !vcl::horizontal_and( finite) ) *error =
true;
// The result is accumulated even when the error flag was just raised.
121 cache.Accumulate(x2);
// Tail: same two-step product starting at r; the third argument N-r is
// presumably the count of remaining elements to load - TODO confirm against
// make_vcl_vec8d. The condition guarding this tail is not visible.
132 vcl::Vec8d x1 = vcl::mul_add(make_vcl_vec8d(a,r,N-r),make_vcl_vec8d(b,r,N-r), 0);
133 vcl::Vec8d x2 = vcl::mul_add( x1 ,make_vcl_vec8d(c,r,N-r), 0);
134 vcl::Vec8db finite = vcl::is_finite( x2);
135 if( !vcl::horizontal_and( finite) ) *error =
true;
136 cache.Accumulate(x2);
// Scalar loop over all N elements, one at a time. NOTE(review): this looks like
// an alternative to the 8-wide path above; whatever selects between the two is
// not visible here.
143 for(
int i = 0; i < N; i++) {
// Operands are converted to double; the product is again formed as (a*b)*c.
144 double x1 = (double)get_element(a,i)*(double)get_element(b,i);
145 double x2 = x1*(double)get_element(c,i);
146 if( !std::isfinite(x2) ) *error =
true;
147 cache.Accumulate(x2);
// Serial version of the exact dot product of two operands: forwards to
// cpu::ExDOTFPE_cpu and reports a failure through `status`.
//
// NOTE(review): the leading numbers on these lines appear to be line numbers
// from a rendered source listing, and that numbering has gaps, so statements
// are missing from this view (the body of the BIN_COUNT loop, the declaration
// of `error`, and whatever selects between the two kernel calls).
//
// Template parameters:
//   PointerOrValue1/2 - must satisfy has_floating_value (enforced by the
//                       static_asserts below).
//   NBFPE             - forwarded to cpu::FPExpansionVect; defaults to 8.
// Parameters:
//   size           - number of elements; cast to int before being forwarded.
//   x1_ptr, x2_ptr - the two operands.
//   h_superacc     - forwarded to the kernel as its accumulator argument.
//   status         - set to 1 when the kernel raised its error flag; not
//                    written otherwise in the visible lines.
210template<
class Po
interOrValue1,
class Po
interOrValue2,
size_t NBFPE=8>
211void exdot_cpu(
unsigned size, PointerOrValue1 x1_ptr, PointerOrValue2 x2_ptr, int64_t* h_superacc,
int* status){
// Compile-time rejection of operand types that carry no floating-point value.
212 static_assert( has_floating_value<PointerOrValue1>::value,
"PointerOrValue1 needs to be T or T* with T one of (const) float or (const) double");
213 static_assert( has_floating_value<PointerOrValue2>::value,
"PointerOrValue2 needs to be T or T* with T one of (const) float or (const) double");
// Loop over the exblas::BIN_COUNT bins; its body is not visible here
// (presumably it initializes h_superacc - TODO confirm).
214 for(
int i=0; i<exblas::BIN_COUNT; i++)
// Kernel call with an 8-wide vcl::Vec8d expansion ...
218 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<vcl::Vec8d, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, h_superacc, &error);
// ... and the same call with a scalar double expansion. NOTE(review): these
// look like mutually exclusive alternatives; the selection is not visible.
220 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<double, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, h_superacc, &error);
// Translate the kernel's boolean error flag into the integer status code.
223 if( error ) *status = 1;
// Three-operand overload of exdot_cpu: forwards to the three-operand
// cpu::ExDOTFPE_cpu and reports a failure through `status`.
//
// NOTE(review): the leading numbers on these lines appear to be line numbers
// from a rendered source listing, and that numbering has gaps, so statements
// are missing from this view (the body of the BIN_COUNT loop, the declaration
// of `error`, and whatever selects between the two kernel calls).
//
// Template parameters:
//   PointerOrValue1/2/3 - must satisfy has_floating_value (enforced by the
//                         static_asserts below).
//   NBFPE               - forwarded to cpu::FPExpansionVect; defaults to 8.
// Parameters:
//   size                   - number of elements; cast to int before being
//                            forwarded.
//   x1_ptr, x2_ptr, x3_ptr - the three operands.
//   h_superacc             - forwarded to the kernel as its accumulator
//                            argument.
//   status                 - set to 1 when the kernel raised its error flag;
//                            not written otherwise in the visible lines.
229template<
class Po
interOrValue1,
class Po
interOrValue2,
class Po
interOrValue3,
size_t NBFPE=8>
230void exdot_cpu(
unsigned size, PointerOrValue1 x1_ptr, PointerOrValue2 x2_ptr, PointerOrValue3 x3_ptr, int64_t* h_superacc,
int* status) {
// Compile-time rejection of operand types that carry no floating-point value.
231 static_assert( has_floating_value<PointerOrValue1>::value,
"PointerOrValue1 needs to be T or T* with T one of (const) float or (const) double");
232 static_assert( has_floating_value<PointerOrValue2>::value,
"PointerOrValue2 needs to be T or T* with T one of (const) float or (const) double");
233 static_assert( has_floating_value<PointerOrValue3>::value,
"PointerOrValue3 needs to be T or T* with T one of (const) float or (const) double");
// Loop over the exblas::BIN_COUNT bins; its body is not visible here
// (presumably it initializes h_superacc - TODO confirm).
234 for(
int i=0; i<exblas::BIN_COUNT; i++)
// Kernel call with an 8-wide vcl::Vec8d expansion ...
238 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<vcl::Vec8d, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, x3_ptr, h_superacc, &error);
// ... and the same call with a scalar double expansion. NOTE(review): these
// look like mutually exclusive alternatives; the selection is not visible.
240 cpu::ExDOTFPE_cpu<cpu::FPExpansionVect<double, NBFPE, cpu::FPExpansionTraits<true> > >((int)size,x1_ptr,x2_ptr, x3_ptr, h_superacc, &error);
// Translate the kernel's boolean error flag into the integer status code.
243 if( error ) *status = 1;
Primitives for accumulation into a superaccumulator.
void exdot_cpu(unsigned size, PointerOrValue1 x1_ptr, PointerOrValue2 x2_ptr, int64_t *h_superacc, int *status)
Serial version of exact dot product.
Definition exdot_serial.h:211
Utility union to display all bits of a double (using type-punning)
Definition exdot_serial.h:40
int64_t i
a 64-bit integer
Definition exdot_serial.h:42
double d
a double
Definition exdot_serial.h:41
Utility union to display all bits of a float (using type-punning)
Definition exdot_serial.h:53
float f
a float
Definition exdot_serial.h:54
int32_t i
a 32-bit integer
Definition exdot_serial.h:55