32inline vcl::Vec8d make_vcl_vec8d(
double x,
int i){
35inline vcl::Vec8d make_vcl_vec8d(
const double* x,
int i){
36 return vcl::Vec8d().load( x+i);
38inline vcl::Vec8d make_vcl_vec8d(
double x,
int i,
int num){
41inline vcl::Vec8d make_vcl_vec8d(
const double* x,
int i,
int num){
42 return vcl::Vec8d().load_partial( num, x+i);
44inline vcl::Vec8d make_vcl_vec8d(
float x,
int i){
45 return vcl::Vec8d((
double)x);
47inline vcl::Vec8d make_vcl_vec8d(
const float* x,
int i){
48 return vcl::Vec8d( x[i], x[i+1], x[i+2], x[i+3], x[i+4], x[i+5], x[i+6], x[i+7]);
50inline vcl::Vec8d make_vcl_vec8d(
float x,
int i,
int num){
51 return vcl::Vec8d((
double)x);
53inline vcl::Vec8d make_vcl_vec8d(
const float* x,
int i,
int num){
55 for(
int j=0; j<num; j++)
56 tmp[j] = (
double)x[i+j];
57 return vcl::Vec8d().load_partial( num, tmp);
61inline T get_element( T x,
int i){
65inline T get_element( T* x,
int i){
74inline std::enable_if_t<!std::is_integral_v<T>,T> KnuthTwoSum(T a, T b, T & s)
78 s = (a - (r - z)) + (b - z);
82inline std::enable_if_t<std::is_integral_v<T>,T> KnuthTwoSum(T a, T b, T & s)
89inline T TwoProductFMA(T a, T b, T &d) {
94 d = vcl::mul_sub_x(a, b, p);
103template<
typename T,
size_t N> UNROLL_ATTRIBUTE
104void Accumulate(T x, std::array<T,N>& fpe ,
int* status)
108 for(
unsigned int i = 0; i != N; ++i) {
110 fpe[i] = KnuthTwoSum(fpe[i], x, s);
116 if (x != T(0) && *status != 1) {
126template<
class T,
size_t N>
127inline T
Round(
const std::array<T,N>& fpe ) {
130 std::array<T, 2> fpe_red{T(0),T(0)};
132 for(
unsigned u = 0; u<N; u++)
134 return fpe_red[0] + fpe_red[1];
171inline void AccumulateWord( int64_t *accumulator,
int i, int64_t x) {
177 unsigned char overflow;
180 int64_t oldword = cpu::xadd(accumulator[i], x, overflow);
182 while(unlikely(overflow)) {
189 carry = (oldword + carry) >> DIGITS;
190 bool s = oldword > 0;
191 carrybit = (s ? 1ll << KRX : (
unsigned long long)(-1ll) << KRX);
194 cpu::xadd(accumulator[i], (int64_t) -(carry << DIGITS), overflow);
195 if(TSAFE && unlikely(s ^ overflow)) {
206 oldword = cpu::xadd(accumulator[i], carry, overflow);
223 int e = cpu::exponent(x);
224 int exp_word = e / DIGITS;
225 int iup = exp_word + F_WORDS;
227 double xscaled = cpu::myldexp(x, -DIGITS * exp_word);
230 for (i = iup; i>=0 && xscaled != 0; --i) {
231 double xrounded = cpu::myrint(xscaled);
232 int64_t xint = cpu::myllrint(xscaled);
233 AccumulateWord(accumulator, i, xint);
236 xscaled *= DELTASCALE;
240inline void Accumulate( int64_t* accumulator, vcl::Vec8d x) {
247 for(
unsigned int j = 0; j != 8; ++j) {
267inline bool Normalize( int64_t *accumulator,
int& imin,
int& imax) {
268 int64_t carry_in = accumulator[imin] >> DIGITS;
269 accumulator[imin] -= carry_in << DIGITS;
272 for (i = imin + 1; i < BIN_COUNT; ++i) {
273 accumulator[i] += carry_in;
274 int64_t carry_out = accumulator[i] >> DIGITS;
275 accumulator[i] -= (carry_out << DIGITS);
276 carry_in = carry_out;
281 accumulator[imax] += carry_in << DIGITS;
297inline double Round( int64_t * accumulator) {
300 bool negative =
Normalize(accumulator, imin, imax);
305 for(i = imax; i >= imin && accumulator[i] == 0; --i) {
311 for(; i >= imin && (accumulator[i] & ((1ll << DIGITS) - 1)) == ((1ll << DIGITS) - 1); --i) {
318 int64_t hiword = negative ? ((1ll << DIGITS) - 1) - accumulator[i] : accumulator[i];
319 double rounded = (double)hiword;
320 double hi = ldexp(rounded, (i - F_WORDS) * DIGITS);
322 return negative ? -hi : hi;
324 hiword -= std::llrint(rounded);
325 double mid = ldexp((
double) hiword, (i - F_WORDS) * DIGITS);
329 for (
int j = imin; j != i - 1; ++j) {
330 sticky |= negative ? ((1ll << DIGITS) - accumulator[j]) : accumulator[j];
333 int64_t loword = negative ? ((1ll << DIGITS) - accumulator[i - 1]) : accumulator[i - 1];
335 double lo = ldexp((
double) loword, (i - 1 - F_WORDS) * DIGITS);
340 lo = cpu::OddRoundSumNonnegative(mid, lo);
344 return negative ? -hi : hi;
bool Normalize(int64_t *accumulator, int &imin, int &imax)
Normalize a superaccumulator.
Definition accumulate.h:267