Coverage Report

Created: 2024-09-04 14:33

/src/arrow/cpp/src/arrow/vendored/fast_float/simple_decimal_conversion.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef FASTFLOAT_GENERIC_DECIMAL_TO_BINARY_H
2
#define FASTFLOAT_GENERIC_DECIMAL_TO_BINARY_H
3
4
/**
5
 * This code is meant to handle the case where we have more than 19 digits.
6
 *
7
 * It is based on work by Nigel Tao (at https://github.com/google/wuffs/)
8
 * who credits Ken Thompson for the design (via a reference to the Go source
9
 * code).
10
 *
11
 * Rob Pike suggested that this algorithm be called "Simple Decimal Conversion".
12
 *
13
 * It is probably not very fast but it is a fallback that should almost never
14
 * be used in real life. Though it is not fast, it is "easily" understood and debugged.
15
 **/
16
#include "ascii_number.h"
17
#include "decimal_to_binary.h"
18
#include <cstdint>
19
20
namespace arrow_vendored {
21
namespace fast_float {
22
23
namespace {
24
25
// remove all final zeroes
26
0
inline void trim(decimal &h) {
27
0
  while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) {
28
0
    h.num_digits--;
29
0
  }
30
0
}
31
32
33
34
0
uint32_t number_of_digits_decimal_left_shift(const decimal &h, uint32_t shift) {
35
0
  shift &= 63;
36
0
  const static uint16_t number_of_digits_decimal_left_shift_table[65] = {
37
0
    0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817,
38
0
    0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067,
39
0
    0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF,
40
0
    0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0,
41
0
    0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA,
42
0
    0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC,
43
0
    0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C,
44
0
    0x051C, 0x051C,
45
0
  };
46
0
  uint32_t x_a = number_of_digits_decimal_left_shift_table[shift];
47
0
  uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1];
48
0
  uint32_t num_new_digits = x_a >> 11;
49
0
  uint32_t pow5_a = 0x7FF & x_a;
50
0
  uint32_t pow5_b = 0x7FF & x_b;
51
0
  const static uint8_t
52
0
    number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = {
53
0
        5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3,
54
0
        9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8,
55
0
        1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1,
56
0
        0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, 5, 2, 5, 8,
57
0
        7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6,
58
0
        9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5,
59
0
        3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3,
60
0
        1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0,
61
0
        9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6, 4, 4, 7, 7, 5, 3,
62
0
        9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1,
63
0
        4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8,
64
0
        0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4,
65
0
        6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5,
66
0
        7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5,
67
0
        6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2,
68
0
        5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5,
69
0
        1, 1, 6, 4, 1, 5, 3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5,
70
0
        5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5,
71
0
        2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2,
72
0
        5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0,
73
0
        6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2,
74
0
        0, 3, 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1,
75
0
        6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6,
76
0
        4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, 2,
77
0
        9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
78
0
        0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7,
79
0
        3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5,
80
0
        6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9,
81
0
        3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, 8, 6, 0, 8, 0, 8,
82
0
        0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0,
83
0
        9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2,
84
0
        5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2,
85
0
        4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0,
86
0
        0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7,
87
0
        1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8,
88
0
        9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4,
89
0
        6, 7, 7, 8, 1, 0, 6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1,
90
0
        9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5,
91
0
        6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9,
92
0
        4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4,
93
0
        9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4,
94
0
        0, 6, 2, 5, 1, 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4,
95
0
        2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1,
96
0
        5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5,
97
0
        4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1,
98
0
        3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1,
99
0
        3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9,
100
0
        5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0,
101
0
        3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, 7, 6, 2,
102
0
        6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
103
0
        8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5,
104
0
        1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2,
105
0
        4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1,
106
0
        7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2, 2, 4, 0, 6, 9, 5,
107
0
        9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
108
0
  };
109
0
  const uint8_t *pow5 =
110
0
      &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a];
111
0
  uint32_t i = 0;
112
0
  uint32_t n = pow5_b - pow5_a;
113
0
  for (; i < n; i++) {
114
0
    if (i >= h.num_digits) {
115
0
      return num_new_digits - 1;
116
0
    } else if (h.digits[i] == pow5[i]) {
117
0
      continue;
118
0
    } else if (h.digits[i] < pow5[i]) {
119
0
      return num_new_digits - 1;
120
0
    } else {
121
0
      return num_new_digits;
122
0
    }
123
0
  }
124
0
  return num_new_digits;
125
0
}
126
127
0
// Rounds the decimal h to an unsigned 64-bit integer.
//
// - Returns 0 when h has no digits or its decimal point is negative.
// - Returns UINT64_MAX when the decimal point exceeds 18, i.e. when the
//   integer part could need more than 18 digits.
// - Otherwise accumulates the h.decimal_point integer digits (missing digits
//   count as zero) and rounds on the first fractional digit. A fractional part
//   that is exactly a single trailing 5 is rounded to even, unless h.truncated
//   is set, in which case it is rounded up.
//
// Marked inline, like trim(), because it is defined in a header.
inline uint64_t round(decimal &h) {
  if ((h.num_digits == 0) || (h.decimal_point < 0)) {
    return 0;
  }
  if (h.decimal_point > 18) {
    return UINT64_MAX;
  }
  // at this point, we know that 0 <= h.decimal_point <= 18
  const uint32_t dp = uint32_t(h.decimal_point);
  uint64_t n = 0;
  for (uint32_t i = 0; i < dp; i++) {
    n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0);
  }
  bool round_up = false;
  if (dp < h.num_digits) {
    round_up = h.digits[dp] >= 5; // normally, we round up
    // but we may need to round to even: the 5 is the very last stored digit
    if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) {
      round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1]));
    }
  }
  return round_up ? (n + 1) : n;
}
152
153
// computes h * 2^-shift
154
0
void decimal_left_shift(decimal &h, uint32_t shift) {
155
0
  if (h.num_digits == 0) {
156
0
    return;
157
0
  }
158
0
  uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift);
159
0
  int32_t read_index = int32_t(h.num_digits - 1);
160
0
  uint32_t write_index = h.num_digits - 1 + num_new_digits;
161
0
  uint64_t n = 0;
162
163
0
  while (read_index >= 0) {
164
0
    n += uint64_t(h.digits[read_index]) << shift;
165
0
    uint64_t quotient = n / 10;
166
0
    uint64_t remainder = n - (10 * quotient);
167
0
    if (write_index < max_digits) {
168
0
      h.digits[write_index] = uint8_t(remainder);
169
0
    } else if (remainder > 0) {
170
0
      h.truncated = true;
171
0
    }
172
0
    n = quotient;
173
0
    write_index--;
174
0
    read_index--;
175
0
  }
176
0
  while (n > 0) {
177
0
    uint64_t quotient = n / 10;
178
0
    uint64_t remainder = n - (10 * quotient);
179
0
    if (write_index < max_digits) {
180
0
      h.digits[write_index] = uint8_t(remainder);
181
0
    } else if (remainder > 0) {
182
0
      h.truncated = true;
183
0
    }
184
0
    n = quotient;
185
0
    write_index--;
186
0
  }
187
0
  h.num_digits += num_new_digits;
188
0
  if (h.num_digits > max_digits) {
189
0
    h.num_digits = max_digits;
190
0
  }
191
0
  h.decimal_point += int32_t(num_new_digits);
192
0
  trim(h);
193
0
}
194
195
// computes h * 2^shift
196
0
void decimal_right_shift(decimal &h, uint32_t shift) {
197
0
  uint32_t read_index = 0;
198
0
  uint32_t write_index = 0;
199
200
0
  uint64_t n = 0;
201
202
0
  while ((n >> shift) == 0) {
203
0
    if (read_index < h.num_digits) {
204
0
      n = (10 * n) + h.digits[read_index++];
205
0
    } else if (n == 0) {
206
0
      return;
207
0
    } else {
208
0
      while ((n >> shift) == 0) {
209
0
        n = 10 * n;
210
0
        read_index++;
211
0
      }
212
0
      break;
213
0
    }
214
0
  }
215
0
  h.decimal_point -= int32_t(read_index - 1);
216
0
  if (h.decimal_point < -decimal_point_range) { // it is zero
217
0
    h.num_digits = 0;
218
0
    h.decimal_point = 0;
219
0
    h.negative = false;
220
0
    h.truncated = false;
221
0
    return;
222
0
  }
223
0
  uint64_t mask = (uint64_t(1) << shift) - 1;
224
0
  while (read_index < h.num_digits) {
225
0
    uint8_t new_digit = uint8_t(n >> shift);
226
0
    n = (10 * (n & mask)) + h.digits[read_index++];
227
0
    h.digits[write_index++] = new_digit;
228
0
  }
229
0
  while (n > 0) {
230
0
    uint8_t new_digit = uint8_t(n >> shift);
231
0
    n = 10 * (n & mask);
232
0
    if (write_index < max_digits) {
233
0
      h.digits[write_index++] = new_digit;
234
0
    } else if (new_digit > 0) {
235
0
      h.truncated = true;
236
0
    }
237
0
  }
238
0
  h.num_digits = write_index;
239
0
  trim(h);
240
0
}
241
242
} // end of anonymous namespace
243
244
template <typename binary>
245
0
adjusted_mantissa compute_float(decimal &d) {
246
0
  adjusted_mantissa answer;
247
0
  if (d.num_digits == 0) {
248
    // should be zero
249
0
    answer.power2 = 0;
250
0
    answer.mantissa = 0;
251
0
    return answer;
252
0
  }
253
  // At this point, going further, we can assume that d.num_digits > 0.
254
  //
255
  // We want to guard against excessive decimal point values because
256
  // they can result in long running times. Indeed, we do
257
  // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22
258
  // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not
259
  // fine (runs for a long time).
260
  //
261
0
  if(d.decimal_point < -324) {
262
    // We have something smaller than 1e-324 which is always zero
263
    // in binary64 and binary32.
264
    // It should be zero.
265
0
    answer.power2 = 0;
266
0
    answer.mantissa = 0;
267
0
    return answer;
268
0
  } else if(d.decimal_point >= 310) {
269
    // We have something at least as large as 0.1e310 which is
270
    // always infinite.  
271
0
    answer.power2 = binary::infinite_power();
272
0
    answer.mantissa = 0;
273
0
    return answer;
274
0
  }
275
0
  static const uint32_t max_shift = 60;
276
0
  static const uint32_t num_powers = 19;
277
0
  static const uint8_t powers[19] = {
278
0
      0,  3,  6,  9,  13, 16, 19, 23, 26, 29, //
279
0
      33, 36, 39, 43, 46, 49, 53, 56, 59,     //
280
0
  };
281
0
  int32_t exp2 = 0;
282
0
  while (d.decimal_point > 0) {
283
0
    uint32_t n = uint32_t(d.decimal_point);
284
0
    uint32_t shift = (n < num_powers) ? powers[n] : max_shift;
285
0
    decimal_right_shift(d, shift);
286
0
    if (d.decimal_point < -decimal_point_range) {
287
      // should be zero
288
0
      answer.power2 = 0;
289
0
      answer.mantissa = 0;
290
0
      return answer;
291
0
    }
292
0
    exp2 += int32_t(shift);
293
0
  }
294
  // We shift left toward [1/2 ... 1].
295
0
  while (d.decimal_point <= 0) {
296
0
    uint32_t shift;
297
0
    if (d.decimal_point == 0) {
298
0
      if (d.digits[0] >= 5) {
299
0
        break;
300
0
      }
301
0
      shift = (d.digits[0] < 2) ? 2 : 1;
302
0
    } else {
303
0
      uint32_t n = uint32_t(-d.decimal_point);
304
0
      shift = (n < num_powers) ? powers[n] : max_shift;
305
0
    }
306
0
    decimal_left_shift(d, shift);
307
0
    if (d.decimal_point > decimal_point_range) {
308
      // we want to get infinity:
309
0
      answer.power2 = binary::infinite_power();
310
0
      answer.mantissa = 0;
311
0
      return answer;
312
0
    }
313
0
    exp2 -= int32_t(shift);
314
0
  }
315
  // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2].
316
0
  exp2--;
317
0
  constexpr int32_t minimum_exponent = binary::minimum_exponent();
318
0
  while ((minimum_exponent + 1) > exp2) {
319
0
    uint32_t n = uint32_t((minimum_exponent + 1) - exp2);
320
0
    if (n > max_shift) {
321
0
      n = max_shift;
322
0
    }
323
0
    decimal_right_shift(d, n);
324
0
    exp2 += int32_t(n);
325
0
  }
326
0
  if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
327
0
    answer.power2 = binary::infinite_power();
328
0
    answer.mantissa = 0;
329
0
    return answer;
330
0
  }
331
332
0
  const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1;
333
0
  decimal_left_shift(d, mantissa_size_in_bits);
334
335
0
  uint64_t mantissa = round(d);
336
  // It is possible that we have an overflow, in which case we need
337
  // to shift back.
338
0
  if(mantissa >= (uint64_t(1) << mantissa_size_in_bits)) {
339
0
    decimal_right_shift(d, 1);
340
0
    exp2 += 1;
341
0
    mantissa = round(d);
342
0
    if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
343
0
      answer.power2 = binary::infinite_power();
344
0
      answer.mantissa = 0;
345
0
      return answer;
346
0
    }
347
0
  }
348
0
  answer.power2 = exp2  - binary::minimum_exponent();
349
0
  if(mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { answer.power2--; }
350
0
  answer.mantissa = mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1);
351
0
  return answer;
352
0
}
Unexecuted instantiation: arrow_vendored::fast_float::adjusted_mantissa arrow_vendored::fast_float::compute_float<arrow_vendored::fast_float::binary_format<float> >(arrow_vendored::fast_float::decimal&)
Unexecuted instantiation: arrow_vendored::fast_float::adjusted_mantissa arrow_vendored::fast_float::compute_float<arrow_vendored::fast_float::binary_format<double> >(arrow_vendored::fast_float::decimal&)
353
354
template <typename binary>
355
0
adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
356
0
    decimal d = parse_decimal(first, last);
357
0
    return compute_float<binary>(d);
358
0
}
Unexecuted instantiation: arrow_vendored::fast_float::adjusted_mantissa arrow_vendored::fast_float::parse_long_mantissa<arrow_vendored::fast_float::binary_format<float> >(char const*, char const*)
Unexecuted instantiation: arrow_vendored::fast_float::adjusted_mantissa arrow_vendored::fast_float::parse_long_mantissa<arrow_vendored::fast_float::binary_format<double> >(char const*, char const*)
359
360
} // namespace fast_float
361
} // namespace arrow_vendored
362
#endif