DCCL v4
field_codec_arithmetic.h
1 // Copyright 2012-2023:
2 // GobySoft, LLC (2013-)
3 // Massachusetts Institute of Technology (2007-2014)
4 // Community contributors (see AUTHORS file)
5 // File authors:
6 // Toby Schneider <toby@gobysoft.org>
7 //
8 //
9 // This file is part of the Dynamic Compact Control Language Library
10 // ("DCCL").
11 //
12 // DCCL is free software: you can redistribute it and/or modify
13 // it under the terms of the GNU Lesser General Public License as published by
14 // the Free Software Foundation, either version 2.1 of the License, or
15 // (at your option) any later version.
16 //
17 // DCCL is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public License
23 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
24 // This code is adapted from the reference code provided by Witten et al. "Arithmetic Coding for Data Compression," Communications of the ACM, June 1987, Vol 30, Number 6
25 
26 #ifndef DCCLFIELDCODECARITHMETIC20120726H
27 #define DCCLFIELDCODECARITHMETIC20120726H
28 
#include <algorithm>
#include <cassert>
#include <cmath>
#include <functional>
#include <iterator>
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>
32 
33 #include "../field_codec_typed.h"
34 
35 #include "dccl/arithmetic/protobuf/arithmetic.pb.h"
36 #include "dccl/arithmetic/protobuf/arithmetic_extensions.pb.h"
37 
38 #include "../logger.h"
39 
40 #include "../binary.h"
41 #include "../thread_safety.h"
42 
43 extern "C"
44 {
45  void dccl3_load(dccl::Codec* dccl);
46  void dccl3_unload(dccl::Codec* dccl);
47  void dccl_arithmetic_load(dccl::Codec* dccl);
48  void dccl_arithmetic_unload(dccl::Codec* dccl);
49 }
50 
51 namespace dccl
52 {
54 namespace arith
55 {
56 class ModelManager;
57 
58 ModelManager& model_manager(FieldCodecManagerLocal& manager);
59 
60 class Model
61 {
62  public:
63  typedef uint32 freq_type;
64  using symbol_type = int; // google protobuf RepeatedField size type
65  using value_type = double;
66 
67  static constexpr symbol_type OUT_OF_RANGE_SYMBOL = -1;
68  static constexpr symbol_type EOF_SYMBOL = -2;
69  static constexpr symbol_type MIN_SYMBOL = EOF_SYMBOL;
70 
71  static constexpr int CODE_VALUE_BITS = 32;
72  static constexpr int FREQUENCY_BITS = CODE_VALUE_BITS - 2;
73 
74  static constexpr freq_type MAX_FREQUENCY = (1 << FREQUENCY_BITS) - 1;
75 
76 #if DCCL_THREAD_SUPPORT
77  static std::recursive_mutex last_bits_map_mutex;
78 #define LOCK_LAST_BITS_MAP_MUTEX \
79  std::lock_guard<std::recursive_mutex> l(dccl::arith::Model::last_bits_map_mutex);
80 #else
81 #define LOCK_LAST_BITS_MAP_MUTEX
82 #endif
83  // maps message name -> map of field name -> last size (bits)
84  static std::map<std::string, std::map<std::string, Bitset>> last_bits_map;
85 
86  Model(protobuf::ArithmeticModel user) : user_model_(std::move(user)) {}
87 
88  enum ModelState
89  {
90  ENCODER,
91  DECODER
92  };
93 
94  symbol_type value_to_symbol(value_type value) const;
95  value_type symbol_to_value(symbol_type symbol) const;
96  symbol_type total_symbols() // EOF and OUT_OF_RANGE plus all user defined
97  {
98  return encoder_cumulative_freqs_.size();
99  }
100 
101  const protobuf::ArithmeticModel& user_model() const { return user_model_; }
102 
103  symbol_type max_symbol() const { return user_model_.frequency_size() - 1; }
104 
105  freq_type total_freq(ModelState state) const
106  {
107  const auto& c_freqs =
108  (state == ENCODER) ? encoder_cumulative_freqs_ : decoder_cumulative_freqs_;
109 
110  return c_freqs.at(max_symbol());
111  }
112 
113  void update_model(symbol_type symbol, ModelState state);
114 
115  std::pair<freq_type, freq_type> symbol_to_cumulative_freq(symbol_type symbol,
116  ModelState state) const;
117  std::pair<symbol_type, symbol_type>
118  cumulative_freq_to_symbol(std::pair<freq_type, freq_type> c_freq_pair, ModelState state) const;
119 
120  friend class ModelManager;
121 
122  private:
123  protobuf::ArithmeticModel user_model_;
124  std::map<symbol_type, freq_type> encoder_cumulative_freqs_;
125  std::map<symbol_type, freq_type> decoder_cumulative_freqs_;
126 };
127 
129 {
130  public:
131  static void set_model(dccl::Codec& codec, const protobuf::ArithmeticModel& model);
132 
133  Model& find(const std::string& name)
134  {
135  auto it = arithmetic_models_.find(name);
136  if (it == arithmetic_models_.end())
137  throw(Exception("Cannot find model called: " + name));
138  else
139  return it->second;
140  }
141 
142  private:
143  void _set_model(const protobuf::ArithmeticModel& model)
144  {
145  Model new_model(model);
146  _create_and_validate_model(&new_model);
147  if (arithmetic_models_.count(model.name()))
148  arithmetic_models_.erase(model.name());
149  arithmetic_models_.insert(std::make_pair(model.name(), new_model));
150  }
151 
152  void _create_and_validate_model(Model* model)
153  {
154  if (!model->user_model_.IsInitialized())
155  {
156  throw(Exception("Invalid model: " + model->user_model_.DebugString() +
157  "Missing fields: " + model->user_model_.InitializationErrorString()));
158  }
159 
160  Model::freq_type cumulative_freq = 0;
161  for (Model::symbol_type symbol = Model::MIN_SYMBOL, n = model->user_model_.frequency_size();
162  symbol < n; ++symbol)
163  {
164  Model::freq_type freq;
165  if (symbol == Model::EOF_SYMBOL)
166  freq = model->user_model_.eof_frequency();
167  else if (symbol == Model::OUT_OF_RANGE_SYMBOL)
168  freq = model->user_model_.out_of_range_frequency();
169  else
170  freq = model->user_model_.frequency(symbol);
171 
172  if (freq == 0 && symbol != Model::OUT_OF_RANGE_SYMBOL && symbol != Model::EOF_SYMBOL)
173  {
174  throw(Exception("Invalid model: " + model->user_model_.DebugString() +
175  "All frequencies must be nonzero."));
176  }
177  cumulative_freq += freq;
178  model->encoder_cumulative_freqs_.insert(std::make_pair(symbol, cumulative_freq));
179  }
180 
181  // must have separate models for adaptive encoding.
182  model->decoder_cumulative_freqs_ = model->encoder_cumulative_freqs_;
183 
184  if (model->total_freq(Model::ENCODER) > Model::MAX_FREQUENCY)
185  {
186  throw(Exception("Invalid model: " + model->user_model_.DebugString() +
187  "Sum of all frequencies must be less than " +
188  std::to_string(Model::MAX_FREQUENCY) +
189  " in order to use 64 bit arithmetic"));
190  }
191 
192  if (model->user_model_.value_bound_size() != model->user_model_.frequency_size() + 1)
193  {
194  throw(Exception("Invalid model: " + model->user_model_.DebugString() +
195  "`value_bound` size must be exactly 1 more than number of symbols (= "
196  "size of `frequency`)."));
197  }
198 
199  // is `value_bound` repeated field strictly monotonically increasing?
200  if (std::adjacent_find(
201  model->user_model_.value_bound().begin(), model->user_model_.value_bound().end(),
202  std::greater_equal<Model::value_type>()) != model->user_model_.value_bound().end())
203  {
204  throw(Exception("Invalid model: " + model->user_model_.DebugString() +
205  "`value_bound` must be monotonically increasing."));
206  }
207  }
208 
209  private:
210  std::map<std::string, Model> arithmetic_models_;
211 };
212 
213 template <typename FieldType = Model::value_type>
214 class ArithmeticFieldCodecBase : public RepeatedTypedFieldCodec<Model::value_type, FieldType>
215 {
216  public:
217  static constexpr uint64 TOP_VALUE =
218  (static_cast<uint64>(1) << Model::CODE_VALUE_BITS) - 1; // 11111111...
219  static constexpr uint64 HALF =
220  (static_cast<uint64>(1) << (Model::CODE_VALUE_BITS - 1)); // 10000000...
221  static constexpr uint64 FIRST_QTR = HALF >> 1; // 01000000...
222  static constexpr uint64 THIRD_QTR = HALF + FIRST_QTR; // 11000000...
223 
224  Bitset encode_repeated(const std::vector<Model::value_type>& wire_value) override
225  {
226  return encode_repeated(wire_value, true);
227  }
228 
229  Bitset encode_repeated(const std::vector<Model::value_type>& wire_value, bool update_model)
230  {
231  using dccl::dlog;
232  using namespace dccl::logger;
233  Model& model = current_model();
234 
235  uint64 low = 0; // lowest code value (0.0 in decimal version)
236  uint64 high = TOP_VALUE; // highest code value (1.0 in decimal version)
237  int bits_to_follow = 0; // bits to follow with after expanding around half
238  Bitset bits;
239 
240  for (unsigned value_index = 0, n = max_repeat(); value_index < n; ++value_index)
241  {
242  Model::symbol_type symbol = Model::EOF_SYMBOL;
243 
244  if (wire_value.size() > value_index)
245  {
246  Model::value_type value = wire_value[value_index];
247  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) value is : " << value
248  << std::endl;
249 
250  symbol = model.value_to_symbol(value);
251  }
252 
253  // if out-of-range is given no frequency, end encoding
254  if (symbol == Model::OUT_OF_RANGE_SYMBOL &&
255  model.user_model().out_of_range_frequency() == 0)
256  {
257  dlog.is(DEBUG2) && dlog << "(ArithmeticFieldCodec) out of range symbol, but no "
258  "frequency given; ending encoding"
259  << std::endl;
260 
261  symbol = Model::EOF_SYMBOL;
262  }
263 
264  // if EOF_SYMBOL is given no frequency, use most probable symbol and give a warning
265  if (symbol == Model::EOF_SYMBOL && model.user_model().eof_frequency() == 0)
266  {
267  dlog.is(DEBUG2) && dlog << "(ArithmeticFieldCodec) end of file, but no frequency "
268  "given; filling with most probable symbol"
269  << std::endl;
270  symbol = *std::max_element(model.user_model().frequency().begin(),
271  model.user_model().frequency().end());
272  }
273 
274  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) symbol is : " << symbol << std::endl;
275 
276  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) current interval: ["
277  << (double)low / TOP_VALUE << "," << (double)high / TOP_VALUE
278  << ")" << std::endl;
279 
280  uint64 range = (high - low) + 1;
281 
282  std::pair<Model::freq_type, Model::freq_type> c_freq_range =
283  model.symbol_to_cumulative_freq(symbol, Model::ENCODER);
284 
285  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) input symbol (" << symbol
286  << ") cumulative freq: [" << c_freq_range.first << ","
287  << c_freq_range.second << ")" << std::endl;
288 
289  high = low + (range * c_freq_range.second) / model.total_freq(Model::ENCODER) - 1;
290  low += (range * c_freq_range.first) / model.total_freq(Model::ENCODER);
291 
292  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) input symbol (" << symbol
293  << ") interval: [" << (double)low / TOP_VALUE << ","
294  << (double)high / TOP_VALUE << ")" << std::endl;
295 
296  dlog.is(DEBUG3) &&
297  dlog << "(ArithmeticFieldCodec) Q1: " << Bitset(Model::CODE_VALUE_BITS, FIRST_QTR)
298  << ", Q2: " << Bitset(Model::CODE_VALUE_BITS, HALF)
299  << ", Q3 : " << Bitset(Model::CODE_VALUE_BITS, THIRD_QTR)
300  << ", top: " << Bitset(Model::CODE_VALUE_BITS, TOP_VALUE) << std::endl;
301 
302  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) low: "
303  << Bitset(Model::CODE_VALUE_BITS, low).to_string() << std::endl;
304  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) high: "
305  << Bitset(Model::CODE_VALUE_BITS, high).to_string()
306  << std::endl;
307 
308  if (update_model)
309  model.update_model(symbol, Model::ENCODER);
310 
311  for (;;)
312  {
313  if (high < HALF)
314  {
315  bit_plus_follow(&bits, &bits_to_follow, 0);
316  dlog.is(DEBUG3) &&
317  dlog << "(ArithmeticFieldCodec): completely in [0, 0.5): EXPAND"
318  << std::endl;
319  }
320  else if (low >= HALF)
321  {
322  bit_plus_follow(&bits, &bits_to_follow, 1);
323  low -= HALF;
324  high -= HALF;
325  dlog.is(DEBUG3) &&
326  dlog << "(ArithmeticFieldCodec): completely in [0.5, 1): EXPAND"
327  << std::endl;
328  }
329  else if (low >= FIRST_QTR && high < THIRD_QTR)
330  {
331  dlog.is(DEBUG3) &&
332  dlog << "(ArithmeticFieldCodec): straddle middle [0.25, 0.75): EXPAND"
333  << std::endl;
334 
335  bits_to_follow += 1;
336  low -= FIRST_QTR;
337  high -= FIRST_QTR;
338  }
339  else
340  break;
341 
342  low <<= 1;
343  high <<= 1;
344  high += 1;
345 
346  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) low: "
347  << Bitset(Model::CODE_VALUE_BITS, low).to_string()
348  << std::endl;
349  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) high: "
350  << Bitset(Model::CODE_VALUE_BITS, high).to_string()
351  << std::endl;
352 
353  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) current interval: ["
354  << (double)low / TOP_VALUE << ","
355  << (double)high / TOP_VALUE << ")" << std::endl;
356  }
357 
358  // nothing more to do, we're encoding all the data and an EOF
359  if (value_index == wire_value.size())
360  break;
361  }
362 
363  // output exactly the number of bits required to unambiguously
364  // store the final range's state
365  // 0 . . . 1
366  // | | -- output nothing, unless we have follow bits
367  // | | -- output a single 0
368  if (low == 0) // high must be greater than half
369  {
370  if (high != TOP_VALUE || bits_to_follow > 0)
371  bit_plus_follow(&bits, &bits_to_follow, 0);
372  }
373  // 0 . . . 1
374  // | | -- output a single 1
375  else if (high == TOP_VALUE) // 0 < low < half
376  {
377  bit_plus_follow(&bits, &bits_to_follow, 1);
378  }
379  // 0 . . . 1
380  // | | -- output 01
381  // | | -- output 10
382  else
383  {
384  bits_to_follow += 1;
385  bit_plus_follow(&bits, &bits_to_follow, (low < FIRST_QTR) ? 0 : 1);
386  }
387 
388  if (FieldCodecBase::dccl_field_options().GetExtension(arithmetic).debug_assert())
389  {
390  LOCK_LAST_BITS_MAP_MUTEX
391  // bit of a hack so I can get at the exact bit field sizes
392  Model::last_bits_map[FieldCodecBase::this_descriptor()->full_name()]
393  [FieldCodecBase::this_field()->name()] = bits;
394  }
395 
396  return bits;
397  }
398 
399  void bit_plus_follow(Bitset* bits, int* bits_to_follow, bool bit)
400  {
401  bits->push_back(bit);
402  dccl::dlog.is(dccl::logger::DEBUG3) &&
403  dccl::dlog << "(ArithmeticFieldCodec): emitted bit: " << bit << std::endl;
404 
405  while (*bits_to_follow)
406  {
407  dccl::dlog.is(dccl::logger::DEBUG3) &&
408  dccl::dlog << "(ArithmeticFieldCodec): emitted bit (from follow): " << !bit
409  << std::endl;
410 
411  bits->push_back(!bit);
412  (*bits_to_follow) -= 1;
413  }
414  }
415 
416  std::vector<Model::value_type> decode_repeated(Bitset* bits) override
417  {
418  using dccl::dlog;
419  using namespace dccl::logger;
420 
421  std::vector<Model::value_type> values;
422  Model& model = current_model();
423 
424  uint64 value = 0;
425  uint64 low = 0;
426  uint64 high = TOP_VALUE;
427 
428  // offset from `bits` to currently examined `value`
429  // there are `bit_stream_offset` zeros in the lower bits of `value`
430  int bit_stream_offset = Model::CODE_VALUE_BITS - bits->size();
431 
432  for (int i = 0, n = Model::CODE_VALUE_BITS; i < n; ++i)
433  {
434  if (i >= bit_stream_offset)
435  value |=
436  (static_cast<uint64>((*bits)[bits->size() - (i - bit_stream_offset) - 1]) << i);
437  }
438 
439  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec): starting value: "
440  << Bitset(Model::CODE_VALUE_BITS, value).to_string() << std::endl;
441 
442  for (unsigned value_index = 0, n = max_repeat(); value_index < n; ++value_index)
443  {
444  uint64 range = (high - low) + 1;
445 
446  Model::symbol_type symbol = bits_to_symbol(bits, value, bit_stream_offset, low, range);
447 
448  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) symbol is: " << symbol << std::endl;
449 
450  std::pair<Model::freq_type, Model::freq_type> c_freq_range =
451  model.symbol_to_cumulative_freq(symbol, Model::DECODER);
452 
453  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) input symbol (" << symbol
454  << ") cumulative freq: [" << c_freq_range.first << ","
455  << c_freq_range.second << ")" << std::endl;
456 
457  high = low + (range * c_freq_range.second) / model.total_freq(Model::DECODER) - 1;
458  low += (range * c_freq_range.first) / model.total_freq(Model::DECODER);
459 
460  model.update_model(symbol, Model::DECODER);
461 
462  if (symbol == Model::EOF_SYMBOL)
463  break;
464 
465  values.push_back(model.symbol_to_value(symbol));
466 
467  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) value is: " << values.back()
468  << std::endl;
469 
470  for (;;)
471  {
472  if (high < HALF)
473  {
474  // nothing
475  }
476  else if (low >= HALF)
477  {
478  value -= HALF;
479  low -= HALF;
480  high -= HALF;
481  }
482  else if (low >= FIRST_QTR && high < THIRD_QTR)
483  {
484  value -= FIRST_QTR;
485  low -= FIRST_QTR;
486  high -= FIRST_QTR;
487  }
488  else
489  break;
490 
491  low <<= 1;
492  high <<= 1;
493  high += 1;
494  value <<= 1;
495  bit_stream_offset += 1;
496  }
497  }
498 
499  // for debugging / testing
500  if (FieldCodecBase::dccl_field_options().GetExtension(arithmetic).debug_assert())
501  {
502  LOCK_LAST_BITS_MAP_MUTEX
503  // must consume same bits as encoded makes
504  Bitset in = Model::last_bits_map[FieldCodecBase::this_descriptor()->full_name()]
505  [FieldCodecBase::this_field()->name()];
506 
507  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) bits used is (" << bits->size()
508  << "): " << *bits << std::endl;
509  dlog.is(DEBUG3) && dlog << "(ArithmeticFieldCodec) bits original is (" << in.size()
510  << "): " << in << std::endl;
511 
512  assert(in == *bits);
513  }
514 
515  return values;
516  }
517 
518  unsigned size_repeated(const std::vector<Model::value_type>& wire_values) override
519  {
520  // we should really cache this for efficiency
521  return encode_repeated(wire_values, false).size();
522  }
523 
524  // this maximum size will be upper bounded by: ceil(log_2(1/P)) + 1 where P is the
525  // probability of this least probable set of symbols
526  unsigned max_size_repeated() override
527  {
528  using dccl::log2;
529  Model& model = current_model();
530 
531  // if user doesn't provide out_of_range frequency, set it to max to force this
532  // calculation to return the lowest probability symbol in use
533  Model::freq_type out_of_range_freq = model.user_model().out_of_range_frequency();
534  if (out_of_range_freq == 0)
535  out_of_range_freq = Model::MAX_FREQUENCY;
536 
537  Model::value_type lowest_frequency =
538  std::min(out_of_range_freq, *std::min_element(model.user_model().frequency().begin(),
539  model.user_model().frequency().end()));
540 
541  // full of least probable symbols
542  auto size_least_probable = (unsigned)(std::ceil(
543  max_repeat() * (log2(model.total_freq(Model::ENCODER)) - log2(lowest_frequency))));
544 
545  dccl::dlog.is(dccl::logger::DEBUG3) &&
546  dccl::dlog << "(ArithmeticFieldCodec) size_least_probable: " << size_least_probable
547  << std::endl;
548 
549  Model::freq_type eof_freq = model.user_model().eof_frequency();
550  // almost full of least probable symbols plus EOF
551  auto size_least_probable_plus_eof =
552  (unsigned)((eof_freq != 0)
553  ? std::ceil(max_repeat() * log2(model.total_freq(Model::ENCODER)) -
554  (max_repeat() - 1) * log2(lowest_frequency) - log2(eof_freq))
555  : 0);
556 
557  dccl::dlog.is(dccl::logger::DEBUG3) &&
558  dccl::dlog << "(ArithmeticFieldCodec) size_least_probable_plus_eof: "
559  << size_least_probable_plus_eof << std::endl;
560 
561  return std::max(size_least_probable_plus_eof, size_least_probable) + 1;
562  }
563 
564  unsigned min_size_repeated() override
565  {
566  using dccl::log2;
567  const Model& model = current_model();
568 
569  if (model.user_model().is_adaptive())
570  return 0; // force examining bits from the beginning on decode
571 
572  // if user doesn't provide out_of_range frequency, set it to 1 (minimum) to force this
573  // calculation to return the highest probability symbol in use
574  Model::freq_type out_of_range_freq = model.user_model().out_of_range_frequency();
575  if (out_of_range_freq == 0)
576  out_of_range_freq = 1;
577 
578  Model::freq_type eof_freq = model.user_model().eof_frequency();
579  // just EOF
580  auto size_empty =
581  (unsigned)((eof_freq != 0)
582  ? std::ceil(log2(model.total_freq(Model::ENCODER)) - log2(eof_freq))
583  : std::numeric_limits<unsigned>::max());
584 
585  dccl::dlog.is(dccl::logger::DEBUG3) &&
586  dccl::dlog << "(ArithmeticFieldCodec) size_empty: " << size_empty << std::endl;
587 
588  // full with most probable symbol
589  Model::value_type highest_frequency =
590  std::max(out_of_range_freq, *std::max_element(model.user_model().frequency().begin(),
591  model.user_model().frequency().end()));
592 
593  auto size_most_probable = (unsigned)(std::ceil(
594  max_repeat() * (log2(model.total_freq(Model::ENCODER)) - log2(highest_frequency))));
595 
596  dccl::dlog.is(dccl::logger::DEBUG3) &&
597  dccl::dlog << "(ArithmeticFieldCodec) size_most_probable: " << size_most_probable
598  << std::endl;
599 
600  return std::min(size_empty, size_most_probable);
601  }
602 
603  void validate() override
604  {
606  "missing (dccl.field).arithmetic");
607 
608  std::string model_name =
609  FieldCodecBase::dccl_field_options().GetExtension(arithmetic).model();
610  try
611  {
612  model_manager().find(model_name);
613  }
614  catch (Exception& e)
615  {
616  FieldCodecBase::require(false, "no such (dccl.field).arithmetic.model called \"" +
617  model_name + "\" loaded.");
618  }
619  }
620 
621  // end inherited methods
622 
623  Model::symbol_type bits_to_symbol(Bitset* bits, uint64& value, int& bit_stream_offset,
624  uint64 low, uint64 range)
625  {
626  Model& model = current_model();
627 
628  for (;;)
629  {
630  uint64 value_high = (bit_stream_offset > 0)
631  ? value + ((static_cast<uint64>(1) << bit_stream_offset) - 1)
632  : value;
633 
634  dccl::dlog.is(dccl::logger::DEBUG3) &&
635  dccl::dlog << "(ArithmeticFieldCodec): value range: ["
636  << Bitset(Model::CODE_VALUE_BITS, value) << ","
637  << Bitset(Model::CODE_VALUE_BITS, value_high) << ")" << std::endl;
638 
639  Model::freq_type cumulative_freq =
640  ((value - low + 1) * model.total_freq(Model::DECODER) - 1) / range;
641  Model::freq_type cumulative_freq_high =
642  ((value_high - low + 1) * model.total_freq(Model::DECODER) - 1) / range;
643 
644  dccl::dlog.is(dccl::logger::DEBUG3) &&
645  dccl::dlog << "(ArithmeticFieldCodec): c_freq: " << cumulative_freq
646  << ", c_freq_high: " << cumulative_freq_high << std::endl;
647 
648  std::pair<Model::symbol_type, Model::symbol_type> symbol_pair =
649  model.cumulative_freq_to_symbol(
650  std::make_pair(cumulative_freq, cumulative_freq_high), Model::DECODER);
651 
652  dccl::dlog.is(dccl::logger::DEBUG3) &&
653  dccl::dlog << "(ArithmeticFieldCodec): symbol: " << symbol_pair.first << ", "
654  << symbol_pair.second << std::endl;
655 
656  if (symbol_pair.first == symbol_pair.second)
657  return symbol_pair.first;
658 
659  // add another bit to disambiguate
660  bits->get_more_bits(1);
661 
662  dccl::dlog.is(dccl::logger::DEBUG3) &&
663  dccl::dlog << "(ArithmeticFieldCodec): bits: " << *bits << std::endl;
664 
665  --bit_stream_offset;
666  value |= static_cast<uint64>(bits->back()) << bit_stream_offset;
667 
668  dccl::dlog.is(dccl::logger::DEBUG3) &&
669  dccl::dlog << "(ArithmeticFieldCodec): ambiguous (symbol could be "
670  << symbol_pair.first << " or " << symbol_pair.second << ")" << std::endl;
671  }
672 
673  return 0;
674  }
675 
676  dccl::int32 max_repeat()
677  {
678  return FieldCodecBase::this_field()->is_repeated()
679  ? FieldCodecBase::dccl_field_options().max_repeat()
680  : 1;
681  }
682 
683  Model& current_model()
684  {
685  std::string name = FieldCodecBase::dccl_field_options().GetExtension(arithmetic).model();
686  return model_manager().find(name);
687  }
688 
689  ModelManager& model_manager() { return dccl::arith::model_manager(this->manager()); }
690 };
691 
692 // constant integer definitions
693 template <typename FieldType> const uint64 ArithmeticFieldCodecBase<FieldType>::TOP_VALUE;
694 template <typename FieldType> const uint64 ArithmeticFieldCodecBase<FieldType>::FIRST_QTR;
695 template <typename FieldType> const uint64 ArithmeticFieldCodecBase<FieldType>::HALF;
696 template <typename FieldType> const uint64 ArithmeticFieldCodecBase<FieldType>::THIRD_QTR;
697 
698 template <typename FieldType>
700 {
701  Model::value_type pre_encode(const FieldType& field_value) override
702  {
703  return static_cast<Model::value_type>(field_value);
704  }
705 
706  FieldType post_decode(const Model::value_type& wire_value) override
707  {
708  return static_cast<FieldType>(wire_value);
709  }
710 };
711 
712 template <>
713 class ArithmeticFieldCodec<const google::protobuf::EnumValueDescriptor*>
714  : public ArithmeticFieldCodecBase<const google::protobuf::EnumValueDescriptor*>
715 {
716  public:
717  Model::value_type
718  pre_encode(const google::protobuf::EnumValueDescriptor* const& field_value) override
719  {
720  return field_value->number();
721  }
722 
723  const google::protobuf::EnumValueDescriptor*
724  post_decode(const Model::value_type& wire_value) override
725  {
726  const google::protobuf::EnumDescriptor* e = FieldCodecBase::this_field()->enum_type();
727  const google::protobuf::EnumValueDescriptor* return_value =
728  e->FindValueByNumber((int)wire_value);
729 
730  if (return_value)
731  return return_value;
732  else
733  throw NullValueException();
734  }
735 };
736 
737 } // namespace arith
738 } // namespace dccl
739 
740 #endif
dccl::FieldCodecManagerLocal
A class for managing the various field codecs. Here you can add and remove field codecs....
Definition: field_codec_manager.h:39
dccl::FieldCodecBase::this_descriptor
const google::protobuf::Descriptor * this_descriptor() const
Returns the Descriptor (message schema meta-data) for the immediate parent Message.
Definition: field_codec.cpp:657
dccl::arith::ArithmeticFieldCodecBase
Definition: field_codec_arithmetic.h:214
dccl::uint64
google::protobuf::uint64 uint64
an unsigned 64 bit integer
Definition: common.h:60
dccl::Bitset::get_more_bits
void get_more_bits(size_type num_bits)
Retrieve more bits from the parent Bitset.
Definition: bitset.h:420
dccl::arith::ArithmeticFieldCodecBase::validate
void validate() override
Validate a field. Use require() inside your overloaded validate() to assert requirements or throw Exc...
Definition: field_codec_arithmetic.h:603
dccl
Dynamic Compact Control Language namespace.
Definition: any.h:46
dccl::FieldCodecBase::this_field
const google::protobuf::FieldDescriptor * this_field() const
Returns the FieldDescriptor (field schema meta-data) for this field.
Definition: field_codec.cpp:652
dccl::Logger::is
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:192
dccl::Exception
Exception class for DCCL.
Definition: exception.h:46
dccl::arith::ModelManager
Definition: field_codec_arithmetic.h:128
dccl::arith::ArithmeticFieldCodecBase::decode_repeated
std::vector< Model::value_type > decode_repeated(Bitset *bits) override
Decode a repeated field.
Definition: field_codec_arithmetic.h:416
dccl::uint32
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:56
dccl::FieldCodecBase::require
void require(bool b, const std::string &description)
Essentially an assertion to be used in the validate() virtual method.
Definition: field_codec.h:348
dccl::Codec
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:62
dccl::FieldCodecSelector< Model::value_type, Model::value_type >::post_decode
virtual Model::value_type post_decode(const Model::value_type &wire_value)=0
Convert from the WireType representation (used with encode() and decode(), i.e. "on the wire") to the...
dccl::NullValueException
Exception used to signal null (non-existent) value within field codecs during decode.
Definition: exception.h:61
dccl::Bitset
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition: bitset.h:42
dccl::FieldCodecSelector< Model::value_type, Model::value_type >::pre_encode
virtual Model::value_type pre_encode(const Model::value_type &field_value)=0
Convert from the FieldType representation (used in the Google Protobuf message) to the WireType repre...
dccl::Bitset::to_string
std::string to_string() const
Returns the value of the Bitset as a printable string, where each bit is represented by '1' or '0'....
Definition: bitset.h:278
dccl::arith::protobuf::ArithmeticModel
Definition: arithmetic.pb.h:73
dccl::arith::Model
Definition: field_codec_arithmetic.h:60
dccl::arith::ArithmeticFieldCodec
Definition: field_codec_arithmetic.h:699
dccl::arith::ArithmeticFieldCodecBase::max_size_repeated
unsigned max_size_repeated() override
Give the max size of a repeated field.
Definition: field_codec_arithmetic.h:526
dccl::RepeatedTypedFieldCodec
Base class for "repeated" (multiple value) static-typed (no dccl::any) field encoders/decoders....
Definition: field_codec_typed.h:223
dccl::int32
google::protobuf::int32 int32
a signed 32 bit integer
Definition: common.h:58
dccl::FieldCodecBase::name
std::string name() const
the name of the codec used to identifier it in the .proto custom option extension
Definition: field_codec.h:66
dccl::FieldCodecBase::dccl_field_options
dccl::DCCLFieldOptions dccl_field_options() const
Get the DCCL field option extension value for the current field.
Definition: field_codec.h:334
dccl::arith::ArithmeticFieldCodecBase::min_size_repeated
unsigned min_size_repeated() override
Give the min size of a repeated field.
Definition: field_codec_arithmetic.h:564
dccl::arith::ArithmeticFieldCodec< const google::protobuf::EnumValueDescriptor * >::pre_encode
Model::value_type pre_encode(const google::protobuf::EnumValueDescriptor *const &field_value) override
Convert from the FieldType representation (used in the Google Protobuf message) to the WireType repre...
Definition: field_codec_arithmetic.h:718