DCCL v4
codec.h
1 // Copyright 2009-2023:
2 // GobySoft, LLC (2013-)
3 // Massachusetts Institute of Technology (2007-2014)
4 // Community contributors (see AUTHORS file)
5 // File authors:
6 // Toby Schneider <toby@gobysoft.org>
7 // Nathan Knotts <nknotts@gmail.com>
8 // philboske <philboske@gmail.com>
9 // Chris Murphy <cmurphy@aphysci.com>
10 //
11 //
12 // This file is part of the Dynamic Compact Control Language Library
13 // ("DCCL").
14 //
15 // DCCL is free software: you can redistribute it and/or modify
16 // it under the terms of the GNU Lesser General Public License as published by
17 // the Free Software Foundation, either version 2.1 of the License, or
18 // (at your option) any later version.
19 //
20 // DCCL is distributed in the hope that it will be useful,
21 // but WITHOUT ANY WARRANTY; without even the implied warranty of
22 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 // GNU Lesser General Public License for more details.
24 //
25 // You should have received a copy of the GNU Lesser General Public License
26 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
27 #ifndef DCCL20091211H
28 #define DCCL20091211H
29 
30 #include <map>
31 #include <ostream>
32 #include <set>
33 #include <stdexcept>
34 #include <string>
35 #include <type_traits>
36 #include <vector>
37 
38 #include <google/protobuf/descriptor.h>
39 
40 #include <memory>
41 
42 #include "binary.h"
43 #include "dynamic_protobuf_manager.h"
44 #include "exception.h"
45 #include "field_codec.h"
46 #include "field_codec_fixed.h"
47 #include "logger.h"
48 
49 #include "codecs2/field_codec_default_message.h"
50 #include "codecs3/field_codec_default_message.h"
51 #include "dccl/def.h"
52 #include "field_codec_manager.h"
53 
55 namespace dccl
56 {
57 class FieldCodec;
58 
61 class Codec
62 {
63  public:
69  Codec(std::string dccl_id_codec_name = default_id_codec_name(),
70  const std::string& library_path = "");
71 
78  template <class IDFieldCodec,
79  typename std::enable_if<std::is_base_of<FieldCodecBase, IDFieldCodec>::value,
80  int>::type = 0>
81  Codec(const std::string& dccl_id_codec_name, const IDFieldCodec& dccl_id_codec) // NOLINT
82  : id_codec_(dccl_id_codec_name)
83  {
84  set_default_codecs();
85  manager_.add<IDFieldCodec>(dccl_id_codec_name);
86  }
87 
89  virtual ~Codec();
90 
91  Codec(const Codec&) = delete;
92  Codec& operator=(const Codec&) = delete;
93 
99  void load_library(void* dl_handle);
100 
107  void unload_library(void* dl_handle);
108 
113  void load_library(const std::string& library_path);
114 
119  template <typename ProtobufMessage> void load() { load(ProtobufMessage::descriptor()); }
120 
124  template <typename ProtobufMessage> void unload() { unload(ProtobufMessage::descriptor()); }
125 
126  void unload_all() { id2desc_.clear(); }
127 
134  void load(const google::protobuf::Descriptor* desc, int user_id = -1);
135 
140  void unload(const google::protobuf::Descriptor* desc);
141 
146  void unload(size_t dccl_id);
147 
149  void set_id_codec(const std::string& id_codec_name);
150  std::string get_id_codec() { return id_codec_; }
151 
157  void
158  set_crypto_passphrase(const std::string& passphrase,
159  const std::set<unsigned>& do_not_encrypt_ids_ = std::set<unsigned>());
160 
164  void set_strict(bool mode) { strict_ = mode; }
165 
169  void set_console_width(unsigned num_chars) { console_width_ = num_chars; }
170 
172 
176 
177 
184  template <typename ProtobufMessage>
185  void info(std::ostream* os = nullptr, int user_id = -1) const
186  {
187  info(ProtobufMessage::descriptor(), os, user_id);
188  }
189 
195  void info(const google::protobuf::Descriptor* desc, std::ostream* os = nullptr,
196  int user_id = -1) const;
197 
201  void info_all(std::ostream* os = nullptr) const;
202 
206  template <typename ProtobufMessage> unsigned id() const
207  {
208  return id(ProtobufMessage::descriptor());
209  }
210 
230  unsigned id(const std::string& bytes) const;
231 
233  template <typename CharIterator> unsigned id(CharIterator begin, CharIterator end) const;
234 
236  unsigned id(const google::protobuf::Descriptor* desc) const
237  {
238  Bitset id_bits;
239  dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
240  // pass the hard coded id, that is, (dccl.msg).id,
241  // through encode/decode to allow a custom ID codec (if in use)
242  // to always take effect.
243  id_codec()->field_encode(&id_bits, hardcoded_id, nullptr);
244  std::string id_bytes(id_bits.to_byte_string());
245  return id(id_bytes);
246  }
247 
249  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
250 
252 
256 
257 
266  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false,
267  int user_id = -1);
268 
279  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg,
280  bool header_only = false, int user_id = -1);
281 
290  template <typename CharIterator>
291  CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg,
292  bool header_only = false);
293 
300  void decode(const std::string& bytes, google::protobuf::Message* msg, bool header_only = false);
301 
307  void decode(std::string* bytes, google::protobuf::Message* msg);
308 
316  template <typename GoogleProtobufMessagePointer>
317  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
318 
325  template <typename GoogleProtobufMessagePointer>
326  GoogleProtobufMessagePointer decode(std::string* bytes);
327 
334  unsigned size(const google::protobuf::Message& msg, int user_id = -1);
335 
340  template <typename ProtobufMessage> unsigned max_size()
341  {
342  return max_size(ProtobufMessage::descriptor());
343  }
344 
346  unsigned max_size(const google::protobuf::Descriptor* desc) const;
347 
352  template <typename ProtobufMessage> unsigned min_size()
353  {
354  return min_size(ProtobufMessage::descriptor());
355  }
356 
358  unsigned min_size(const google::protobuf::Descriptor* desc) const;
359 
361 
362  static std::string default_id_codec_name() { return "dccl.default.id"; }
363 
364  static std::string default_codec_name(int version = 2)
365  {
366  switch (version)
367  {
368  case 2:
369  return dccl::DCCLFieldOptions::descriptor()
370  ->FindFieldByName("codec")
371  ->default_value_string();
372  default: return "dccl.default" + std::to_string(version);
373  }
374  }
375 
376  FieldCodecManagerLocal& manager() { return manager_; }
377 
378  private:
379  void encode_internal(const google::protobuf::Message& msg, bool header_only,
380  Bitset& header_bits, Bitset& body_bits, int user_id);
381  std::string get_all_error_fields_in_message(const google::protobuf::Message& msg,
382  uint8_t depth = 1);
383 
384  void encrypt(std::string* s, const std::string& nonce);
385  void decrypt(std::string* s, const std::string& nonce);
386 
387  void set_default_codecs();
388 
389  std::shared_ptr<FieldCodecBase> id_codec() const
390  {
391  return manager_.find(google::protobuf::FieldDescriptor::TYPE_UINT32, id_codec_);
392  }
393 
394  private:
395  // SHA256 hash of the crypto passphrase
396  std::string crypto_key_;
397 
398  // strict mode setting
399  bool strict_{false};
400 
401  // console outputting format width
402  unsigned console_width_{60};
403 
404  // set of DCCL IDs *not* to encrypt
405  std::set<unsigned> skip_crypto_ids_;
406 
407  // maps `dccl.id`s onto Message Descriptors
408  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
409  std::string id_codec_;
410 
411  std::vector<void*> dl_handles_;
412 
413  std::string build_guard_for_console_output(std::string& base, char guard_char) const;
414 
415  FieldCodecManagerLocal manager_;
416 };
417 
418 inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
419 {
420  codec.info_all(&os);
421  return os;
422 }
423 } // namespace dccl
424 
425 template <typename GoogleProtobufMessagePointer>
426 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes,
427  bool header_only /* = false */)
428 {
429  unsigned this_id = id(bytes);
430 
431  if (!id2desc_.count(this_id))
432  throw(Exception("Message id " + std::to_string(this_id) +
433  " has not been loaded. Call load() before decoding this type."));
434 
435  // ownership of this object goes to the caller of decode()
436  auto msg = dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
437  id2desc_.find(this_id)->second);
438  decode(bytes, &(*msg), header_only);
439  return msg;
440 }
441 
442 template <typename GoogleProtobufMessagePointer>
443 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
444 {
445  unsigned this_id = id(*bytes);
446 
447  if (!id2desc_.count(this_id))
448  throw(Exception("Message id " + std::to_string(this_id) +
449  " has not been loaded. Call load() before decoding this type."));
450 
451  GoogleProtobufMessagePointer msg =
452  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
453  id2desc_.find(this_id)->second);
454  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
455  bytes->erase(bytes->begin(), new_begin);
456  return msg;
457 }
458 
459 template <typename CharIterator>
460 unsigned dccl::Codec::id(CharIterator begin, CharIterator end) const
461 {
462  unsigned id_min_size = 0, id_max_size = 0;
463  id_codec()->field_min_size(&id_min_size, nullptr);
464  id_codec()->field_max_size(&id_max_size, nullptr);
465 
466  if (std::distance(begin, end) < (id_min_size / BITS_IN_BYTE))
467  throw(Exception("Bytes passed (hex: " + hex_encode(begin, end) +
468  ") is too small to be a valid DCCL message"));
469 
470  Bitset fixed_header_bits;
471  fixed_header_bits.from_byte_stream(
472  begin, begin + (size_t)std::ceil(double(id_max_size) / BITS_IN_BYTE));
473 
474  Bitset these_bits(&fixed_header_bits);
475  these_bits.get_more_bits(id_min_size);
476 
477  dccl::any return_value;
478  id_codec()->field_decode(&these_bits, &return_value, nullptr);
479 
480  return dccl::any_cast<uint32>(return_value);
481 }
482 
483 template <typename CharIterator>
484 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end,
485  google::protobuf::Message* msg, bool header_only /*= false*/)
486 {
487  try
488  {
489  unsigned this_id = id(begin, end);
490 
491  dlog.is(logger::DEBUG1, logger::DECODE) &&
492  dlog << "Began decoding message of id: " << this_id << std::endl;
493 
494  if (!id2desc_.count(this_id))
495  throw(Exception("Message id " + std::to_string(this_id) +
496  " has not been loaded. Call load() before decoding this type."));
497 
498  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
499 
500  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name()
501  << std::endl;
502 
503  std::shared_ptr<FieldCodecBase> codec = manager_.find(desc);
504  std::shared_ptr<internal::FromProtoCppTypeBase> helper = manager_.type_helper().find(desc);
505 
506  CharIterator actual_end = end;
507  if (codec)
508  {
509  unsigned head_size_bits;
510  unsigned body_size_bits;
511  codec->base_max_size(&head_size_bits, desc, HEAD);
512  codec->base_max_size(&body_size_bits, desc, BODY);
513  unsigned id_size = 0;
514  id_codec()->field_size(&id_size, this_id, nullptr);
515  head_size_bits += id_size;
516 
517  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
518  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
519 
520  dlog.is(logger::DEBUG2, logger::DECODE) &&
521  dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
522  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits
523  << ")" << std::endl;
524 
525  CharIterator head_bytes_end = begin + head_size_bytes;
526  dlog.is(logger::DEBUG3, logger::DECODE) &&
527  dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end)
528  << std::endl;
529 
530  Bitset head_bits;
531  head_bits.from_byte_stream(begin, head_bytes_end);
532  dlog.is(logger::DEBUG3, logger::DECODE) &&
533  dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
534 
535  // shift off ID bits
536  head_bits >>= id_size;
537 
538  dlog.is(logger::DEBUG3, logger::DECODE) &&
539  dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
540 
541  internal::MessageStack msg_stack(manager_.codec_data().root_message_,
542  manager_.codec_data().message_data_);
543  msg_stack.push(msg->GetDescriptor());
544 
545  codec->base_decode(&head_bits, msg, HEAD);
546  dlog.is(logger::DEBUG2, logger::DECODE) &&
547  dlog << "after header decode, message is: " << *msg << std::endl;
548 
549  if (header_only)
550  {
551  dlog.is(logger::DEBUG2, logger::DECODE) &&
552  dlog << "as requested, skipping decrypting and decoding body." << std::endl;
553  actual_end = head_bytes_end;
554  }
555  else
556  {
557  dlog.is(logger::DEBUG3, logger::DECODE) &&
558  dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end)
559  << std::endl;
560 
561  Bitset body_bits;
562  if (!crypto_key_.empty() && !skip_crypto_ids_.count(this_id))
563  {
564  std::string head_bytes(begin, head_bytes_end);
565  std::string body_bytes(head_bytes_end, end);
566  decrypt(&body_bytes, head_bytes);
567  dlog.is(logger::DEBUG3, logger::DECODE) &&
568  dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
569  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
570  }
571  else
572  {
573  dlog.is(logger::DEBUG3, logger::DECODE) &&
574  dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end)
575  << std::endl;
576  body_bits.from_byte_stream(head_bytes_end, end);
577  }
578 
579  dlog.is(logger::DEBUG3, logger::DECODE) &&
580  dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
581 
582  codec->base_decode(&body_bits, msg, BODY);
583  dlog.is(logger::DEBUG2, logger::DECODE) &&
584  dlog << "after header & body decode, message is: " << *msg << std::endl;
585 
586  actual_end = end - body_bits.size() / BITS_IN_BYTE;
587  }
588  }
589  else
590  {
591  throw(Exception("Failed to find (dccl.msg).codec `" +
592  desc->options().GetExtension(dccl::msg).codec() + "`"),
593  desc);
594  }
595 
596  dlog.is(logger::DEBUG1, logger::DECODE) &&
597  dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
598  return actual_end;
599  }
600  catch (std::exception& e)
601  {
602  std::stringstream ss;
603 
604  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what()
605  << std::endl;
606 
607  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
608  throw(Exception(ss.str()));
609  }
610 }
611 
612 #endif
dccl::Codec::id
unsigned id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:236
dccl::Bitset::get_more_bits
void get_more_bits(size_type num_bits)
Retrieve more bits from the parent Bitset.
Definition: bitset.h:419
dccl
Dynamic Compact Control Language namespace.
Definition: any.h:49
dccl::Logger::is
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:191
dccl::Exception
Exception class for DCCL.
Definition: exception.h:47
dccl::Codec::unload
void unload()
Unload a given message.
Definition: codec.h:124
dccl::Codec::set_id_codec
void set_id_codec(const std::string &id_codec_name)
Set a different ID codec name (note that this calls unload_all() so all messages must be reloaded)
Definition: codec.cpp:769
dccl::FieldCodecManagerLocal::find
std::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, bool has_codec_group, const std::string &codec_group) const
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
Definition: field_codec_manager.h:114
dccl::Codec::min_size
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:352
dccl::Codec::max_size
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:340
dccl::uint32
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:55
dccl::internal::MessageStack
Definition: field_codec_message_stack.h:72
dccl::Codec
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:61
dccl::Codec::Codec
Codec(std::string dccl_id_codec_name=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec (loaded via a shared librar...
Definition: codec.cpp:74
dccl::Codec::size
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:497
dccl::Codec::set_strict
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition: codec.h:164
dccl::Codec::unload_library
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:708
dccl::hex_encode
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:100
dccl::Bitset::from_byte_stream
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:340
dccl::Bitset
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition: bitset.h:41
dccl::Codec::load_library
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:696
dccl::FieldCodecManagerLocal::add
std::enable_if< std::is_base_of< google::protobuf::Message, typename Codec::wire_type >::value &&!std::is_same< google::protobuf::Message, typename Codec::wire_type >::value, void >::type add(const std::string &name)
Add a new field codec (used for codecs operating on statically generated Protobuf messages,...
Definition: field_codec_manager.h:298
dccl::Codec::encode
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition: codec.cpp:318
dccl::Codec::set_crypto_passphrase
void set_crypto_passphrase(const std::string &passphrase, const std::set< unsigned > &do_not_encrypt_ids_=std::set< unsigned >())
Set a passphrase to be used when encoding messages to encrypt them and to decrypt messages after decod...
Definition: codec.cpp:720
Message
dccl::Codec::Codec
Codec(const std::string &dccl_id_codec_name, const IDFieldCodec &dccl_id_codec)
Instantiate a Codec with a non-default identifier field codec (loaded directly).
Definition: codec.h:81
dccl::Codec::info_all
void info_all(std::ostream *os=nullptr) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types.
Definition: codec.cpp:744
dccl::Codec::load
void load()
All messages must be explicitly loaded and validated (size checks, option extensions checks,...
Definition: codec.h:119
dccl::Codec::info
void info(std::ostream *os=nullptr, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:185
dccl::Codec::loaded
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:249
dccl::Codec::set_console_width
void set_console_width(unsigned num_chars)
Set the number of characters used in programmatic generation of console outputs.
Definition: codec.h:169
dccl::Codec::decode
CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:484
dccl::Codec::id
unsigned id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ....
Definition: codec.h:206
dccl::Codec::~Codec
virtual ~Codec()
Destructor.
Definition: codec.cpp:86
dccl::Bitset::to_byte_string
std::string to_byte_string()
Returns the value of the Bitset to a byte string, where each character represents 8 bits of the Bitse...
Definition: bitset.h:296