DCCL v4
codec.h
1 // Copyright 2009-2017 Toby Schneider (http://gobysoft.org/index.wt/people/toby)
2 // GobySoft, LLC (for 2013-)
3 // Massachusetts Institute of Technology (for 2007-2014)
4 // Community contributors (see AUTHORS file)
5 //
6 //
7 // This file is part of the Dynamic Compact Control Language Library
8 // ("DCCL").
9 //
10 // DCCL is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 2.1 of the License, or
13 // (at your option) any later version.
14 //
15 // DCCL is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
22 #ifndef DCCL20091211H
23 #define DCCL20091211H
24 
25 #include <string>
26 #include <set>
27 #include <map>
28 #include <ostream>
29 #include <stdexcept>
30 #include <vector>
31 
32 #include <google/protobuf/descriptor.h>
33 
34 #include <boost/shared_ptr.hpp>
35 
36 #include "binary.h"
37 #include "dynamic_protobuf_manager.h"
38 #include "logger.h"
39 #include "exception.h"
40 #include "field_codec.h"
41 #include "field_codec_fixed.h"
42 
43 #include "codecs2/field_codec_default_message.h"
44 #include "codecs3/field_codec_default_message.h"
45 #include "field_codec_manager.h"
46 
47 #define DCCL_HAS_CRYPTOPP 1
48 
50 namespace dccl
51 {
52  class FieldCodec;
53 
56  class Codec
57  {
58  public:
64  Codec(const std::string& dccl_id_codec = default_id_codec_name(),
65  const std::string& library_path = "");
66 
68  virtual ~Codec();
69 
75  void load_library(void* dl_handle);
76 
83  void unload_library(void* dl_handle);
84 
89  void load_library(const std::string& library_path);
90 
95  template<typename ProtobufMessage>
96  void load()
97  { load(ProtobufMessage::descriptor()); }
98 
102  template<typename ProtobufMessage>
103  void unload()
104  { unload(ProtobufMessage::descriptor()); }
105 
106 
107  void unload_all()
108  { id2desc_.clear(); }
109 
116  void load(const google::protobuf::Descriptor* desc, int user_id = - 1);
117 
122  void unload(const google::protobuf::Descriptor* desc);
123 
128  void unload(size_t dccl_id);
129 
131  void set_id_codec(const std::string& id_codec_name);
132  std::string get_id_codec() { return id_codec_; }
133 
134 
140  void set_crypto_passphrase(const std::string& passphrase,
141  const std::set<unsigned>& do_not_encrypt_ids_ = std::set<unsigned>());
142 
143 
147  void set_strict(bool mode) { strict_ = mode; }
148 
149 
153  void set_console_width(unsigned num_chars) { console_width_ = num_chars; }
154 
155 
157 
161 
162 
169  template<typename ProtobufMessage>
170  void info(std::ostream* os = 0, int user_id = -1) const
171  { info(ProtobufMessage::descriptor(), os, user_id); }
172 
178  void info(const google::protobuf::Descriptor* desc, std::ostream* os = 0, int user_id = - 1) const;
179 
183  void info_all(std::ostream* os = 0) const;
184 
188  template <typename ProtobufMessage>
189  unsigned id() const
190  { return id(ProtobufMessage::descriptor()); }
191 
211  unsigned id(const std::string& bytes) const;
212 
214  template<typename CharIterator>
215  unsigned id(CharIterator begin, CharIterator end) const;
216 
218  unsigned id(const google::protobuf::Descriptor* desc) const {
219  Bitset id_bits;
220  dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
221  // pass the hard coded id, that is, (dccl.msg).id,
222  // through encode/decode to allow a custom ID codec (if in use)
223  // to always take effect.
224  id_codec()->field_encode(&id_bits, hardcoded_id, 0);
225  std::string id_bytes(id_bits.to_byte_string());
226  return id(id_bytes);
227  }
228 
230  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
231 
233 
234 
238 
239 
248  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false, int user_id = -1);
249 
260  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg, bool header_only = false, int user_id = -1);
261 
270  template <typename CharIterator>
271  CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only = false);
272 
279  void decode(const std::string& bytes, google::protobuf::Message* msg, bool header_only = false);
280 
286  void decode(std::string* bytes, google::protobuf::Message* msg);
287 
295  template<typename GoogleProtobufMessagePointer>
296  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
297 
304  template<typename GoogleProtobufMessagePointer>
305  GoogleProtobufMessagePointer decode(std::string* bytes);
306 
313  unsigned size(const google::protobuf::Message& msg, int user_id = -1);
314 
319  template<typename ProtobufMessage>
320  unsigned max_size()
321  { return max_size(ProtobufMessage::descriptor()); }
322 
324  unsigned max_size(const google::protobuf::Descriptor* desc) const;
325 
330  template<typename ProtobufMessage>
331  unsigned min_size()
332  { return min_size(ProtobufMessage::descriptor()); }
333 
335  unsigned min_size(const google::protobuf::Descriptor* desc) const;
336 
337 
339 
340 
341  static std::string default_id_codec_name()
342  { return "dccl.default.id"; }
343 
344 
345  static std::string default_codec_name(int version = 2)
346  {
347  switch(version)
348  {
349  case 2:
350  return dccl::DCCLFieldOptions::descriptor()->FindFieldByName("codec")->default_value_string();
351  default:
352  return "dccl.default" + boost::lexical_cast<std::string>(version);
353  }
354 
355  }
356 
357 
358  private:
359  friend class v2::DefaultMessageCodec;
360  Codec(const Codec&);
361  Codec& operator= (const Codec&);
362 
363  void encode_internal(const google::protobuf::Message& msg, bool header_only, Bitset& header_bits, Bitset& body_bits, int user_id);
364  std::string get_all_error_fields_in_message(
365  const google::protobuf::Message& msg,
366  uint8_t depth = 1);
367 
368  void encrypt(std::string* s, const std::string& nonce);
369  void decrypt(std::string* s, const std::string& nonce);
370 
371  void set_default_codecs();
372 
373  boost::shared_ptr<FieldCodecBase> id_codec() const
374  {
375  return FieldCodecManager::find(google::protobuf::FieldDescriptor::TYPE_UINT32,
376  id_codec_);
377  }
378 
379 
380  private:
381  // SHA256 hash of the crypto passphrase
382  std::string crypto_key_;
383 
384  // strict mode setting
385  bool strict_;
386 
387  // console outputting format width
388  unsigned console_width_;
389 
390  // set of DCCL IDs *not* to encrypt
391  std::set<unsigned> skip_crypto_ids_;
392 
393  // maps `dccl.id`s onto Message Descriptors
394  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
395  std::string id_codec_;
396 
397  std::vector<void *> dl_handles_;
398 
399  std::string build_guard_for_console_output(std::string& base, char guard_char) const;
400  };
401 
402  inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
403  {
404  codec.info_all(&os);
405  return os;
406  }
407 }
408 
409 template<typename GoogleProtobufMessagePointer>
410 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes, bool header_only /* = false */)
411 {
412  unsigned this_id = id(bytes);
413 
414  if(!id2desc_.count(this_id))
415  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
416 
417  // ownership of this object goes to the caller of decode()
418  GoogleProtobufMessagePointer msg =
419  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
420  decode(bytes, &(*msg), header_only);
421  return msg;
422 }
423 
424 template<typename GoogleProtobufMessagePointer>
425 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
426 {
427  unsigned this_id = id(*bytes);
428 
429  if(!id2desc_.count(this_id))
430  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
431 
432  GoogleProtobufMessagePointer msg =
433  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
434  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
435  bytes->erase(bytes->begin(), new_begin);
436  return msg;
437 }
438 
439 template<typename CharIterator>
440 unsigned dccl::Codec::id(CharIterator begin, CharIterator end) const
441 {
442  unsigned id_min_size = 0, id_max_size = 0;
443  id_codec()->field_min_size(&id_min_size, 0);
444  id_codec()->field_max_size(&id_max_size, 0);
445 
446  if(std::distance(begin, end) < (id_min_size / BITS_IN_BYTE))
447  throw(Exception("Bytes passed (hex: " + hex_encode(begin, end) + ") is too small to be a valid DCCL message"));
448 
449  Bitset fixed_header_bits;
450  fixed_header_bits.from_byte_stream(begin, begin+(size_t)std::ceil(double(id_max_size) / BITS_IN_BYTE));
451 
452  Bitset these_bits(&fixed_header_bits);
453  these_bits.get_more_bits(id_min_size);
454 
455  boost::any return_value;
456  id_codec()->field_decode(&these_bits, &return_value, 0);
457 
458  return boost::any_cast<uint32>(return_value);
459 }
460 
461 template <typename CharIterator>
462 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only /*= false*/)
463 {
464  try
465  {
466  unsigned this_id = id(begin, end);
467 
468  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Began decoding message of id: " << this_id << std::endl;
469 
470  if(!id2desc_.count(this_id))
471  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
472 
473  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
474 
475  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name() << std::endl;
476 
477  boost::shared_ptr<FieldCodecBase> codec = FieldCodecManager::find(desc);
478  boost::shared_ptr<internal::FromProtoCppTypeBase> helper = internal::TypeHelper::find(desc);
479 
480  CharIterator actual_end = end;
481  if(codec)
482  {
483  unsigned head_size_bits;
484  unsigned body_size_bits;
485  codec->base_max_size(&head_size_bits, desc, HEAD);
486  codec->base_max_size(&body_size_bits, desc, BODY);
487  unsigned id_size = 0;
488  id_codec()->field_size(&id_size, this_id, 0);
489  head_size_bits += id_size;
490 
491  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
492  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
493 
494  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
495  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits << ")" << std::endl;
496 
497  CharIterator head_bytes_end = begin + head_size_bytes;
498  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end) << std::endl;
499 
500  Bitset head_bits;
501  head_bits.from_byte_stream(begin, head_bytes_end);
502  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
503 
504  // shift off ID bits
505  head_bits >>= id_size;
506 
507  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
508 
509  internal::MessageStack msg_stack;
510  msg_stack.push(msg->GetDescriptor());
511 
512  codec->base_decode(&head_bits, msg, HEAD);
513  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header decode, message is: " << *msg << std::endl;
514 
515 
516  if(header_only)
517  {
518  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "as requested, skipping decrypting and decoding body." << std::endl;
519  actual_end = head_bytes_end;
520  }
521  else
522  {
523  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
524 
525  Bitset body_bits;
526  if(!crypto_key_.empty() && !skip_crypto_ids_.count(this_id))
527  {
528  std::string head_bytes(begin, head_bytes_end);
529  std::string body_bytes(head_bytes_end, end);
530  decrypt(&body_bytes, head_bytes);
531  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
532  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
533  }
534  else
535  {
536  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
537  body_bits.from_byte_stream(head_bytes_end, end);
538  }
539 
540  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
541 
542  codec->base_decode(&body_bits, msg, BODY);
543  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header & body decode, message is: " << *msg << std::endl;
544 
545  actual_end = end - body_bits.size()/BITS_IN_BYTE;
546  }
547  }
548  else
549  {
550  throw(Exception("Failed to find (dccl.msg).codec `" + desc->options().GetExtension(dccl::msg).codec() + "`"));
551  }
552 
553  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
554  return actual_end;
555  }
556  catch(std::exception& e)
557  {
558  std::stringstream ss;
559 
560  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what() << std::endl;
561 
562  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
563  throw(Exception(ss.str()));
564  }
565 }
566 
567 #endif
dccl::Codec::id
unsigned id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:218
dccl::Bitset::get_more_bits
void get_more_bits(size_type num_bits)
Retrieve more bits from the parent Bitset.
Definition: bitset.h:447
dccl
Dynamic Compact Control Language namespace.
Definition: gen_units_class_plugin.h:49
dccl::Logger::is
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:145
dccl::Exception
Exception class for DCCL.
Definition: exception.h:31
dccl::Codec::unload
void unload()
Unload a given message.
Definition: codec.h:103
dccl::Codec::set_id_codec
void set_id_codec(const std::string &id_codec_name)
Set a different ID codec name (note that this calls unload_all(), so all messages must be reloaded)
Definition: codec.cpp:762
dccl::Codec::min_size
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:331
dccl::Codec::max_size
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:320
dccl::uint32
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:55
dccl::internal::MessageStack
Definition: field_codec_message_stack.h:41
dccl::Codec
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:56
dccl::Codec::size
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:497
dccl::Codec::set_strict
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition: codec.h:147
dccl::Codec::unload_library
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:703
dccl::hex_encode
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:100
dccl::Bitset::from_byte_stream
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:358
dccl::Bitset
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition: bitset.h:38
dccl::Codec::load_library
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:691
dccl::FieldCodecManager::find
static boost::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, bool has_codec_group, const std::string &codec_group)
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
Definition: field_codec_manager.h:121
dccl::Codec::encode
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition: codec.cpp:331
dccl::Codec::set_crypto_passphrase
void set_crypto_passphrase(const std::string &passphrase, const std::set< unsigned > &do_not_encrypt_ids_=std::set< unsigned >())
Set a passphrase to be used when encoding messages to encrypt them and to decrypt messages after decod...
Definition: codec.cpp:715
Message
dccl::Codec::info
void info(std::ostream *os=0, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:170
dccl::Codec::load
void load()
All messages must be explicitly loaded and validated (size checks, option extension checks,...
Definition: codec.h:96
dccl::Codec::loaded
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:230
dccl::Codec::set_console_width
void set_console_width(unsigned num_chars)
Set the number of characters used in programmatic generation of console outputs.
Definition: codec.h:153
dccl::Codec::Codec
Codec(const std::string &dccl_id_codec=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec.
Definition: codec.cpp:66
dccl::Codec::decode
CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:462
dccl::Codec::id
unsigned id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ....
Definition: codec.h:189
dccl::Codec::info_all
void info_all(std::ostream *os=0) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types.
Definition: codec.cpp:739
dccl::Codec::~Codec
virtual ~Codec()
Destructor.
Definition: codec.cpp:78
dccl::Bitset::to_byte_string
std::string to_byte_string()
Returns the value of the Bitset to a byte string, where each character represents 8 bits of the Bitse...
Definition: bitset.h:310