DCCL v3
codec.h
1 // Copyright 2009-2017 Toby Schneider (http://gobysoft.org/index.wt/people/toby)
2 // GobySoft, LLC (for 2013-)
3 // Massachusetts Institute of Technology (for 2007-2014)
4 // Community contributors (see AUTHORS file)
5 //
6 //
7 // This file is part of the Dynamic Compact Control Language Library
8 // ("DCCL").
9 //
10 // DCCL is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 2.1 of the License, or
13 // (at your option) any later version.
14 //
15 // DCCL is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
22 #ifndef DCCL20091211H
23 #define DCCL20091211H
24 
25 #include <string>
26 #include <set>
27 #include <map>
28 #include <ostream>
29 #include <stdexcept>
30 #include <vector>
31 
32 #include <google/protobuf/descriptor.h>
33 
34 #include <boost/shared_ptr.hpp>
35 
36 #include "binary.h"
37 #include "dynamic_protobuf_manager.h"
38 #include "logger.h"
39 #include "exception.h"
40 #include "field_codec.h"
41 #include "field_codec_fixed.h"
42 
43 #include "codecs2/field_codec_default_message.h"
44 #include "codecs3/field_codec_default_message.h"
45 #include "field_codec_manager.h"
46 
47 #define DCCL_HAS_CRYPTOPP 1
48 
50 namespace dccl
51 {
52  class FieldCodec;
53 
56  class Codec
57  {
58  public:
64  Codec(const std::string& dccl_id_codec = default_id_codec_name(),
65  const std::string& library_path = "");
66 
68  virtual ~Codec();
69 
75  void load_library(void* dl_handle);
76 
83  void unload_library(void* dl_handle);
84 
89  void load_library(const std::string& library_path);
90 
95  template<typename ProtobufMessage>
96  void load()
97  { load(ProtobufMessage::descriptor()); }
98 
102  template<typename ProtobufMessage>
103  void unload()
104  { unload(ProtobufMessage::descriptor()); }
105 
106 
107  void unload_all()
108  { id2desc_.clear(); }
109 
116  void load(const google::protobuf::Descriptor* desc, int user_id = - 1);
117 
122  void unload(const google::protobuf::Descriptor* desc);
123 
128  void unload(size_t dccl_id);
129 
131  void set_id_codec(const std::string& id_codec_name);
132  std::string get_id_codec() { return id_codec_; }
133 
134 
140  void set_crypto_passphrase(const std::string& passphrase,
141  const std::set<unsigned>& do_not_encrypt_ids_ = std::set<unsigned>());
142 
143 
147  void set_strict(bool mode) { strict_ = mode; }
148 
150 
154 
155 
162  template<typename ProtobufMessage>
163  void info(std::ostream* os = 0, int user_id = -1) const
164  { info(ProtobufMessage::descriptor(), os, user_id); }
165 
171  void info(const google::protobuf::Descriptor* desc, std::ostream* os = 0, int user_id = - 1) const;
172 
176  void info_all(std::ostream* os = 0) const;
177 
181  template <typename ProtobufMessage>
182  unsigned id() const
183  { return id(ProtobufMessage::descriptor()); }
184 
204  unsigned id(const std::string& bytes) const;
205 
207  template<typename CharIterator>
208  unsigned id(CharIterator begin, CharIterator end) const;
209 
211  unsigned id(const google::protobuf::Descriptor* desc) const {
212  Bitset id_bits;
213  dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
214  // pass the hard coded id, that is, (dccl.msg).id,
215  // through encode/decode to allow a custom ID codec (if in use)
216  // to always take effect.
217  id_codec()->field_encode(&id_bits, hardcoded_id, 0);
218  std::string id_bytes(id_bits.to_byte_string());
219  return id(id_bytes);
220  }
221 
223  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
224 
226 
227 
231 
232 
241  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false, int user_id = -1);
242 
253  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg, bool header_only = false, int user_id = -1);
254 
263  template <typename CharIterator>
264  CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only = false);
265 
272  void decode(const std::string& bytes, google::protobuf::Message* msg, bool header_only = false);
273 
279  void decode(std::string* bytes, google::protobuf::Message* msg);
280 
288  template<typename GoogleProtobufMessagePointer>
289  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
290 
297  template<typename GoogleProtobufMessagePointer>
298  GoogleProtobufMessagePointer decode(std::string* bytes);
299 
306  unsigned size(const google::protobuf::Message& msg, int user_id = -1);
307 
312  template<typename ProtobufMessage>
313  unsigned max_size()
314  { return max_size(ProtobufMessage::descriptor()); }
315 
317  unsigned max_size(const google::protobuf::Descriptor* desc) const;
318 
323  template<typename ProtobufMessage>
324  unsigned min_size()
325  { return min_size(ProtobufMessage::descriptor()); }
326 
328  unsigned min_size(const google::protobuf::Descriptor* desc) const;
329 
330 
332 
333 
334  static std::string default_id_codec_name()
335  { return "dccl.default.id"; }
336 
337 
338  static std::string default_codec_name(int version = 2)
339  {
340  switch(version)
341  {
342  case 2:
343  return dccl::DCCLFieldOptions::descriptor()->FindFieldByName("codec")->default_value_string();
344  default:
345  return "dccl.default" + boost::lexical_cast<std::string>(version);
346  }
347 
348  }
349 
350 
351  private:
352  friend class v2::DefaultMessageCodec;
353  Codec(const Codec&);
354  Codec& operator= (const Codec&);
355 
356  void encode_internal(const google::protobuf::Message& msg, bool header_only, Bitset& header_bits, Bitset& body_bits, int user_id);
357 
358  void encrypt(std::string* s, const std::string& nonce);
359  void decrypt(std::string* s, const std::string& nonce);
360 
361  void set_default_codecs();
362 
363  boost::shared_ptr<FieldCodecBase> id_codec() const
364  {
365  return FieldCodecManager::find(google::protobuf::FieldDescriptor::TYPE_UINT32,
366  id_codec_);
367  }
368 
369  private:
370  // SHA256 hash of the crypto passphrase
371  std::string crypto_key_;
372 
373  // strict mode setting
374  bool strict_;
375 
376  // set of DCCL IDs *not* to encrypt
377  std::set<unsigned> skip_crypto_ids_;
378 
379  // maps `dccl.id`s onto Message Descriptors
380  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
381  std::string id_codec_;
382 
383  std::vector<void *> dl_handles_;
384 
385  };
386 
387  inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
388  {
389  codec.info_all(&os);
390  return os;
391  }
392 }
393 
394 template<typename GoogleProtobufMessagePointer>
395 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes, bool header_only /* = false */)
396 {
397  unsigned this_id = id(bytes);
398 
399  if(!id2desc_.count(this_id))
400  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
401 
402  // ownership of this object goes to the caller of decode()
403  GoogleProtobufMessagePointer msg =
404  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
405  decode(bytes, &(*msg), header_only);
406  return msg;
407 }
408 
409 template<typename GoogleProtobufMessagePointer>
410 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
411 {
412  unsigned this_id = id(*bytes);
413 
414  if(!id2desc_.count(this_id))
415  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
416 
417  GoogleProtobufMessagePointer msg =
418  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
419  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
420  bytes->erase(bytes->begin(), new_begin);
421  return msg;
422 }
423 
424 template<typename CharIterator>
425 unsigned dccl::Codec::id(CharIterator begin, CharIterator end) const
426 {
427  unsigned id_min_size = 0, id_max_size = 0;
428  id_codec()->field_min_size(&id_min_size, 0);
429  id_codec()->field_max_size(&id_max_size, 0);
430 
431  if(std::distance(begin, end) < (id_min_size / BITS_IN_BYTE))
432  throw(Exception("Bytes passed (hex: " + hex_encode(begin, end) + ") is too small to be a valid DCCL message"));
433 
434  Bitset fixed_header_bits;
435  fixed_header_bits.from_byte_stream(begin, begin+(size_t)std::ceil(double(id_max_size) / BITS_IN_BYTE));
436 
437  Bitset these_bits(&fixed_header_bits);
438  these_bits.get_more_bits(id_min_size);
439 
440  boost::any return_value;
441  id_codec()->field_decode(&these_bits, &return_value, 0);
442 
443  return boost::any_cast<uint32>(return_value);
444 }
445 
446 template <typename CharIterator>
447 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only /*= false*/)
448 {
449  try
450  {
451  unsigned this_id = id(begin, end);
452 
453  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Began decoding message of id: " << this_id << std::endl;
454 
455  if(!id2desc_.count(this_id))
456  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
457 
458  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
459 
460  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name() << std::endl;
461 
462  boost::shared_ptr<FieldCodecBase> codec = FieldCodecManager::find(desc);
463  boost::shared_ptr<internal::FromProtoCppTypeBase> helper = internal::TypeHelper::find(desc);
464 
465  CharIterator actual_end = end;
466  if(codec)
467  {
468  unsigned head_size_bits;
469  unsigned body_size_bits;
470  codec->base_max_size(&head_size_bits, desc, HEAD);
471  codec->base_max_size(&body_size_bits, desc, BODY);
472  unsigned id_size = 0;
473  id_codec()->field_size(&id_size, this_id, 0);
474  head_size_bits += id_size;
475 
476  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
477  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
478 
479  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
480  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits << ")" << std::endl;
481 
482  CharIterator head_bytes_end = begin + head_size_bytes;
483  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end) << std::endl;
484 
485  Bitset head_bits;
486  head_bits.from_byte_stream(begin, head_bytes_end);
487  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
488 
489  // shift off ID bits
490  head_bits >>= id_size;
491 
492  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
493 
494  internal::MessageStack msg_stack;
495  msg_stack.push(msg->GetDescriptor());
496 
497  codec->base_decode(&head_bits, msg, HEAD);
498  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header decode, message is: " << *msg << std::endl;
499 
500 
501  if(header_only)
502  {
503  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "as requested, skipping decrypting and decoding body." << std::endl;
504  actual_end = head_bytes_end;
505  }
506  else
507  {
508  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
509 
510  Bitset body_bits;
511  if(!crypto_key_.empty() && !skip_crypto_ids_.count(this_id))
512  {
513  std::string head_bytes(begin, head_bytes_end);
514  std::string body_bytes(head_bytes_end, end);
515  decrypt(&body_bytes, head_bytes);
516  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
517  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
518  }
519  else
520  {
521  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
522  body_bits.from_byte_stream(head_bytes_end, end);
523  }
524 
525  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
526 
527  codec->base_decode(&body_bits, msg, BODY);
528  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header & body decode, message is: " << *msg << std::endl;
529 
530  actual_end = end - body_bits.size()/BITS_IN_BYTE;
531  }
532  }
533  else
534  {
535  throw(Exception("Failed to find (dccl.msg).codec `" + desc->options().GetExtension(dccl::msg).codec() + "`"));
536  }
537 
538  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
539  return actual_end;
540  }
541  catch(std::exception& e)
542  {
543  std::stringstream ss;
544 
545  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what() << std::endl;
546 
547  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
548  throw(Exception(ss.str()));
549  }
550 }
551 
552 #endif
dccl::Codec::id
unsigned id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:211
dccl::Bitset::get_more_bits
void get_more_bits(size_type num_bits)
Retrieve more bits from the parent Bitset.
Definition: bitset.h:447
dccl
Dynamic Compact Control Language namespace.
Definition: gen_units_class_plugin.h:49
dccl::Logger::is
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:145
dccl::Exception
Exception class for DCCL.
Definition: exception.h:31
dccl::Codec::unload
void unload()
Unload a given message.
Definition: codec.h:103
dccl::Codec::set_id_codec
void set_id_codec(const std::string &id_codec_name)
Set a different ID codec name (note that this calls unload_all() so all messages must be reloaded)
Definition: codec.cpp:682
dccl::Codec::min_size
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:324
dccl::Codec::max_size
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:313
dccl::uint32
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:55
dccl::internal::MessageStack
Definition: field_codec_message_stack.h:36
dccl::Codec
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:56
dccl::Codec::size
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:429
dccl::Codec::set_strict
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition: codec.h:147
dccl::Codec::unload_library
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:628
dccl::hex_encode
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:100
dccl::Bitset::from_byte_stream
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:358
dccl::Bitset
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition: bitset.h:38
dccl::Codec::load_library
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:616
dccl::FieldCodecManager::find
static boost::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, bool has_codec_group, const std::string &codec_group)
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
Definition: field_codec_manager.h:121
dccl::Codec::encode
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition: codec.cpp:283
dccl::Codec::set_crypto_passphrase
void set_crypto_passphrase(const std::string &passphrase, const std::set< unsigned > &do_not_encrypt_ids_=std::set< unsigned >())
Set a passphrase to be used when encoding messages to encrypt them and to decrypt messages after decod...
Definition: codec.cpp:641
Message
dccl::Codec::info
void info(std::ostream *os=0, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:163
dccl::Codec::load
void load()
All messages must be explicitly loaded and validated (size checks, option extensions checks,...
Definition: codec.h:96
dccl::Codec::loaded
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:223
dccl::Codec::Codec
Codec(const std::string &dccl_id_codec=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec.
Definition: codec.cpp:68
dccl::Codec::decode
CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:447
dccl::Codec::id
unsigned id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ....
Definition: codec.h:182
dccl::Codec::info_all
void info_all(std::ostream *os=0) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types.
Definition: codec.cpp:661
dccl::Codec::~Codec
virtual ~Codec()
Destructor.
Definition: codec.cpp:80
dccl::Bitset::to_byte_string
std::string to_byte_string()
Returns the value of the Bitset to a byte string, where each character represents 8 bits of the Bitse...
Definition: bitset.h:310