DCCL v3
codec.h
1 // Copyright 2009-2017 Toby Schneider (http://gobysoft.org/index.wt/people/toby)
2 // GobySoft, LLC (for 2013-)
3 // Massachusetts Institute of Technology (for 2007-2014)
4 // Community contributors (see AUTHORS file)
5 //
6 //
7 // This file is part of the Dynamic Compact Control Language Library
8 // ("DCCL").
9 //
10 // DCCL is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 2.1 of the License, or
13 // (at your option) any later version.
14 //
15 // DCCL is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
22 #ifndef DCCL20091211H
23 #define DCCL20091211H
24 
25 #include <string>
26 #include <set>
27 #include <map>
28 #include <ostream>
29 #include <stdexcept>
30 #include <vector>
31 
32 #include <google/protobuf/descriptor.h>
33 
34 #include <boost/shared_ptr.hpp>
35 
36 #include "binary.h"
37 #include "dynamic_protobuf_manager.h"
38 #include "logger.h"
39 #include "exception.h"
40 #include "field_codec.h"
41 #include "field_codec_fixed.h"
42 
43 #include "codecs2/field_codec_default_message.h"
44 #include "codecs3/field_codec_default_message.h"
45 #include "field_codec_manager.h"
46 
47 #define DCCL_HAS_CRYPTOPP 1
48 
50 namespace dccl
51 {
52  class FieldCodec;
53 
56  class Codec
57  {
58  public:
64  Codec(const std::string& dccl_id_codec = default_id_codec_name(),
65  const std::string& library_path = "");
66 
68  virtual ~Codec();
69 
75  void load_library(void* dl_handle);
76 
83  void unload_library(void* dl_handle);
84 
89  void load_library(const std::string& library_path);
90 
95  template<typename ProtobufMessage>
96  void load()
97  { load(ProtobufMessage::descriptor()); }
98 
102  template<typename ProtobufMessage>
103  void unload()
104  { unload(ProtobufMessage::descriptor()); }
105 
106 
113  void load(const google::protobuf::Descriptor* desc, int user_id = - 1);
114 
119  void unload(const google::protobuf::Descriptor* desc);
120 
125  void unload(size_t dccl_id);
126 
132  void set_crypto_passphrase(const std::string& passphrase,
133  const std::set<unsigned>& do_not_encrypt_ids_ = std::set<unsigned>());
134 
135 
139  void set_strict(bool mode) { strict_ = mode; }
140 
142 
146 
147 
154  template<typename ProtobufMessage>
155  void info(std::ostream* os = 0, int user_id = -1) const
156  { info(ProtobufMessage::descriptor(), os, user_id); }
157 
163  void info(const google::protobuf::Descriptor* desc, std::ostream* os = 0, int user_id = - 1) const;
164 
168  void info_all(std::ostream* os = 0) const;
169 
173  template <typename ProtobufMessage>
174  unsigned id() const
175  { return id(ProtobufMessage::descriptor()); }
176 
196  unsigned id(const std::string& bytes) const;
197 
199  template<typename CharIterator>
200  unsigned id(CharIterator begin, CharIterator end) const;
201 
203  unsigned id(const google::protobuf::Descriptor* desc) const {
204  Bitset id_bits;
205  dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
206  // pass the hard coded id, that is, (dccl.msg).id,
207  // through encode/decode to allow a custom ID codec (if in use)
208  // to always take effect.
209  id_codec()->field_encode(&id_bits, hardcoded_id, 0);
210  std::string id_bytes(id_bits.to_byte_string());
211  return id(id_bytes);
212  }
213 
215  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
216 
218 
219 
223 
224 
233  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false, int user_id = -1);
234 
245  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg, bool header_only = false, int user_id = -1);
246 
255  template <typename CharIterator>
256  CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only = false);
257 
264  void decode(const std::string& bytes, google::protobuf::Message* msg, bool header_only = false);
265 
271  void decode(std::string* bytes, google::protobuf::Message* msg);
272 
280  template<typename GoogleProtobufMessagePointer>
281  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
282 
289  template<typename GoogleProtobufMessagePointer>
290  GoogleProtobufMessagePointer decode(std::string* bytes);
291 
298  unsigned size(const google::protobuf::Message& msg, int user_id = -1);
299 
304  template<typename ProtobufMessage>
305  unsigned max_size()
306  { return max_size(ProtobufMessage::descriptor()); }
307 
309  unsigned max_size(const google::protobuf::Descriptor* desc) const;
310 
315  template<typename ProtobufMessage>
316  unsigned min_size()
317  { return min_size(ProtobufMessage::descriptor()); }
318 
320  unsigned min_size(const google::protobuf::Descriptor* desc) const;
321 
322 
324 
325 
326  static std::string default_id_codec_name()
327  { return "dccl.default.id"; }
328 
329 
330  static std::string default_codec_name(int version = 2)
331  {
332  switch(version)
333  {
334  case 2:
335  return dccl::DCCLFieldOptions::descriptor()->FindFieldByName("codec")->default_value_string();
336  default:
337  return "dccl.default" + boost::lexical_cast<std::string>(version);
338  }
339 
340  }
341 
342 
343  private:
344  friend class v2::DefaultMessageCodec;
345  Codec(const Codec&);
346  Codec& operator= (const Codec&);
347 
348  void encode_internal(const google::protobuf::Message& msg, bool header_only, Bitset& header_bits, Bitset& body_bits, int user_id);
349 
350  void encrypt(std::string* s, const std::string& nonce);
351  void decrypt(std::string* s, const std::string& nonce);
352 
353  void set_default_codecs();
354 
355  boost::shared_ptr<FieldCodecBase> id_codec() const
356  {
357  return FieldCodecManager::find(google::protobuf::FieldDescriptor::TYPE_UINT32,
358  id_codec_);
359  }
360 
361  private:
362  // SHA256 hash of the crypto passphrase
363  std::string crypto_key_;
364 
365  // strict mode setting
366  bool strict_;
367 
368  // set of DCCL IDs *not* to encrypt
369  std::set<unsigned> skip_crypto_ids_;
370 
371  // maps `dccl.id`s onto Message Descriptors
372  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
373  std::string id_codec_;
374 
375  std::vector<void *> dl_handles_;
376 
377  };
378 
379  inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
380  {
381  codec.info_all(&os);
382  return os;
383  }
384 }
385 
386 template<typename GoogleProtobufMessagePointer>
387 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes, bool header_only /* = false */)
388 {
389  unsigned this_id = id(bytes);
390 
391  if(!id2desc_.count(this_id))
392  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
393 
394  // ownership of this object goes to the caller of decode()
395  GoogleProtobufMessagePointer msg =
396  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
397  decode(bytes, &(*msg), header_only);
398  return msg;
399 }
400 
401 template<typename GoogleProtobufMessagePointer>
402 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
403 {
404  unsigned this_id = id(*bytes);
405 
406  if(!id2desc_.count(this_id))
407  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
408 
409  GoogleProtobufMessagePointer msg =
410  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
411  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
412  bytes->erase(bytes->begin(), new_begin);
413  return msg;
414 }
415 
416 template<typename CharIterator>
417 unsigned dccl::Codec::id(CharIterator begin, CharIterator end) const
418 {
419  unsigned id_min_size = 0, id_max_size = 0;
420  id_codec()->field_min_size(&id_min_size, 0);
421  id_codec()->field_max_size(&id_max_size, 0);
422 
423  if(std::distance(begin, end) < (id_min_size / BITS_IN_BYTE))
424  throw(Exception("Bytes passed (hex: " + hex_encode(begin, end) + ") is too small to be a valid DCCL message"));
425 
426  Bitset fixed_header_bits;
427  fixed_header_bits.from_byte_stream(begin, begin+(size_t)std::ceil(double(id_max_size) / BITS_IN_BYTE));
428 
429  Bitset these_bits(&fixed_header_bits);
430  these_bits.get_more_bits(id_min_size);
431 
432  boost::any return_value;
433  id_codec()->field_decode(&these_bits, &return_value, 0);
434 
435  return boost::any_cast<uint32>(return_value);
436 }
437 
438 template <typename CharIterator>
439 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only /*= false*/)
440 {
441  try
442  {
443  unsigned this_id = id(begin, end);
444 
445  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Began decoding message of id: " << this_id << std::endl;
446 
447  if(!id2desc_.count(this_id))
448  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
449 
450  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
451 
452  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name() << std::endl;
453 
454  boost::shared_ptr<FieldCodecBase> codec = FieldCodecManager::find(desc);
455  boost::shared_ptr<internal::FromProtoCppTypeBase> helper = internal::TypeHelper::find(desc);
456 
457  CharIterator actual_end = end;
458  if(codec)
459  {
460  unsigned head_size_bits;
461  unsigned body_size_bits;
462  codec->base_max_size(&head_size_bits, desc, HEAD);
463  codec->base_max_size(&body_size_bits, desc, BODY);
464  unsigned id_size = 0;
465  id_codec()->field_size(&id_size, this_id, 0);
466  head_size_bits += id_size;
467 
468  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
469  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
470 
471  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
472  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits << ")" << std::endl;
473 
474  CharIterator head_bytes_end = begin + head_size_bytes;
475  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end) << std::endl;
476 
477  Bitset head_bits;
478  head_bits.from_byte_stream(begin, head_bytes_end);
479  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
480 
481  // shift off ID bits
482  head_bits >>= id_size;
483 
484  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
485 
486  internal::MessageStack msg_stack;
487  msg_stack.push(msg->GetDescriptor());
488 
489  codec->base_decode(&head_bits, msg, HEAD);
490  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header decode, message is: " << *msg << std::endl;
491 
492 
493  if(header_only)
494  {
495  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "as requested, skipping decrypting and decoding body." << std::endl;
496  actual_end = head_bytes_end;
497  }
498  else
499  {
500  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
501 
502  Bitset body_bits;
503  if(!crypto_key_.empty() && !skip_crypto_ids_.count(this_id))
504  {
505  std::string head_bytes(begin, head_bytes_end);
506  std::string body_bytes(head_bytes_end, end);
507  decrypt(&body_bytes, head_bytes);
508  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
509  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
510  }
511  else
512  {
513  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
514  body_bits.from_byte_stream(head_bytes_end, end);
515  }
516 
517  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
518 
519  codec->base_decode(&body_bits, msg, BODY);
520  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header & body decode, message is: " << *msg << std::endl;
521 
522  actual_end = end - body_bits.size()/BITS_IN_BYTE;
523  }
524  }
525  else
526  {
527  throw(Exception("Failed to find (dccl.msg).codec `" + desc->options().GetExtension(dccl::msg).codec() + "`"));
528  }
529 
530  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
531  return actual_end;
532  }
533  catch(std::exception& e)
534  {
535  std::stringstream ss;
536 
537  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what() << std::endl;
538 
539  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
540  throw(Exception(ss.str()));
541  }
542 }
543 
544 #endif
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:56
virtual ~Codec()
Destructor.
Definition: codec.cpp:80
void info(std::ostream *os=0, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:155
Codec(const std::string &dccl_id_codec=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec.
Definition: codec.cpp:68
void load()
All messages must be explicitly loaded and validated (size checks, option extensions checks...
Definition: codec.h:96
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:305
unsigned id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:203
Provides the default codec for encoding a base Google Protobuf message or an embedded message by call...
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition: codec.cpp:283
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:628
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:616
static boost::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, bool has_codec_group, const std::string &codec_group)
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:358
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition: codec.h:139
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:316
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:55
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:429
unsigned id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ...
Definition: codec.h:174
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:215
void get_more_bits(size_type num_bits)
Retrieve more bits from the parent Bitset.
Definition: bitset.h:447
void unload()
Unload a given message.
Definition: codec.h:103
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:100
Dynamic Compact Control Language namespace.
void set_crypto_passphrase(const std::string &passphrase, const std::set< unsigned > &do_not_encrypt_ids_=std::set< unsigned >())
Set a passphrase to be used to encrypt messages when encoding them and to decrypt messages after decod...
Definition: codec.cpp:641
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:145
Exception class for DCCL.
Definition: exception.h:31
std::string to_byte_string()
Returns the value of the Bitset as a byte string, where each character represents 8 bits of the Bitse...
Definition: bitset.h:310
CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:439
void info_all(std::ostream *os=0) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types...
Definition: codec.cpp:661
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy...
Definition: bitset.h:38