DCCL v3
codec.h
1 // Copyright 2009-2017 Toby Schneider (http://gobysoft.org/index.wt/people/toby)
2 // GobySoft, LLC (for 2013-)
3 // Massachusetts Institute of Technology (for 2007-2014)
4 // Community contributors (see AUTHORS file)
5 //
6 //
7 // This file is part of the Dynamic Compact Control Language Library
8 // ("DCCL").
9 //
10 // DCCL is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 2.1 of the License, or
13 // (at your option) any later version.
14 //
15 // DCCL is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with DCCL. If not, see <http://www.gnu.org/licenses/>.
22 #ifndef DCCL20091211H
23 #define DCCL20091211H
24 
25 #include <string>
26 #include <set>
27 #include <map>
28 #include <ostream>
29 #include <stdexcept>
30 #include <vector>
31 
32 #include <google/protobuf/descriptor.h>
33 
34 #include <boost/shared_ptr.hpp>
35 
36 #include "binary.h"
37 #include "dynamic_protobuf_manager.h"
38 #include "logger.h"
39 #include "exception.h"
40 #include "field_codec.h"
41 #include "field_codec_fixed.h"
42 
43 #include "codecs2/field_codec_default_message.h"
44 #include "codecs3/field_codec_default_message.h"
45 #include "field_codec_manager.h"
46 
47 #define DCCL_HAS_CRYPTOPP 1
48 
50 namespace dccl
51 {
52  class FieldCodec;
53 
56  class Codec
57  {
58  public:
64  Codec(const std::string& dccl_id_codec = default_id_codec_name(),
65  const std::string& library_path = "");
66 
68  virtual ~Codec();
69 
75  void load_library(void* dl_handle);
76 
83  void unload_library(void* dl_handle);
84 
89  void load_library(const std::string& library_path);
90 
95  template<typename ProtobufMessage>
96  void load()
97  { load(ProtobufMessage::descriptor()); }
98 
102  template<typename ProtobufMessage>
103  void unload()
104  { unload(ProtobufMessage::descriptor()); }
105 
106 
111  void load(const google::protobuf::Descriptor* desc);
112 
117  void unload(const google::protobuf::Descriptor* desc);
118 
124  void set_crypto_passphrase(const std::string& passphrase,
125  const std::set<unsigned>& do_not_encrypt_ids_ = std::set<unsigned>());
126 
128 
132 
133 
138  template<typename ProtobufMessage>
139  void info(std::ostream* os = 0) const
140  { info(ProtobufMessage::descriptor(), os); }
141 
145  void info(const google::protobuf::Descriptor* desc, std::ostream* os = 0) const;
146 
150  void info_all(std::ostream* os = 0) const;
151 
155  template <typename ProtobufMessage>
156  unsigned id() const
157  { return id(ProtobufMessage::descriptor()); }
158 
178  unsigned id(const std::string& bytes);
179 
181  template<typename CharIterator>
182  unsigned id(CharIterator begin, CharIterator end);
183 
185  unsigned id(const google::protobuf::Descriptor* desc) const {
186  return desc->options().GetExtension(dccl::msg).id();
187  }
188 
190  const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
191 
193 
194 
198 
199 
206  void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false);
207 
216  size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg, bool header_only = false);
217 
226  template <typename CharIterator>
227  CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only = false);
228 
235  void decode(const std::string& bytes, google::protobuf::Message* msg, bool header_only = false);
236 
242  void decode(std::string* bytes, google::protobuf::Message* msg);
243 
251  template<typename GoogleProtobufMessagePointer>
252  GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
253 
260  template<typename GoogleProtobufMessagePointer>
261  GoogleProtobufMessagePointer decode(std::string* bytes);
262 
267  unsigned size(const google::protobuf::Message& msg);
268 
273  template<typename ProtobufMessage>
274  unsigned max_size()
275  { return max_size(ProtobufMessage::descriptor()); }
276 
278  unsigned max_size(const google::protobuf::Descriptor* desc) const;
279 
284  template<typename ProtobufMessage>
285  unsigned min_size()
286  { return min_size(ProtobufMessage::descriptor()); }
287 
289  unsigned min_size(const google::protobuf::Descriptor* desc) const;
290 
291 
293 
294 
295  static std::string default_id_codec_name()
296  { return "dccl.default.id"; }
297 
298 
299  static std::string default_codec_name(int version = 2)
300  {
301  switch(version)
302  {
303  case 2:
304  return dccl::DCCLFieldOptions::descriptor()->FindFieldByName("codec")->default_value_string();
305  default:
306  return "dccl.default" + boost::lexical_cast<std::string>(version);
307  }
308 
309  }
310 
311 
312  private:
313  friend class v2::DefaultMessageCodec;
314  Codec(const Codec&);
315  Codec& operator= (const Codec&);
316 
317  void encode_internal(const google::protobuf::Message& msg, bool header_only, Bitset& header_bits, Bitset& body_bits);
318 
319  void encrypt(std::string* s, const std::string& nonce);
320  void decrypt(std::string* s, const std::string& nonce);
321 
322  void set_default_codecs();
323 
324  boost::shared_ptr<FieldCodecBase> id_codec() const
325  {
326  return FieldCodecManager::find(google::protobuf::FieldDescriptor::TYPE_UINT32,
327  id_codec_);
328  }
329 
330  private:
331  // SHA256 hash of the crypto passphrase
332  std::string crypto_key_;
333 
334  // set of DCCL IDs *not* to encrypt
335  std::set<unsigned> skip_crypto_ids_;
336 
337  // maps `dccl.id`s onto Message Descriptors
338  std::map<int32, const google::protobuf::Descriptor*> id2desc_;
339  std::string id_codec_;
340 
341  std::vector<void *> dl_handles_;
342 
343  };
344 
345  inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
346  {
347  codec.info_all(&os);
348  return os;
349  }
350 }
351 
352 template<typename GoogleProtobufMessagePointer>
353 GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes, bool header_only /* = false */)
354 {
355  unsigned this_id = id(bytes);
356 
357  if(!id2desc_.count(this_id))
358  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
359 
360  // ownership of this object goes to the caller of decode()
361  GoogleProtobufMessagePointer msg =
362  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
363  decode(bytes, &(*msg), header_only);
364  return msg;
365 }
366 
367 template<typename GoogleProtobufMessagePointer>
368 GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
369 {
370  unsigned this_id = id(*bytes);
371 
372  if(!id2desc_.count(this_id))
373  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
374 
375  GoogleProtobufMessagePointer msg =
376  dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(id2desc_.find(this_id)->second);
377  std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
378  bytes->erase(bytes->begin(), new_begin);
379  return msg;
380 }
381 
382 template<typename CharIterator>
383 unsigned dccl::Codec::id(CharIterator begin, CharIterator end)
384 {
385  unsigned id_min_size = 0, id_max_size = 0;
386  id_codec()->field_min_size(&id_min_size, 0);
387  id_codec()->field_max_size(&id_max_size, 0);
388 
389  if(std::distance(begin, end) < (id_min_size / BITS_IN_BYTE))
390  throw(Exception("Bytes passed (hex: " + hex_encode(begin, end) + ") is too small to be a valid DCCL message"));
391 
392  Bitset fixed_header_bits;
393  fixed_header_bits.from_byte_stream(begin, begin+(size_t)std::ceil(double(id_max_size) / BITS_IN_BYTE));
394 
395  Bitset these_bits(&fixed_header_bits);
396  these_bits.get_more_bits(id_min_size);
397 
398  boost::any return_value;
399  id_codec()->field_decode(&these_bits, &return_value, 0);
400 
401  return boost::any_cast<uint32>(return_value);
402 }
403 
404 template <typename CharIterator>
405 CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, google::protobuf::Message* msg, bool header_only /*= false*/)
406 {
407  try
408  {
409  unsigned this_id = id(begin, end);
410 
411  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Began decoding message of id: " << this_id << std::endl;
412 
413  if(!id2desc_.count(this_id))
414  throw(Exception("Message id " + boost::lexical_cast<std::string>(this_id) + " has not been loaded. Call load() before decoding this type."));
415 
416  const google::protobuf::Descriptor* desc = msg->GetDescriptor();
417 
418  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name() << std::endl;
419 
420  boost::shared_ptr<FieldCodecBase> codec = FieldCodecManager::find(desc);
421  boost::shared_ptr<internal::FromProtoCppTypeBase> helper = internal::TypeHelper::find(desc);
422 
423  CharIterator actual_end = end;
424  if(codec)
425  {
426  unsigned head_size_bits;
427  unsigned body_size_bits;
428  codec->base_max_size(&head_size_bits, desc, HEAD);
429  codec->base_max_size(&body_size_bits, desc, BODY);
430  unsigned id_size = 0;
431  id_codec()->field_size(&id_size, this_id, 0);
432  head_size_bits += id_size;
433 
434  unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
435  unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
436 
437  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
438  << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits << ")" << std::endl;
439 
440  CharIterator head_bytes_end = begin + head_size_bytes;
441  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end) << std::endl;
442 
443  Bitset head_bits;
444  head_bits.from_byte_stream(begin, head_bytes_end);
445  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
446 
447  // shift off ID bits
448  head_bits >>= id_size;
449 
450  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
451 
452  internal::MessageStack msg_stack;
453  msg_stack.push(msg->GetDescriptor());
454 
455  codec->base_decode(&head_bits, msg, HEAD);
456  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header decode, message is: " << *msg << std::endl;
457 
458 
459  if(header_only)
460  {
461  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "as requested, skipping decrypting and decoding body." << std::endl;
462  actual_end = head_bytes_end;
463  }
464  else
465  {
466  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
467 
468  Bitset body_bits;
469  if(!crypto_key_.empty() && !skip_crypto_ids_.count(this_id))
470  {
471  std::string head_bytes(begin, head_bytes_end);
472  std::string body_bytes(head_bytes_end, end);
473  decrypt(&body_bytes, head_bytes);
474  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
475  body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
476  }
477  else
478  {
479  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end) << std::endl;
480  body_bits.from_byte_stream(head_bytes_end, end);
481  }
482 
483  dlog.is(logger::DEBUG3, logger::DECODE) && dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
484 
485  codec->base_decode(&body_bits, msg, BODY);
486  dlog.is(logger::DEBUG2, logger::DECODE) && dlog << "after header & body decode, message is: " << *msg << std::endl;
487 
488  actual_end = end - body_bits.size()/BITS_IN_BYTE;
489  }
490  }
491  else
492  {
493  throw(Exception("Failed to find (dccl.msg).codec `" + desc->options().GetExtension(dccl::msg).codec() + "`"));
494  }
495 
496  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
497  return actual_end;
498  }
499  catch(std::exception& e)
500  {
501  std::stringstream ss;
502 
503  ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what() << std::endl;
504 
505  dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
506  throw(Exception(ss.str()));
507  }
508 }
509 
510 #endif
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition: codec.h:56
virtual ~Codec()
Destructor.
Definition: codec.cpp:79
Codec(const std::string &dccl_id_codec=default_id_codec_name(), const std::string &library_path="")
Instantiate a Codec, optionally with a non-default identifier field codec.
Definition: codec.cpp:67
void load()
All messages must be explicitly loaded and validated (size checks, option extensions checks...
Definition: codec.h:96
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition: codec.h:274
unsigned id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition: codec.h:185
Provides the default codec for encoding a base Google Protobuf message or an embedded message by call...
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition: codec.cpp:588
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition: codec.cpp:576
static boost::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, bool has_codec_group, const std::string &codec_group)
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
unsigned size(const google::protobuf::Message &msg)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition: codec.cpp:390
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition: bitset.h:358
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition: codec.h:285
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition: common.h:55
unsigned id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ...
Definition: codec.h:156
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition: codec.h:190
void get_more_bits(size_type num_bits)
Retrieve more bits from the parent Bitset.
Definition: bitset.h:447
void info(std::ostream *os=0) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition: codec.h:139
void unload()
Unload a given message.
Definition: codec.h:103
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition: binary.h:98
Dynamic Compact Control Language namespace.
void set_crypto_passphrase(const std::string &passphrase, const std::set< unsigned > &do_not_encrypt_ids_=std::set< unsigned >())
Set a passphrase to be used to encrypt messages when encoding them and to decrypt messages after decod...
Definition: codec.cpp:601
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition: logger.h:145
Exception class for DCCL.
Definition: exception.h:30
CharIterator decode(CharIterator begin, CharIterator end, google::protobuf::Message *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition: codec.h:405
void info_all(std::ostream *os=0) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types...
Definition: codec.cpp:621
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy...
Definition: bitset.h:38
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false)
Encodes a DCCL message.
Definition: codec.cpp:266