DCCL v4
Loading...
Searching...
No Matches
codec.h
1// Copyright 2009-2023:
2// GobySoft, LLC (2013-)
3// Massachusetts Institute of Technology (2007-2014)
4// Community contributors (see AUTHORS file)
5// File authors:
6// Toby Schneider <toby@gobysoft.org>
7// Nathan Knotts <nknotts@gmail.com>
8// philboske <philboske@gmail.com>
9// Chris Murphy <cmurphy@aphysci.com>
10//
11//
12// This file is part of the Dynamic Compact Control Language Library
13// ("DCCL").
14//
15// DCCL is free software: you can redistribute it and/or modify
16// it under the terms of the GNU Lesser General Public License as published by
17// the Free Software Foundation, either version 2.1 of the License, or
18// (at your option) any later version.
19//
20// DCCL is distributed in the hope that it will be useful,
21// but WITHOUT ANY WARRANTY; without even the implied warranty of
22// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23// GNU Lesser General Public License for more details.
24//
25// You should have received a copy of the GNU Lesser General Public License
26// along with DCCL. If not, see <http://www.gnu.org/licenses/>.
27#ifndef DCCL20091211H
28#define DCCL20091211H
29
30#include <map>
31#include <ostream>
32#include <set>
33#include <stdexcept>
34#include <string>
35#include <type_traits>
36#include <vector>
37
38#include <google/protobuf/descriptor.h>
39
40#include <memory>
41
42#include "binary.h"
43#include "dynamic_protobuf_manager.h"
44#include "exception.h"
45#include "field_codec.h"
46#include "field_codec_fixed.h"
47#include "logger.h"
48
49#include "codecs2/field_codec_default_message.h"
50#include "codecs3/field_codec_default_message.h"
51#include "dccl/def.h"
52#include "dccl/version.h"
53#include "field_codec_manager.h"
54
56namespace dccl
57{
58class FieldCodec;
59
62class Codec
63{
64 public:
70 Codec(std::string dccl_id_codec_name = default_id_codec_name(),
71 const std::string& library_path = "");
72
79 template <class IDFieldCodec,
80 typename std::enable_if<std::is_base_of<FieldCodecBase, IDFieldCodec>::value,
81 int>::type = 0>
82 Codec(const std::string& dccl_id_codec_name, const IDFieldCodec& dccl_id_codec) // NOLINT
83 : id_codec_(dccl_id_codec_name)
84 {
85 set_default_codecs();
86 manager_.add<IDFieldCodec>(dccl_id_codec_name);
87 }
88
90 virtual ~Codec();
91
92 Codec(const Codec&) = delete;
93 Codec& operator=(const Codec&) = delete;
94
100 void load_library(void* dl_handle);
101
108 void unload_library(void* dl_handle);
109
114 void load_library(const std::string& library_path);
115
121 template <typename ProtobufMessage> std::size_t load()
122 {
123 return load(ProtobufMessage::descriptor());
124 }
125
129 template <typename ProtobufMessage> void unload() { unload(ProtobufMessage::descriptor()); }
130
131 void unload_all() { id2desc_.clear(); }
132
140 std::size_t load(const google::protobuf::Descriptor* desc, int user_id = -1);
141
146 void unload(const google::protobuf::Descriptor* desc);
147
152 void unload(size_t dccl_id);
153
155 void set_id_codec(const std::string& id_codec_name);
156 std::string get_id_codec() { return id_codec_; }
157
163 void set_crypto_passphrase(const std::string& passphrase,
164 const std::set<int32>& do_not_encrypt_ids = std::set<int32>());
165
166 void set_crypto_passphrase(const std::string& passphrase,
167 const std::set<unsigned>& do_not_encrypt_ids)
168 {
169 std::set<int32> s_ids{do_not_encrypt_ids.begin(), do_not_encrypt_ids.end()};
170 set_crypto_passphrase(passphrase, s_ids);
171 }
172
176 void set_strict(bool mode) { strict_ = mode; }
177
181 void set_console_width(unsigned num_chars) { console_width_ = num_chars; }
182
184
188
189
196 template <typename ProtobufMessage>
197 void info(std::ostream* os = nullptr, int user_id = -1) const
198 {
199 info(ProtobufMessage::descriptor(), os, user_id);
200 }
201
207 void info(const google::protobuf::Descriptor* desc, std::ostream* os = nullptr,
208 int user_id = -1) const;
209
213 void info_all(std::ostream* os = nullptr) const;
214
218 template <typename ProtobufMessage> int32 id() const
219 {
220 return id(ProtobufMessage::descriptor());
221 }
222
242 int32 id(const std::string& bytes) const;
243
245 template <typename CharIterator> int32 id(CharIterator begin, CharIterator end) const;
246
248 int32 id(const google::protobuf::Descriptor* desc) const
249 {
250 if (desc->options().GetExtension(dccl::msg).omit_id())
251 throw(Exception("Cannot call id(...) on message with omit_id == true"));
252 Bitset id_bits;
253 dccl::uint32 hardcoded_id = desc->options().GetExtension(dccl::msg).id();
254 // pass the hard coded id, that is, (dccl.msg).id,
255 // through encode/decode to allow a custom ID codec (if in use)
256 // to always take effect.
257 id_codec()->field_encode(&id_bits, hardcoded_id, nullptr);
258 std::string id_bytes(id_bits.to_byte_string());
259 return id(id_bytes);
260 }
261
263 const std::map<int32, const google::protobuf::Descriptor*>& loaded() const { return id2desc_; }
264
266
270
271
280 void encode(std::string* bytes, const google::protobuf::Message& msg, bool header_only = false,
281 int user_id = -1);
282
293 size_t encode(char* bytes, size_t max_len, const google::protobuf::Message& msg,
294 bool header_only = false, int user_id = -1);
295
304 template <typename CharIterator, typename ProtobufMessage>
305 CharIterator decode(CharIterator begin, CharIterator end, ProtobufMessage* msg,
306 bool header_only = false);
307
314 template <typename ProtobufMessage>
315 void decode(const std::string& bytes, ProtobufMessage* msg, bool header_only = false)
316 {
317 decode(bytes.begin(), bytes.end(), msg, header_only);
318 }
319
325 template <typename ProtobufMessage> void decode(std::string* bytes, ProtobufMessage* msg)
326 {
327 decode(*bytes, msg);
328 unsigned last_size = size(*msg);
329 bytes->erase(0, last_size);
330 }
331
339 template <typename GoogleProtobufMessagePointer>
340 GoogleProtobufMessagePointer decode(const std::string& bytes, bool header_only = false);
341
348 template <typename GoogleProtobufMessagePointer>
349 GoogleProtobufMessagePointer decode(std::string* bytes);
350
357 unsigned size(const google::protobuf::Message& msg, int user_id = -1);
358
363 template <typename ProtobufMessage> unsigned max_size()
364 {
365 return max_size(ProtobufMessage::descriptor());
366 }
367
369 unsigned max_size(const google::protobuf::Descriptor* desc) const;
370
375 template <typename ProtobufMessage> unsigned min_size()
376 {
377 return min_size(ProtobufMessage::descriptor());
378 }
379
381 unsigned min_size(const google::protobuf::Descriptor* desc) const;
382
384
385 static std::string default_id_codec_name() { return "dccl.default.id"; }
386
387 static std::string default_codec_name(int version = 2)
388 {
389 return "dccl.default" + std::to_string(version);
390 }
391
392 FieldCodecManagerLocal& manager() { return manager_; }
393
394 private:
395 void encode_internal(const google::protobuf::Message& msg, bool header_only,
396 Bitset& header_bits, Bitset& body_bits, int user_id);
397 std::string get_all_error_fields_in_message(const google::protobuf::Message& msg,
398 uint8_t depth = 1);
399
400 void encrypt(std::string* s, const std::string& nonce);
401 void decrypt(std::string* s, const std::string& nonce);
402
403 void set_default_codecs();
404
405 std::shared_ptr<FieldCodecBase> id_codec() const
406 {
407 return manager_.find(google::protobuf::FieldDescriptor::TYPE_UINT32, DCCL_VERSION_MAJOR,
408 id_codec_);
409 }
410
411 int32 id_internal(const google::protobuf::Descriptor* desc, int user_id)
412 {
413 // if we have omit_id, check for or assign an autogenerate negative internal placeholder ID
414 if (desc->options().GetExtension(dccl::msg).omit_id() && !desc2placeholder_id_.count(desc))
415 desc2placeholder_id_.insert(std::make_pair(desc, omit_id_placeholder_id_--));
416
417 return id_internal_const(desc, user_id);
418 }
419
420 int32 id_internal_const(const google::protobuf::Descriptor* desc, int user_id) const
421 {
422 if (desc->options().GetExtension(dccl::msg).omit_id())
423 {
424 if (desc2placeholder_id_.count(desc))
425 return desc2placeholder_id_.find(desc)->second;
426 else
427 throw(Exception("Message " + desc->full_name() +
428 " has omit_id == true but has not been loaded, so id_internal() "
429 "const cannot be called"));
430 }
431 else
432 {
433 return (user_id < 0) ? id(desc) : user_id;
434 }
435 }
436
437 private:
438 // SHA256 hash of the crypto passphrase
439 std::string crypto_key_;
440
441 // strict mode setting
442 bool strict_{false};
443
444 // console outputting format width
445 unsigned console_width_{60};
446
447 // set of DCCL IDs *not* to encrypt
448 std::set<int32> skip_crypto_ids_;
449
450 // maps `dccl.id`s onto Message Descriptors
451 std::map<int32, const google::protobuf::Descriptor*> id2desc_;
452 std::string id_codec_;
453
454 std::vector<void*> dl_handles_;
455
456 std::string build_guard_for_console_output(std::string& base, char guard_char) const;
457
458 FieldCodecManagerLocal manager_;
459
460 // current omit_id placeholder DCCL Id (starts at -1 and decrements)
461 int32 omit_id_placeholder_id_{-1};
462 // maps message descriptor onto placeholder ID for omit_id messages
463 std::map<const google::protobuf::Descriptor*, int32> desc2placeholder_id_;
464};
465
466inline std::ostream& operator<<(std::ostream& os, const Codec& codec)
467{
468 codec.info_all(&os);
469 return os;
470}
471} // namespace dccl
472
473template <typename GoogleProtobufMessagePointer>
474GoogleProtobufMessagePointer dccl::Codec::decode(const std::string& bytes,
475 bool header_only /* = false */)
476{
477 int32 this_id = id(bytes);
478
479 if (!id2desc_.count(this_id))
480 throw(Exception("Message id " + std::to_string(this_id) +
481 " has not been loaded. Call load() before decoding this type."));
482
483 // ownership of this object goes to the caller of decode()
484 auto msg = dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
485 id2desc_.find(this_id)->second);
486 decode(bytes, &(*msg), header_only);
487 return msg;
488}
489
490template <typename GoogleProtobufMessagePointer>
491GoogleProtobufMessagePointer dccl::Codec::decode(std::string* bytes)
492{
493 int32 this_id = id(*bytes);
494
495 if (!id2desc_.count(this_id))
496 throw(Exception("Message id " + std::to_string(this_id) +
497 " has not been loaded. Call load() before decoding this type."));
498
499 GoogleProtobufMessagePointer msg =
500 dccl::DynamicProtobufManager::new_protobuf_message<GoogleProtobufMessagePointer>(
501 id2desc_.find(this_id)->second);
502 std::string::iterator new_begin = decode(bytes->begin(), bytes->end(), &(*msg));
503 bytes->erase(bytes->begin(), new_begin);
504 return msg;
505}
506
507template <typename CharIterator>
508dccl::int32 dccl::Codec::id(CharIterator begin, CharIterator end) const
509{
510 try
511 {
512 unsigned id_min_size = 0, id_max_size = 0;
513 id_codec()->field_min_size(&id_min_size, nullptr);
514 id_codec()->field_max_size(&id_max_size, nullptr);
515 Bitset fixed_header_bits;
516
517 // ensure we don't go past-the-end if fewer bytes are passed in than id_max_size
518 int incr = std::min<size_t>(
519 static_cast<size_t>(std::distance(begin, end)),
520 static_cast<size_t>(std::ceil(static_cast<double>(id_max_size) / BITS_IN_BYTE)));
521 fixed_header_bits.from_byte_stream(begin, begin + incr);
522
523 Bitset these_bits(&fixed_header_bits);
524 these_bits.get_more_bits(id_min_size);
525
526 dccl::any return_value;
527 id_codec()->field_decode(&these_bits, &return_value, nullptr);
528 return dccl::any_cast<uint32>(return_value);
529 }
530 catch (const dccl::Exception& e)
531 {
532 throw(Exception("Failed to decoded id from bytes passed (hex: " + hex_encode(begin, end) +
533 ")"));
534 }
535}
536
537template <typename CharIterator, typename ProtobufMessage>
538CharIterator dccl::Codec::decode(CharIterator begin, CharIterator end, ProtobufMessage* msg,
539 bool header_only /*= false*/)
540{
541 try
542 {
543 const google::protobuf::Descriptor* desc = msg->GetDescriptor();
544 int32 expected_id = id_internal(desc, -1);
545 int32 received_id =
546 expected_id; // if omit_id, we have to assume we have the correct type. Otherwise, overwrite if not omit_id and check
547 if (!desc->options().GetExtension(dccl::msg).omit_id())
548 {
549 received_id = id(begin, end);
550
551 if (!id2desc_.count(received_id))
552 throw(Exception("Message id " + std::to_string(received_id) +
553 " has not been loaded. Call load() before decoding this type."));
554
555 if (expected_id != received_id)
556 throw(Exception("Received message with id " + std::to_string(received_id) + " (" +
557 id2desc_.at(received_id)->full_name() +
558 ") but decode was called with message of id " +
559 std::to_string(expected_id) + " (" + desc->full_name() +
560 "). Ensure dccl::Codec::decode is called with the correct Protobuf "
561 "message or use the dynamic overloads of decode."));
562 }
563
564 dlog.is(logger::DEBUG1, logger::DECODE) &&
565 dlog << "Began decoding message of id: " << received_id << std::endl;
566
567 dlog.is(logger::DEBUG1, logger::DECODE) && dlog << "Type name: " << desc->full_name()
568 << std::endl;
569
570 std::shared_ptr<FieldCodecBase> codec = manager_.find(desc);
571 std::shared_ptr<internal::FromProtoCppTypeBase> helper = manager_.type_helper().find(desc);
572
573 CharIterator actual_end = end;
574 if (codec)
575 {
576 unsigned head_size_bits;
577 unsigned body_size_bits;
578 codec->base_max_size(&head_size_bits, desc, HEAD);
579 codec->base_max_size(&body_size_bits, desc, BODY);
580 unsigned id_size = 0;
581 if (!desc->options().GetExtension(dccl::msg).omit_id())
582 id_codec()->field_size(&id_size, static_cast<uint32>(received_id), nullptr);
583 head_size_bits += id_size;
584
585 unsigned head_size_bytes = ceil_bits2bytes(head_size_bits);
586 unsigned body_size_bytes = ceil_bits2bytes(body_size_bits);
587
588 dlog.is(logger::DEBUG2, logger::DECODE) &&
589 dlog << "Head bytes (bits): " << head_size_bytes << "(" << head_size_bits
590 << "), max body bytes (bits): " << body_size_bytes << "(" << body_size_bits
591 << ")" << std::endl;
592
593 CharIterator head_bytes_end = begin + head_size_bytes;
594 dlog.is(logger::DEBUG3, logger::DECODE) &&
595 dlog << "Unencrypted Head (hex): " << hex_encode(begin, head_bytes_end)
596 << std::endl;
597
598 Bitset head_bits;
599 head_bits.from_byte_stream(begin, head_bytes_end);
600 dlog.is(logger::DEBUG3, logger::DECODE) &&
601 dlog << "Unencrypted Head (bin): " << head_bits << std::endl;
602
603 // shift off ID bits
604 head_bits >>= id_size;
605
606 dlog.is(logger::DEBUG3, logger::DECODE) &&
607 dlog << "Unencrypted Head after ID bits removal (bin): " << head_bits << std::endl;
608
609 internal::MessageStack msg_stack(manager_.codec_data().root_message_,
610 manager_.codec_data().message_data_);
611 msg_stack.push(msg->GetDescriptor());
612
613 codec->base_decode(&head_bits, msg, HEAD);
614 dlog.is(logger::DEBUG2, logger::DECODE) &&
615 dlog << "after header decode, message is: " << *msg << std::endl;
616
617 if (header_only)
618 {
619 dlog.is(logger::DEBUG2, logger::DECODE) &&
620 dlog << "as requested, skipping decrypting and decoding body." << std::endl;
621 actual_end = head_bytes_end;
622 }
623 else
624 {
625 dlog.is(logger::DEBUG3, logger::DECODE) &&
626 dlog << "Encrypted Body (hex): " << hex_encode(head_bytes_end, end)
627 << std::endl;
628
629 Bitset body_bits;
630 if (!crypto_key_.empty() && !skip_crypto_ids_.count(received_id))
631 {
632 std::string head_bytes(begin, head_bytes_end);
633 std::string body_bytes(head_bytes_end, end);
634 decrypt(&body_bytes, head_bytes);
635 dlog.is(logger::DEBUG3, logger::DECODE) &&
636 dlog << "Unencrypted Body (hex): " << hex_encode(body_bytes) << std::endl;
637 body_bits.from_byte_stream(body_bytes.begin(), body_bytes.end());
638 }
639 else
640 {
641 dlog.is(logger::DEBUG3, logger::DECODE) &&
642 dlog << "Unencrypted Body (hex): " << hex_encode(head_bytes_end, end)
643 << std::endl;
644 body_bits.from_byte_stream(head_bytes_end, end);
645 }
646
647 dlog.is(logger::DEBUG3, logger::DECODE) &&
648 dlog << "Unencrypted Body (bin): " << body_bits << std::endl;
649
650 codec->base_decode(&body_bits, msg, BODY);
651 dlog.is(logger::DEBUG2, logger::DECODE) &&
652 dlog << "after header & body decode, message is: " << *msg << std::endl;
653
654 actual_end = end - body_bits.size() / BITS_IN_BYTE;
655 }
656 }
657 else
658 {
659 throw(Exception("Failed to find (dccl.msg).codec `" +
660 desc->options().GetExtension(dccl::msg).codec() + "`"),
661 desc);
662 }
663
664 dlog.is(logger::DEBUG1, logger::DECODE) &&
665 dlog << "Successfully decoded message of type: " << desc->full_name() << std::endl;
666 return actual_end;
667 }
668 catch (std::exception& e)
669 {
670 std::stringstream ss;
671
672 ss << "Message " << hex_encode(begin, end) << " failed to decode. Reason: " << e.what()
673 << std::endl;
674
675 dlog.is(logger::DEBUG1, logger::DECODE) && dlog << ss.str() << std::endl;
676 throw(Exception(ss.str()));
677 }
678}
679
680#endif
A variable size container of bits (subclassed from std::deque<bool>) with an optional hierarchy....
Definition bitset.h:43
std::string to_byte_string()
Returns the value of the Bitset to a byte string, where each character represents 8 bits of the Bitse...
Definition bitset.h:297
void from_byte_stream(CharIterator begin, CharIterator end)
Sets the value of the Bitset to the contents of a byte string, where each character represents 8 bits...
Definition bitset.h:341
The Dynamic CCL enCODer/DECoder. This is the main class you will use to load, encode and decode DCCL ...
Definition codec.h:63
int32 id(const google::protobuf::Descriptor *desc) const
Provides the DCCL ID given a DCCL type.
Definition codec.h:248
void info(std::ostream *os=nullptr, int user_id=-1) const
Writes a human readable summary (including field sizes) of the provided DCCL type to the stream provi...
Definition codec.h:197
void set_strict(bool mode)
Set "strict" mode where a dccl::OutOfRangeException will be thrown for encode if the value(s) provide...
Definition codec.h:176
void info_all(std::ostream *os=nullptr) const
Writes a human readable summary (including field sizes) of all the loaded (validated) DCCL types.
Definition codec.cpp:707
const std::map< int32, const google::protobuf::Descriptor * > & loaded() const
Provides a map of all loaded DCCL IDs to the equivalent Protobuf descriptor.
Definition codec.h:263
void set_id_codec(const std::string &id_codec_name)
Set a different ID codec name (note that this calls unload_all(), so all messages must be reloaded)
Definition codec.cpp:732
unsigned min_size()
Provides the encoded minimum size (in bytes) of msg.
Definition codec.h:375
void decode(std::string *bytes, ProtobufMessage *msg)
Decode a DCCL message when the type is known at compile time.
Definition codec.h:325
int32 id(CharIterator begin, CharIterator end) const
Get the DCCL ID of an unknown encoded DCCL message (Iterator overload).
int32 id() const
Gives the DCCL id (defined by the custom message option extension "(dccl.msg).id" in the ....
Definition codec.h:218
void unload_library(void *dl_handle)
Remove codecs and/or unload messages present in the given shared library handle.
Definition codec.cpp:671
unsigned max_size()
Provides the encoded maximum size (in bytes) of msg.
Definition codec.h:363
void encode(std::string *bytes, const google::protobuf::Message &msg, bool header_only=false, int user_id=-1)
Encodes a DCCL message.
Definition codec.cpp:272
void set_console_width(unsigned num_chars)
Set the number of characters used in programmatic generation of console outputs.
Definition codec.h:181
void unload()
Unload a given message.
Definition codec.h:129
void set_crypto_passphrase(const std::string &passphrase, const std::set< int32 > &do_not_encrypt_ids=std::set< int32 >())
Set a passphrase to be used when encoding messages to encrypt them and to decrypt messages after decod...
Definition codec.cpp:683
virtual ~Codec()
Destructor.
Definition codec.cpp:85
CharIterator decode(CharIterator begin, CharIterator end, ProtobufMessage *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition codec.h:538
Codec(const std::string &dccl_id_codec_name, const IDFieldCodec &dccl_id_codec)
Instantiate a Codec with a non-default identifier field codec (loaded directly).
Definition codec.h:82
void load_library(void *dl_handle)
Add codecs and/or load messages present in the given shared library handle.
Definition codec.cpp:659
unsigned size(const google::protobuf::Message &msg, int user_id=-1)
Provides the encoded size (in bytes) of msg. This is useful if you need to know the size of a message...
Definition codec.cpp:447
std::size_t load()
All messages must be explicitly loaded and validated (size checks, option extensions checks,...
Definition codec.h:121
void decode(const std::string &bytes, ProtobufMessage *msg, bool header_only=false)
Decode a DCCL message when the type is known at compile time.
Definition codec.h:315
Exception class for DCCL.
Definition exception.h:47
std::enable_if< std::is_base_of< google::protobuf::Message, typename Codec::wire_type >::value && !std::is_same< google::protobuf::Message, typename Codec::wire_type >::value, void >::type add(const std::string &name)
Add a new field codec (used for codecs operating on statically generated Protobuf messages,...
std::shared_ptr< FieldCodecBase > find(const google::protobuf::FieldDescriptor *field, int codec_version, bool has_codec_group, const std::string &codec_group) const
Find the codec for a given field. For embedded messages, prefers (dccl.field).codec (inside field) ov...
bool is(logger::Verbosity verbosity, logger::Group group=logger::GENERAL)
Indicates the verbosity of the Logger until the next std::flush or std::endl. The boolean return is u...
Definition logger.h:192
Dynamic Compact Control Language namespace.
Definition any.h:47
google::protobuf::int32 int32
a signed 32 bit integer
Definition common.h:58
google::protobuf::uint32 uint32
an unsigned 32 bit integer
Definition common.h:56
void hex_encode(CharIterator begin, CharIterator end, std::string *out, bool upper_case=false)
Encodes a (little-endian) hexadecimal string from a byte string. Index 0 of begin is written to index...
Definition binary.h:100