.. _program_listing_file_src_common_fastopt.h: Program Listing for File fastopt.h ================================== |exhale_lsh| :ref:`Return to documentation for file ` (``src/common/fastopt.h``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #pragma once #include "common/definitions.h" #include "3rd_party/any_type.h" #include "3rd_party/phf/phf.h" #include "3rd_party/yaml-cpp/yaml.h" // This file contains code to create a fast access option class, // meant as a replacment/supplement to YAML::Node. namespace marian { namespace crc { // has to stay in header due to constexpr // This code comes from https://notes.underscorediscovery.com/constexpr-fnv1a/ // and is distributed as public domain as stated by the author under that link // constants for hash computations constexpr uint64_t val_64_const = 0xcbf29ce484222325; constexpr uint64_t prime_64_const = 0x100000001b3; // recursive compile-time hash, looking for stack-overflow source inline constexpr uint64_t hash_64_fnv1a_const(const char* const str, const uint64_t value = val_64_const) noexcept { return (str[0] == '\0') ? value : hash_64_fnv1a_const(&str[1], (value ^ uint64_t(str[0])) * prime_64_const); } // Compile time string hashing. Should work particularly well for option look up with explicitly used keys like options->get("dim-input"); inline constexpr uint64_t crc(const char* const str) noexcept { return hash_64_fnv1a_const(str); } } /*****************************************************************************/ // PerfectHash constructs a perfect hash for a set K of n numeric keys. The size of // the hash is m > n (not much larger) and n << max(K) (much smaller). The output array size is // determined by PHF::init in "src/3rd_party/phf/phf.h". m - n fields stay undefined (a bit of waste). // Wrapper class for the 3rd-party library in "src/3rd_party/phf" class PerfectHash { private: phf phf_; PerfectHash(const uint64_t keys[], size_t num) { int error = PHF::init(&phf_, keys, num, /* bucket size */ 4, /* loading factor */ 90, /* seed */ 123456); ABORT_IF(error != 0, "PHF error {}", error); } public: PerfectHash(const std::vector& v) : PerfectHash(v.data(), v.size()) { } ~PerfectHash() { PHF::destroy(&phf_); } // subscript operator [] overloading: if the key is uint64_t, return the hash code directly uint32_t operator[](const uint64_t& key) const { return PHF::hash(const_cast(&phf_), key); } // If the key is a string, return the hash code for the string's CRC code uint32_t operator[](const char* const keyStr) const { return (*this)[crc::crc(keyStr)]; } size_t size() const { return phf_.m; } }; /*****************************************************************************/ class FastOpt; // helper class for conversion, see fastopt.cpp namespace fastopt_helpers { template struct As { static T apply(const FastOpt&); }; template struct As> { static std::vector apply(const FastOpt&); }; template struct As> { static std::pair apply(const FastOpt&); }; } // Fast access option class, meant as a replacment/supplement to YAML::Node. // Relatively expensive to construct, fast to access (not visible in profiler) // via std::vector or perfect hash. The perfect hash only requires a few look-ups // and arithmentic operations, still O(1). // Still requires YAML::Node support for parsing and modification via rebuilding. class FastOpt { private: template friend struct fastopt_helpers::As; public: // Node types for FastOpt, seem to be enough to cover YAML:NodeType // Multi-element types include "Sequence" and "Map" // "Sequence" is implemented with STL vectors // "Map" is implemented with a 3rd-party PHF library (see the PerfectHash class) enum struct NodeType { Null, Bool, Int64, Float64, String, Sequence, Map }; private: any_type value_; std::unique_ptr ph_; std::vector> array_; NodeType type_{NodeType::Null}; static const std::unique_ptr uniqueNullPtr; // return this unique_ptr if key not found, equivalent to nullptr uint64_t fingerprint_{0}; // When node is used as a value in a map, used to check if the perfect hash // returned the right value (they can produce false positives) size_t elements_{0}; // Number of elements if isMap or isSequence is true, 0 otherwise. // Used to find elements if isSequence() is true. // Retrieve the entry using array indexing. inline const std::unique_ptr& arrayLookup(size_t keyId) const { if(keyId < array_.size()) return array_[keyId]; else return uniqueNullPtr; } // Used to find elements if isMap() is true. // Retrieve the entry from the hash table. inline const std::unique_ptr& phLookup(uint64_t keyId) const { if(ph_) return array_[(*ph_)[keyId]]; else return uniqueNullPtr; } // Builders for different types of nodes. // Build Null node. void makeNull() { elements_ = 0; type_ = NodeType::Null; ABORT_IF(ph_, "ph_ should be undefined"); ABORT_IF(!array_.empty(), "array_ should be empty"); } // Build Scalar node via controlled failure to convert from a YAML::Node object. void makeScalar(const YAML::Node& v) { elements_ = 0; // Placeholders for decode bool asBool; int64_t asInt; double asDouble; // Text boolean values should be treated as a string auto asString = v.as(); bool isTextBool = asString.size() == 1 && asString.find_first_of("nyNYtfTF") == 0; if(YAML::convert::decode(v, asBool) && !isTextBool) { value_ = asBool; type_ = NodeType::Bool; } else if(YAML::convert::decode(v, asInt)) { value_ = asInt; type_ = NodeType::Int64; } else if(YAML::convert::decode(v, asDouble)) { value_ = asDouble; type_ = NodeType::Float64; } else { value_ = asString; type_ = NodeType::String; } ABORT_IF(ph_, "ph_ should be undefined"); ABORT_IF(!array_.empty(), "array_ should be empty"); } // Build a Sequence node, can by converted to std::vector if elements can be converted to T. void makeSequence(const std::vector& v) { elements_ = v.size(); ABORT_IF(!array_.empty(), "array_ is not empty??"); for(size_t pos = 0; pos < v.size(); ++pos) { array_.emplace_back(new FastOpt(v[pos], pos)); } type_ = NodeType::Sequence; ABORT_IF(ph_, "ph_ should be undefined"); } // Build a Map node. void makeMap(const std::map& m) { std::vector keys; for(const auto& it : m) keys.push_back(it.first); ABORT_IF(ph_, "ph_ is already defined??"); ph_.reset(new PerfectHash(keys)); ABORT_IF(!array_.empty(), "array_ is not empty??"); // for lack of resize_emplace for(int i = 0; i < ph_->size(); ++i) array_.emplace_back(nullptr); elements_ = keys.size(); for(const auto& it : m) { uint64_t key = it.first; size_t pos = (*ph_)[key]; array_[pos].reset(new FastOpt(it.second, key)); } type_ = NodeType::Map; } // Build a Map node, uses std::string as key, which gets hashed to uint64_t and used in the function above. void makeMap(const std::map& m) { std::map mi; for(const auto& it : m) { auto key = it.first.c_str(); mi[crc::crc(key)] = it.second; } makeMap(mi); } // Only build from YAML::Node FastOpt(const FastOpt&) = delete; FastOpt() = delete; void construct(const YAML::Node& node) { switch(node.Type()) { case YAML::NodeType::Scalar: makeScalar(node); break; case YAML::NodeType::Sequence: { std::vector nodesVec; for(auto&& n : node) nodesVec.push_back(n); makeSequence(nodesVec); } break; case YAML::NodeType::Map: { std::map nodesMap; for(auto& n : node) { auto key = n.first.as(); nodesMap[key] = n.second; } makeMap(nodesMap); } break; case YAML::NodeType::Undefined: case YAML::NodeType::Null: makeNull(); } } public: // Constructor to recursively create a FastOpt object from a YAML::Node following the yaml structure. FastOpt(const YAML::Node& node) { construct(node); } FastOpt(const YAML::Node& node, uint64_t fingerprint) : fingerprint_{fingerprint} { construct(node); } // Predicates for node types bool isSequence() const { return type_ == NodeType::Sequence; } bool isMap() const { return type_ == NodeType::Map; } bool isScalar() const { return type_ == NodeType::Bool || type_ == NodeType::Float64 || type_ == NodeType::Int64 || type_ == NodeType::String; } bool isNull() const { return type_ == NodeType::Null; } bool isInt() const { return type_ == NodeType::Int64; } bool isBool() const { return type_ == NodeType::Bool; } bool isFloat() const { return type_ == NodeType::Float64; } bool isString() const { return type_ == NodeType::String; } // actual number of elements in a sequence or map, 0 (not 1) for scalar nodes. // 0 here means rather "not applicable". size_t size() const { return elements_; } // replace current node with an externally built FastOpt object void swap(FastOpt& other) { std::swap(value_, other.value_); std::swap(ph_, other.ph_); std::swap(array_, other.array_); std::swap(type_, other.type_); std::swap(elements_, other.elements_); // leave fingerprint alone as it needed by parent node. } // Is the hashed key in a map? bool has(uint64_t keyId) const { if(isMap() && elements_ > 0) { const auto& ptr = phLookup(keyId); return ptr ? ptr->fingerprint_ == keyId : false; } else { return false; } } bool has(const char* const key) const { return has(crc::crc(key)); } bool has(const std::string& key) const { return has(key.c_str()); } // convert to requested type template inline T as() const { return fastopt_helpers::As::apply(*this); } // access sequence or map element const FastOpt& operator[](uint64_t keyId) const { if(isSequence()) { const auto& ptr = arrayLookup((size_t)keyId); ABORT_IF(!ptr, "Unseen key {}" , keyId); return *ptr; } else if(isMap()) { const auto& ptr = phLookup(keyId); ABORT_IF(!ptr || ptr->fingerprint_ != keyId, "Unseen key {}", keyId); return *ptr; } else { ABORT("Not a sequence or map node"); } } // operator [] overloading for non-uint64_t keys const FastOpt& operator[](int key) const { return operator[]((uint64_t)key); } const FastOpt& operator[](const char* const key) const { return operator[](crc::crc(key)); } const FastOpt& operator[](const std::string& key) const { return operator[](key.c_str()); } }; }