.. _program_listing_file_src_layers_generic.h:

Program Listing for File generic.h
==================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_layers_generic.h>` (``src/layers/generic.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #pragma once

   #include "common/definitions.h"
   #include "graph/expression_operators.h"
   #include "marian.h"

   #include "data/shortlist.h"
   #include "layers/factory.h"

   namespace marian {
   namespace mlp {
   enum struct act : int { linear, tanh, sigmoid, ReLU, LeakyReLU, PReLU, swish };
   }  // namespace mlp
   }  // namespace marian

   namespace marian {

   // Base class for layer objects: holds the expression graph and the layer options.
   class LayerBase {
   protected:
     Ptr<ExpressionGraph> graph_;
     Ptr<Options> options_;

   public:
     LayerBase(Ptr<ExpressionGraph> graph, Ptr<Options> options)
         : graph_(graph), options_(options) {}

     template <typename T>
     T opt(const std::string key) const {
       return options_->get<T>(key);
     }

     template <typename T>
     T opt(const std::string key, const T& defaultValue) const {
       return options_->get<T>(key, defaultValue);
     }
   };

   // Simplest layer interface: a unary function from expression to expression.
   struct IUnaryLayer {
     virtual ~IUnaryLayer() {}
     virtual Expr apply(Expr) = 0;
     virtual Expr apply(const std::vector<Expr>& es) {
       ABORT_IF(es.size() > 1, "Not implemented");  // simple stub
       return apply(es.front());
     }
   };

   struct IHasShortList {
     virtual void setShortlist(Ptr<data::Shortlist> shortlist) = 0;
     virtual void clear() = 0;
   };

   // Interface for embedding layers: maps a sub-batch or word indices to embeddings.
   struct IEmbeddingLayer {
     virtual std::tuple<Expr /*embeddings*/, Expr /*mask*/> apply(
         Ptr<data::SubBatch> subBatch) const = 0;

     virtual Expr apply(const Words& embIdx, const Shape& shape) const = 0;

     // alternative from indices directly
     virtual Expr applyIndices(const std::vector<WordIndex>& embIdx, const Shape& shape) const = 0;

     virtual ~IEmbeddingLayer() {}
   };

   // Base class for encoder and decoder layers, which own lazily created embedding layers.
   class EncoderDecoderLayerBase : public LayerBase {
   protected:
     const std::string prefix_;
     const bool embeddingFix_;
     const float dropoutEmbeddings_;  // this drops out full embedding vectors
     const bool inference_;
     const size_t batchIndex_;
     mutable std::vector<Ptr<IEmbeddingLayer>> embeddingLayers_;  // (lazily created)

     EncoderDecoderLayerBase(Ptr<ExpressionGraph> graph,
                             Ptr<Options> options,
                             const std::string& prefix,
                             size_t batchIndex,
                             float dropoutEmbeddings,
                             bool embeddingFix)
         : LayerBase(graph, options),
           prefix_(options->get<std::string>("prefix", prefix)),
           embeddingFix_(embeddingFix),
           dropoutEmbeddings_(dropoutEmbeddings),
           inference_(options->get<bool>("inference", false)),
           batchIndex_(options->get<size_t>("index", batchIndex)) {}

     virtual ~EncoderDecoderLayerBase() {}

   private:
     Ptr<IEmbeddingLayer> createEmbeddingLayer() const;
     Ptr<IEmbeddingLayer> createULREmbeddingLayer() const;

   public:
     Ptr<IEmbeddingLayer> getEmbeddingLayer(bool ulr = false) const;
   };

   namespace mlp {

   // Classical dense (fully-connected) layer; multiple inputs are projected
   // separately, combined, and passed through the configured activation.
   class Dense : public LayerBase, public IUnaryLayer {
   public:
     Dense(Ptr<ExpressionGraph> graph, Ptr<Options> options)
         : LayerBase(graph, options) {}

     Expr apply(const std::vector<Expr>& inputs) override {
       ABORT_IF(inputs.empty(), "No inputs");

       auto name = opt<std::string>("prefix");
       auto dim  = opt<int>("dim");

       auto useLayerNorm   = opt<bool>("layer-normalization", false);
       auto useNematusNorm = opt<bool>("nematus-normalization", false);
       auto activation     = (act)opt<int>("activation", (int)act::linear);

       auto g = graph_;

       std::vector<Expr> outputs;
       size_t i = 0;

       std::string num;
       for(auto&& in : inputs) {
         if(inputs.size() > 1)
           num = std::to_string(i);

         Expr W = g->param(name + "_W" + num, {in->shape()[-1], dim}, inits::glorotUniform());
         Expr b = g->param(name + "_b" + num, {1, dim}, inits::zeros());

         if(useLayerNorm) {
           if(useNematusNorm) {
             auto ln_s = g->param(name + "_ln_s" + num, {1, dim}, inits::fromValue(1.f));
             auto ln_b = g->param(name + "_ln_b" + num, {1, dim}, inits::zeros());

             outputs.push_back(layerNorm(affine(in, W, b), ln_s, ln_b, NEMATUS_LN_EPS));
           } else {
             auto gamma = g->param(name + "_gamma" + num, {1, dim}, inits::fromValue(1.0));

             outputs.push_back(layerNorm(dot(in, W), gamma, b));
           }
         } else {
           outputs.push_back(affine(in, W, b));
         }
         i++;
       }

       // clang-format off
       switch(activation) {
         case act::linear:    return plus(outputs);
         case act::tanh:      return tanh(outputs);
         case act::sigmoid:   return sigmoid(outputs);
         case act::ReLU:      return relu(outputs);
         case act::LeakyReLU: return leakyrelu(outputs);
         case act::PReLU:     return prelu(outputs);
         case act::swish:     return swish(outputs);
         default:             return plus(outputs);
       }
       // clang-format on
     }

     Expr apply(Expr input) override { return apply(std::vector<Expr>({input})); }
   };

   }  // namespace mlp

   // --- a few layers with built-in parameters created on the fly, without a proper layer object
   // @TODO: change to a proper layer object

   static inline std::function<Expr(Expr)> activationByName(const std::string& actName) {
     if (actName == "relu")
       return (ActivationFunction*)relu;
     else if (actName == "swish")
       return (ActivationFunction*)swish;
     else if (actName == "gelu")
       return (ActivationFunction*)gelu;
     else if (actName == "sigmoid")
       return (ActivationFunction*)sigmoid;
     else if (actName == "")  // return identity function if activation name is empty
       return [](Expr x) { return x; };
     ABORT("Invalid activation name '{}'", actName);
   }

   // like affine() but with built-in parameters, activation, and dropout
   static inline Expr denseInline(Expr x,
                                  std::string prefix,
                                  std::string suffix,
                                  int outDim,
                                  Ptr<inits::NodeInitializer> initFn = inits::glorotUniform(),
                                  std::string actName = "",
                                  float dropProb = 0.0f) {
     auto graph = x->graph();

     auto W = graph->param(prefix + "_W" + suffix, {x->shape()[-1], outDim}, initFn);
     auto b = graph->param(prefix + "_b" + suffix, {1, outDim}, inits::zeros());

     if(actName == "relu") {
       x = affineWithRelu(x, W, b);  // speed optimization for inference, @TODO: handle better in future layer framework
     } else {
       x = affine(x, W, b);
       x = activationByName(actName)(x);
     }

     x = dropout(x, dropProb);  // @TODO: check for inference?

     return x;
   }

   static inline Expr layerNorm(Expr x, std::string prefix, std::string suffix = std::string()) {
     int dimModel = x->shape()[-1];
     auto scale = x->graph()->param(prefix + "_ln_scale" + suffix, {1, dimModel}, inits::ones());
     auto bias  = x->graph()->param(prefix + "_ln_bias" + suffix, {1, dimModel}, inits::zeros());
     return marian::layerNorm(x, scale, bias, 1e-6f);
   }

   static inline Expr rmsNorm(Expr x, std::string prefix, std::string suffix = std::string()) {
     int dimModel = x->shape()[-1];
     auto scale = x->graph()->param(prefix + "_rms_scale" + suffix, {1, dimModel}, inits::ones());
     return marian::rmsNorm(x, scale, nullptr, 1e-6f);
   }

   }  // namespace marian
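
The first half of the file defines the object-style layer API: a concrete layer derives from ``LayerBase`` (which stores the graph and the options and exposes the ``opt<T>()`` accessors) and implements ``IUnaryLayer::apply``. The following sketch is not part of ``generic.h``; it shows a hypothetical minimal projection layer written against these interfaces, where the class name ``MyProjection`` and the reuse of the ``"prefix"``/``"dim"`` option keys (mirroring ``mlp::Dense``) are illustrative assumptions.

.. code-block:: cpp

   #include "layers/generic.h"

   // Illustrative sketch only (not part of generic.h): a hypothetical minimal
   // layer built on LayerBase + IUnaryLayer. Parameters are created on the
   // graph held by LayerBase; option keys mirror those used by mlp::Dense.
   class MyProjection : public marian::LayerBase, public marian::IUnaryLayer {
   public:
     MyProjection(marian::Ptr<marian::ExpressionGraph> graph, marian::Ptr<marian::Options> options)
         : LayerBase(graph, options) {}

     marian::Expr apply(marian::Expr x) override {
       auto name = opt<std::string>("prefix");  // parameter name prefix
       auto dim  = opt<int>("dim");             // output dimension
       auto W = graph_->param(name + "_W", {x->shape()[-1], dim}, marian::inits::glorotUniform());
       auto b = graph_->param(name + "_b", {1, dim}, marian::inits::zeros());
       return marian::affine(x, W, b);          // y = x * W + b
     }
   };

Because ``IUnaryLayer::apply(const std::vector<Expr>&)`` already forwards a single-element vector to the unary overload, only the unary ``apply`` needs to be overridden here.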
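
The ``static inline`` helpers at the end of the file (``activationByName``, ``denseInline``, ``layerNorm``, ``rmsNorm``) create their parameters on the fly from a name prefix, so small sub-networks can be written without instantiating layer objects. The sketch below is likewise illustrative rather than part of the header; it assumes an input ``Expr`` that already lives on an ``ExpressionGraph``, and the prefix ``"ffn"`` as well as the function name ``feedForwardBlock`` are made up for the example.

.. code-block:: cpp

   #include "layers/generic.h"   // brings in denseInline(), layerNorm(), rmsNorm()

   using namespace marian;

   // Illustrative sketch (not part of generic.h): a pre-norm feed-forward block
   // assembled from the on-the-fly helpers. "ffn" is a hypothetical
   // parameter-name prefix; x is assumed to live on some ExpressionGraph.
   Expr feedForwardBlock(Expr x, int dimFfn, float dropProb) {
     int dimModel = x->shape()[-1];
     auto h = layerNorm(x, "ffn", "_pre");                        // creates "ffn_ln_scale_pre", "ffn_ln_bias_pre"
     h = denseInline(h, "ffn", "1", dimFfn,
                     inits::glorotUniform(), "relu", dropProb);   // "ffn_W1"/"ffn_b1", fused affine + ReLU
     h = denseInline(h, "ffn", "2", dimModel,
                     inits::glorotUniform(), "", dropProb);       // "ffn_W2"/"ffn_b2", linear
     return x + h;                                                // residual connection around the block
   }

Passing ``"relu"`` as the activation name routes through the fused ``affineWithRelu`` path, while an empty name yields the identity activation from ``activationByName``.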