.. _program_listing_file_src_models_encoder_decoder.cpp:

Program Listing for File encoder_decoder.cpp
============================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_models_encoder_decoder.cpp>` (``src/models/encoder_decoder.cpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #include "models/encoder_decoder.h"
   #include "common/cli_helper.h"
   #include "common/filesystem.h"
   #include "common/version.h"

   namespace marian {

   EncoderDecoder::EncoderDecoder(Ptr<ExpressionGraph> graph, Ptr<Options> options)
       : LayerBase(graph, options),
         prefix_(options->get<std::string>("prefix", "")),
         inference_(options->get<bool>("inference", false)) {

     std::vector<std::string> encoderDecoderModelFeatures
         = {"type",
            "dim-vocabs",
            "dim-emb",
            "dim-rnn",
            "enc-cell",
            "enc-type",
            "enc-cell-depth",
            "enc-depth",
            "dec-depth",
            "dec-cell",
            "dec-cell-base-depth",
            "dec-cell-high-depth",
            "skip",
            "layer-normalization",
            "right-left",
            "input-types",
            "special-vocab",
            "tied-embeddings",
            "tied-embeddings-src",
            "tied-embeddings-all"};

     for(auto feature : encoderDecoderModelFeatures)
       modelFeatures_.insert(feature);

     modelFeatures_.insert("transformer-heads");
     modelFeatures_.insert("transformer-no-projection");
     modelFeatures_.insert("transformer-dim-ffn");
     modelFeatures_.insert("transformer-decoder-dim-ffn");
     modelFeatures_.insert("transformer-ffn-depth");
     modelFeatures_.insert("transformer-decoder-ffn-depth");
     modelFeatures_.insert("transformer-ffn-activation");
     modelFeatures_.insert("transformer-dim-aan");
     modelFeatures_.insert("transformer-aan-depth");
     modelFeatures_.insert("transformer-aan-activation");
     modelFeatures_.insert("transformer-aan-nogate");
     modelFeatures_.insert("transformer-preprocess");
     modelFeatures_.insert("transformer-postprocess");
     modelFeatures_.insert("transformer-postprocess-emb");
     modelFeatures_.insert("transformer-postprocess-top");
     modelFeatures_.insert("transformer-decoder-autoreg");
     modelFeatures_.insert("transformer-tied-layers");
     modelFeatures_.insert("transformer-guided-alignment-layer");
     modelFeatures_.insert("transformer-train-position-embeddings");
     modelFeatures_.insert("transformer-pool");

     modelFeatures_.insert("bert-train-type-embeddings");
     modelFeatures_.insert("bert-type-vocab-size");

     modelFeatures_.insert("ulr");
     modelFeatures_.insert("ulr-trainable-transformation");
     modelFeatures_.insert("ulr-dim-emb");
     modelFeatures_.insert("lemma-dim-emb");
     modelFeatures_.insert("output-omit-bias");
     modelFeatures_.insert("lemma-dependency");
     modelFeatures_.insert("factors-combine");
     modelFeatures_.insert("factors-dim-emb");
   }

   std::vector<Ptr<EncoderBase>>& EncoderDecoder::getEncoders() {
     return encoders_;
   }

   void EncoderDecoder::push_back(Ptr<EncoderBase> encoder) {
     encoders_.push_back(encoder);
   }

   std::vector<Ptr<DecoderBase>>& EncoderDecoder::getDecoders() {
     return decoders_;
   }

   void EncoderDecoder::push_back(Ptr<DecoderBase> decoder) {
     decoders_.push_back(decoder);
   }

   void EncoderDecoder::createDecoderConfig(const std::string& name) {
     Config::YamlNode decoder;

     if(options_->get<bool>("relative-paths")) {
       decoder["relative-paths"] = true;
       // we can safely use a bare model file name here, because the config file is
       // created in the same directory as the model file
       auto modelFileName = filesystem::Path{name}.filename().string();
       decoder["models"] = std::vector<std::string>({modelFileName});

       // create relative paths to vocabs with regard to saved model checkpoint
       auto dirPath = filesystem::Path{name}.parentPath();
       std::vector<std::string> relativeVocabs;
       const auto& vocabs = options_->get<std::vector<std::string>>("vocabs");
       std::transform(
           vocabs.begin(),
           vocabs.end(),
           std::back_inserter(relativeVocabs),
           [&](const std::string& p) -> std::string {
             return filesystem::relative(filesystem::Path{p}, dirPath).string();
           });

       decoder["vocabs"] = relativeVocabs;
     } else {
       decoder["relative-paths"] = false;
       decoder["models"] = std::vector<std::string>({name});
       decoder["vocabs"] = options_->get<std::vector<std::string>>("vocabs");
     }

     decoder["beam-size"] = opt<size_t>("beam-size");
     decoder["normalize"] = opt<float>("normalize");
     decoder["word-penalty"] = opt<float>("word-penalty");

     decoder["mini-batch"] = opt<size_t>("valid-mini-batch");
     decoder["maxi-batch"] = opt<size_t>("valid-mini-batch") > 1 ? 100 : 1;
     decoder["maxi-batch-sort"] = opt<size_t>("valid-mini-batch") > 1 ? "src" : "none";

     io::OutputFileStream out(name + ".decoder.yml");
     out << decoder;
   }

   Config::YamlNode EncoderDecoder::getModelParameters() {
     Config::YamlNode modelParams;
     auto clone = options_->cloneToYamlNode();
     for(auto& key : modelFeatures_)
       modelParams[key] = clone[key];

     if(options_->has("original-type"))
       modelParams["type"] = clone["original-type"];

     modelParams["version"] = buildVersion();
     return modelParams;
   }

   std::string EncoderDecoder::getModelParametersAsString() {
     auto yaml = getModelParameters();
     YAML::Emitter out;
     cli::OutputYaml(yaml, out);
     return std::string(out.c_str());
   }

   void EncoderDecoder::load(Ptr<ExpressionGraph> graph,
                             const std::vector<io::Item>& items,
                             bool markedReloaded) {
     graph->load(items, markedReloaded && !opt<bool>("ignore-model-config", false));
   }

   void EncoderDecoder::load(Ptr<ExpressionGraph> graph,
                             const std::string& name,
                             bool markedReloaded) {
     graph->load(name, markedReloaded && !opt<bool>("ignore-model-config", false));
   }

   void EncoderDecoder::mmap(Ptr<ExpressionGraph> graph,
                             const void* ptr,
                             bool markedReloaded) {
     graph->mmap(ptr, markedReloaded && !opt<bool>("ignore-model-config", false));
   }

   void EncoderDecoder::save(Ptr<ExpressionGraph> graph,
                             const std::string& name,
                             bool saveTranslatorConfig) {
     // ignore config for now
     LOG(info, "Saving model weights and runtime parameters to {}", name);

     graph->save(name, getModelParametersAsString());

     if(saveTranslatorConfig)
       createDecoderConfig(name);
   }

   void EncoderDecoder::clear(Ptr<ExpressionGraph> graph) {
     graph->clear();

     for(auto& enc : encoders_)
       enc->clear();
     for(auto& dec : decoders_)
       dec->clear();
   }

   Ptr<DecoderState> EncoderDecoder::startState(Ptr<ExpressionGraph> graph,
                                                Ptr<data::CorpusBatch> batch) {
     std::vector<Ptr<EncoderState>> encoderStates;
     for(auto& encoder : encoders_)
       encoderStates.push_back(encoder->build(graph, batch));

     // initialize shortlist here
     if(shortlistGenerator_) {
       auto shortlist = shortlistGenerator_->generate(batch);
       decoders_[0]->setShortlist(shortlist);
     }

     return decoders_[0]->startState(graph, batch, encoderStates);
   }

   Ptr<DecoderState> EncoderDecoder::step(Ptr<ExpressionGraph> graph,
                                          Ptr<DecoderState> state,
                                          const std::vector<IndexType>& hypIndices,   // [beamIndex * activeBatchSize + batchIndex]
                                          const Words& words,                         // [beamIndex * activeBatchSize + batchIndex]
                                          const std::vector<IndexType>& batchIndices, // [batchIndex]
                                          int beamSize) {
     // create updated state that reflects reordering and dropping of hypotheses
     state = hypIndices.empty() ? state : state->select(hypIndices, batchIndices, beamSize);

     // Fill state with embeddings based on last prediction
     decoders_[0]->embeddingsFromPrediction(graph, state, words, (int)batchIndices.size(), beamSize);
     auto nextState = decoders_[0]->step(graph, state);

     return nextState;
   }

   Ptr<DecoderState> EncoderDecoder::stepAll(Ptr<ExpressionGraph> graph,
                                             Ptr<data::CorpusBatch> batch,
                                             bool clearGraph) {
     if(clearGraph)
       clear(graph);

     // Required first step, also initializes shortlist
     auto state = startState(graph, batch);

     // Fill state with embeddings from batch (ground truth)
     decoders_[0]->embeddingsFromBatch(graph, state, batch);
     auto nextState = decoders_[0]->step(graph, state);
     nextState->setTargetMask(state->getTargetMask());
     nextState->setTargetWords(state->getTargetWords());

     return nextState;
   }

   Logits EncoderDecoder::build(Ptr<ExpressionGraph> graph,
                                Ptr<data::CorpusBatch> batch,
                                bool clearGraph) {
     auto state = stepAll(graph, batch, clearGraph);

     // returns raw logits
     return state->getLogProbs();
   }

   Logits EncoderDecoder::build(Ptr<ExpressionGraph> graph,
                                Ptr<data::Batch> batch,
                                bool clearGraph) {
     auto corpusBatch = std::static_pointer_cast<data::CorpusBatch>(batch);
     return build(graph, corpusBatch, clearGraph);
   }

   }  // namespace marian
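
The listing defines two entry points into the decoder: the training path (``stepAll``/``build``, which feeds ground-truth target embeddings from the batch) and the search path (``startState`` followed by repeated ``step`` calls that reorder live hypotheses via ``hypIndices``). The sketch below shows how a decoding driver might exercise the search-path API; it is illustrative only: the surrounding setup (``model``, ``graph``, ``batch``), the greedy loop, and the length cap are assumptions, not code from this file.

.. code-block:: cpp

   #include <numeric> // std::iota

   // Sketch of a greedy decoding driver, assuming model, graph and batch were
   // constructed elsewhere; only the EncoderDecoder calls mirror the listing above.
   void greedyDecodeSketch(Ptr<EncoderDecoder> model,
                           Ptr<ExpressionGraph> graph,
                           Ptr<data::CorpusBatch> batch) {
     // Runs all encoders over the batch and initializes the shortlist (if any).
     auto state = model->startState(graph, batch);

     std::vector<IndexType> hypIndices;              // empty => no reordering on the first step
     std::vector<IndexType> batchIndices(batch->size());
     std::iota(batchIndices.begin(), batchIndices.end(), 0); // all sentences still active
     Words prevWords;                                // last prediction per hypothesis
     int beamSize = 1;                               // greedy: one hypothesis per sentence

     for(int t = 0; t < /*hypothetical length cap*/ 100; ++t) {
       // Embeds the previous predictions and advances the decoder by one position.
       state = model->step(graph, state, hypIndices, prevWords, batchIndices, beamSize);
       // state->getLogProbs() now holds next-token scores; a real driver would pick
       // the argmax here and refill prevWords/hypIndices/batchIndices accordingly.
     }
   }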