Program Listing for File marian_conv.cpp

Source file: src/command/marian_conv.cpp

#include "marian.h"
#include "common/cli_wrapper.h"
#include "tensors/cpu/expression_graph_packable.h"
#include "onnx/expression_graph_onnx_exporter.h"
#include "layers/lsh.h"
#include "data/shortlist.h"
#include <sstream>

int main(int argc, char** argv) {
  using namespace marian;

  createLoggers();

  auto options = New<Options>();
  {
    YAML::Node config; // @TODO: get rid of YAML::Node here entirely to avoid the pattern. Currently not fixing as it requires more changes to the Options object.
    auto cli = New<cli::CLIWrapper>(
        config,
        "Convert a model in the .npz format and normal memory layout to a mmap-able binary model which could be in normal memory layout or packed memory layout\n"
        "or convert a text lexical shortlist to a binary shortlist with {--shortlist,-s} option",
        "Allowed options",
        "Examples:\n"
        "  ./marian-conv -f model.npz -t model.bin --gemm-type packed16");
    cli->add<std::string>("--from,-f", "Input model", "model.npz");
    cli->add<std::string>("--to,-t", "Output model", "model.bin");
    cli->add<std::string>("--export-as", "Kind of conversion: marian-bin or onnx-{encode,decoder-step,decoder-init,decoder-stop}", "marian-bin");
    cli->add<std::string>("--gemm-type,-g", "GEMM Type to be used: float32, packed16, packed8avx2, packed8avx512, "
                          "intgemm8, intgemm8ssse3, intgemm8avx2, intgemm8avx512, intgemm16, intgemm16sse2, intgemm16avx2, intgemm16avx512",
                          "float32");
    cli->add<std::vector<std::string>>("--add-lsh",
                                       "Encode output matrix and optional rotation matrix into model file. "
                                       "arg1: number of bits in LSH encoding, arg2: name of output weights matrix")->implicit_val("1024 Wemb");
    cli->add<std::vector<std::string>>("--vocabs,-V", "Vocabulary file, required for ONNX export");
    cli->add<std::vector<std::string>>("--shortlist,-s", "Shortlist conversion: filePath firstNum bestNum threshold");
    cli->add<std::string>("--dump-shortlist,-d", "Binary shortlist dump path","lex.bin");
    cli->parse(argc, argv);
    options->merge(config);
  }

  // shortlist conversion:
  // ./marian-conv --shortlist lex.esen.s2t 100 100 0 --dump-shortlist lex.esen.bin --vocabs vocab.esen.spm vocab.esen.spm
  if(options->hasAndNotEmpty("shortlist")) {
    auto vocabPaths = options->get<std::vector<std::string>>("vocabs");
    auto dumpPath = options->get<std::string>("dump-shortlist");

    Ptr<Vocab> srcVocab = New<Vocab>(options, 0);
    srcVocab->load(vocabPaths[0]);
    Ptr<Vocab> trgVocab = New<Vocab>(options, 1);
    trgVocab->load(vocabPaths[1]);

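    // Build the binary generator directly from the text shortlist given via
    // --shortlist (filePath firstNum bestNum threshold). The 0 and 1 select the
    // source/target vocabulary indices; the final flag presumably marks a shared
    // vocabulary when both paths are identical.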
    Ptr<const data::ShortlistGenerator> binaryShortlistGenerator
        = New<data::BinaryShortlistGenerator>(options, srcVocab, trgVocab, 0, 1, vocabPaths[0] == vocabPaths[1]);
    binaryShortlistGenerator->dump(dumpPath);
    LOG(info, "Dumping of the shortlist is finished");
    return 0;
  }

  auto modelFrom = options->get<std::string>("from");
  auto modelTo = options->get<std::string>("to");

  auto exportAs = options->get<std::string>("export-as");
  auto vocabPaths = options->get<std::vector<std::string>>("vocabs");

  bool addLsh = options->hasAndNotEmpty("add-lsh");
  int lshNBits = 1024;
  std::string lshOutputWeights = "Wemb";
  if(addLsh) {
    auto lshParams   = options->get<std::vector<std::string>>("add-lsh");
    lshNBits         = std::stoi(lshParams[0]);
    if(lshParams.size() > 1)
      lshOutputWeights = lshParams[1];
  }
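  // A bare --add-lsh uses the implicit "1024 Wemb" above; an explicit
  // --add-lsh 512 Wemb (hypothetical values) would request 512 hash bits over
  // the output matrix Wemb.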

  // We accept any type here and will later croak during packAndSave if the type cannot be used for conversion
  Type saveGemmType = typeFromString(options->get<std::string>("gemm-type", "float32"));

  LOG(info, "Outputting {}, precision: {}", modelTo, saveGemmType);

  YAML::Node config;
  std::stringstream configStr;
  marian::io::getYamlFromModel(config, "special:model.yml", modelFrom);
  configStr << config;
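  // "special:model.yml" is the training configuration Marian embeds inside the
  // model file; it is re-serialized here so it can be written unchanged into the
  // converted model.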

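  // binary model conversion (mirrors the example from the --help text):
  //   ./marian-conv -f model.npz -t model.bin --gemm-type packed16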
  if (exportAs == "marian-bin") {
    auto graph = New<ExpressionGraphPackable>();
    graph->setDevice(CPU0);
    graph->load(modelFrom);

    std::vector<lsh::ParamConvInfo> toBeLSHed;
    if(addLsh) {
      // Add dummy parameters for the LSH before the model gets actually initialized.
      // This creates the parameters with useless values in their tensors, but it gives us the memory we need.
      toBeLSHed = {
        {lshOutputWeights, "lsh_output_codes", "lsh_output_rotation", lshNBits}
      };

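      // Toggling setReloaded(false) appears to permit creating new parameters in
      // a graph that has already loaded a model; it is restored right after.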
      graph->setReloaded(false);
      for(auto p : toBeLSHed)
        lsh::addDummyParameters(graph, /*paramInfo=*/p);
      graph->setReloaded(true);
    }

    graph->forward();  // run the initializers

    if(addLsh) {
      // After initialization, hijack the parameters for the LSH and force-overwrite them with correct values.
      // Once this is done we can just pack and save as normal.
      for(auto p : toBeLSHed)
        lsh::overwriteDummyParameters(graph, /*paramInfo=*/p);
    }

    // Pack the weights if requested and save; saveGemmType decides whether and how they are packed.
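    // (Assumption from packAndSave's signature: the trailing Type::float32 is the
    // element type used for saving the remaining, non-packed tensors.)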
    graph->packAndSave(modelTo, configStr.str(), /* --gemm-type */ saveGemmType, Type::float32);
  }
  else if (exportAs == "onnx-encode") {
#ifdef USE_ONNX
    auto graph = New<ExpressionGraphONNXExporter>();
    graph->setDevice(CPU0);
    graph->load(modelFrom);
    graph->forward();  // run the initializers
    auto modelOptions = New<Options>(config)->with("vocabs", vocabPaths, "inference", true);
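    // Merge the embedded model config with the vocabularies and inference mode,
    // presumably so the exporter can reconstruct the full inference graph.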

    graph->exportToONNX(modelTo, modelOptions, vocabPaths);
#else
    ABORT("--export-as onnx-encode requires Marian to be built with USE_ONNX=ON");
#endif // USE_ONNX
  }
  else
    ABORT("Unknown --export-as value: {}", exportAs);

  LOG(info, "Finished");

  return 0;
}