Program Listing for File alignment.h

Return to documentation for file (src/data/alignment.h)

#pragma once

#include <cstddef>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>

namespace marian {
namespace data {

/// Container of word-alignment points.
///
/// Only the small accessors below are defined inline. The constructors,
/// sort(), normalize() and toString() are merely declared here; their
/// definitions live outside this header.
class WordAlignment {
public:
  /// One alignment point: a pair of positions plus a probability value.
  struct Point {
      size_t srcPos;  // position index on the source side
      size_t tgtPos;  // position index on the target side
      float prob;     // value attached to this (srcPos, tgtPos) pair
  };
private:
  // The stored alignment points; push_back() appends to the end.
  std::vector<Point> data_;

public:
  /// Default constructor (defined out of line).
  WordAlignment();

private:
  /// Builds an alignment from an existing vector of points (defined out of
  /// line). Private, so only members and friends can use it.
  WordAlignment(const std::vector<Point>& align);
public:

  /// Builds an alignment from the text in `line` (defined out of line).
  /// NOTE(review): `srcEosPos` / `tgtEosPos` are presumably the end-of-sentence
  /// positions on the source and target side; the expected format of `line`
  /// and the exact use of these arguments are not visible here - confirm
  /// against the definition.
  WordAlignment(const std::string& line, size_t srcEosPos, size_t tgtEosPos);

  /// Unchecked mutable access to the i-th stored point (no bounds check,
  /// same contract as std::vector::operator[]).
  Point& operator[](size_t i) { return data_[i]; }

  /// Unchecked read-only access to the i-th stored point, so that an
  /// alignment held through a const reference can be indexed as well as
  /// iterated.
  const Point& operator[](size_t i) const { return data_[i]; }

  /// Read-only iteration over the stored points.
  auto begin() const -> decltype(data_.begin()) { return data_.begin(); }
  auto end()   const -> decltype(data_.end())   { return data_.end(); }

  /// Appends the point {s, t, p} (source position, target position,
  /// probability) to the end of the container.
  void push_back(size_t s, size_t t, float p) { data_.emplace_back(Point{ s, t, p }); }

  /// Number of stored points.
  size_t size() const { return data_.size(); }

  /// Reorders the stored points (defined out of line; the ordering criterion
  /// is not visible in this header).
  void sort();

  /// Normalizes the stored alignment (defined out of line).
  /// NOTE(review): the meaning of `reverse` is not visible here - confirm
  /// against the definition.
  void normalize(bool reverse=false);

  /// Returns a textual representation of the alignment (defined out of line;
  /// the output format is determined there).
  std::string toString() const;
};

// A soft alignment holds P(src pos | trg pos) for every beam and batch index,
// kept in a flattened CPU-side array.
// Layout: [trg pos][beam depth * max src length * batch size].
// It is also used on the QuickSAND boundary, where beam and batch size are both 1;
// in that case the layout reduces to [t][s] -> P(s|t).
using SoftAlignment = std::vector<std::vector<float>>;

// Declaration only (defined out of line): builds a WordAlignment (hard
// alignment) from the soft alignment `alignSoft`.
// `threshold` defaults to 1.f.
// NOTE(review): how `threshold` selects alignment points is decided by the
// definition and is not visible here - confirm there before relying on it.
WordAlignment ConvertSoftAlignToHardAlign(const SoftAlignment& alignSoft,
                                          float threshold = 1.f);

// Declaration only (defined out of line): returns a std::string built from the
// soft alignment `align`; the output format is determined by the definition.
// NOTE(review): `align` is taken by value, so passing an lvalue copies the
// nested vectors on every call. Switching to a const reference would need the
// out-of-line definition to change in step with this declaration.
std::string SoftAlignToString(SoftAlignment align);

}  // namespace data
}  // namespace marian