Program Listing for File alignment.h
↰ Return to documentation for file (src/data/alignment.h)
#pragma once
#include <sstream>
#include <tuple>
#include <vector>
namespace marian {
namespace data {
// Container holding a list of word alignment points.
//
// Each point couples a source position, a target position and a float value.
// push_back() appends points at the end of the internal vector; sort(),
// normalize() and toString() are defined out of line, so their exact
// behaviour is not visible from this header.
class WordAlignment {
public:
  // One alignment link.
  struct Point {
    size_t srcPos;  // source position
    size_t tgtPos;  // target position
    float prob;     // value attached to the link (presumably a probability -- confirm in the implementation)
  };

private:
  std::vector<Point> data_;  // the stored alignment points

public:
  // Default constructor; defined out of line.
  WordAlignment();

private:
  // Constructs from an existing vector of points. Private and defined out of line.
  WordAlignment(const std::vector<Point>& align);

public:
  // Constructs from a text line; defined out of line.
  // NOTE(review): the expected format of `line` and the exact role of
  // srcEosPos / tgtEosPos (presumably the end-of-sentence positions on each
  // side) are determined by the implementation -- confirm there.
  WordAlignment(const std::string& line, size_t srcEosPos, size_t tgtEosPos);

  // Unchecked access to the i-th point (forwards to std::vector::operator[]).
  Point& operator[](size_t i) { return data_[i]; }

  // Read-only counterpart, so that a const WordAlignment can be indexed just
  // like it can already be iterated via begin()/end().
  const Point& operator[](size_t i) const { return data_[i]; }

  // Read-only iteration over the stored points.
  auto begin() const -> decltype(data_.begin()) { return data_.begin(); }
  auto end() const -> decltype(data_.end()) { return data_.end(); }

  // Appends the point {s, t, p} at the end of the list.
  void push_back(size_t s, size_t t, float p) { data_.emplace_back(Point{ s, t, p }); }

  // Number of stored points.
  size_t size() const { return data_.size(); }

  // Defined out of line; the ordering criterion is not visible in this header.
  void sort();

  // Defined out of line; what is normalized, and the effect of `reverse`
  // (default false), are not visible in this header.
  void normalize(bool reverse=false);

  // Defined out of line; returns a string representation of the alignment.
  std::string toString() const;
};
// Soft alignment: the values P(src pos | trg pos) for every beam and batch index,
// kept in a flattened CPU-side array.
// The same type is used on the QuickSAND boundary, where beam size and batch size
// are both 1; there it reduces to a plain table [t][s] -> P(s|t).
// Layout: [trg pos][beam depth * max src length * batch size]
using SoftAlignment = std::vector<std::vector<float>>;
// Converts a soft alignment into a WordAlignment (a list of alignment points).
//   alignSoft  soft alignment to convert; taken by const reference.
//   threshold  defaults to 1.f.
// NOTE(review): how `threshold` selects points is decided by the out-of-line
// definition, which is not visible in this header -- confirm there.
WordAlignment ConvertSoftAlignToHardAlign(const SoftAlignment& alignSoft,
float threshold = 1.f);
// Produces a string from a soft alignment; the output format is determined by
// the out-of-line definition, which is not visible in this header.
// NOTE(review): `align` is taken by value, so a call with an lvalue argument
// copies the nested vectors. If the definition does not modify it, consider
// const SoftAlignment& -- this must be changed together with the definition.
std::string SoftAlignToString(SoftAlignment align);
} // namespace data
} // namespace marian