.. _program_listing_file_src_data_text_input.cpp: Program Listing for File text_input.cpp ======================================= |exhale_lsh| :ref:`Return to documentation for file ` (``src/data/text_input.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "data/text_input.h" #include "common/utils.h" namespace marian { namespace data { TextIterator::TextIterator() : pos_(-1), tup_(0) {} TextIterator::TextIterator(TextInput& corpus) : corpus_(&corpus), pos_(0), tup_(corpus_->next()) {} void TextIterator::increment() { tup_ = corpus_->next(); pos_++; } bool TextIterator::equal(TextIterator const& other) const { return this->pos_ == other.pos_ || (!this->tup_.valid() && !other.tup_.valid()); } const SentenceTuple& TextIterator::dereference() const { return tup_; } TextInput::TextInput(std::vector inputs, std::vector> vocabs, Ptr options) : DatasetBase(inputs, options), vocabs_(vocabs), maxLength_(options_->get("max-length")), maxLengthCrop_(options_->get("max-length-crop")) { // Note: inputs are automatically stored in the inherited variable named paths_, but these are // texts not paths! for(const auto& text : paths_) files_.emplace_back(new std::istringstream(text)); } // TextInput is mainly used for inference in the server mode, not for training, so skipping too long // or ill-formed inputs is not necessary here SentenceTuple TextInput::next() { // get index of the current sentence size_t curId = pos_++; // fill up the sentence tuple with source and/or target sentences SentenceTupleImpl tup(curId); for(size_t i = 0; i < files_.size(); ++i) { std::string line; if(io::getline(*files_[i], line)) { Words words = vocabs_[i]->encode(line, /*addEOS=*/true, /*inference=*/inference_); if(this->maxLengthCrop_ && words.size() > this->maxLength_) { words.resize(maxLength_); words.back() = vocabs_.back()->getEosId(); // note: this will not work with class-labels } ABORT_IF(words.empty(), "No words (not even EOS) found in string??"); ABORT_IF(tup.size() != i, "Previous tuple elements are missing."); tup.push_back(words); } } if(tup.size() == files_.size()) // check if each input file provided an example return SentenceTuple(tup); else if(tup.size() == 0) // if no file provided examples we are done return SentenceTupleImpl(); // return an empty tuple if above test does not pass(); else // neither all nor none => we have at least on missing entry ABORT("There are missing entries in the text tuples."); } } // namespace data } // namespace marian