.. _program_listing_file_src_common_file_stream.cpp: Program Listing for File file_stream.cpp ======================================== |exhale_lsh| :ref:`Return to documentation for file ` (``src/common/file_stream.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "common/file_stream.h" #include "common/utils.h" #include #include #include #include #ifdef _MSC_VER #include #include #include #include #else #include #include #endif namespace marian { namespace io { InputFileStream::InputFileStream(const std::string &file) : std::istream(NULL) { // the special syntax "command |" starts command in a sh shell and reads out its result if (marian::utils::endsWith(file, "|")) { #ifdef __unix__ auto command = file.substr(0, file.size() - 1); // open as a pipe pipe_ = popen(command.c_str(), "r"); ABORT_IF(!pipe_, "Command failed to execute ({}): {}", errno, command); // there is no official way to construct a filebuf from a FILE* or fd, so we use /proc/{pid}/fd/{fd} // For now, this only works on Linux. There are similar workarounds for Windows. file_ = "/proc/" + std::to_string(getpid()) + "/fd/" + std::to_string(fileno(pipe_)); #else ABORT("Pipe syntax not supported in this build of Marian: {}", file); #endif } else { ABORT_IF(!marian::filesystem::exists(file), "File '{}' does not exist", file); file_ = file; } streamBuf1_.reset(new std::filebuf()); auto ret = static_cast(streamBuf1_.get())->open(file_.string().c_str(), std::ios::in | std::ios::binary); ABORT_IF(!ret, "Error opening file ({}): {}", errno, file_.string()); ABORT_IF(ret != streamBuf1_.get(), "Return value is not equal to streambuf pointer, that is weird"); // insert .gz decompression if(marian::utils::endsWith(file, ".gz")) { streamBuf2_ = std::move(streamBuf1_); streamBuf1_.reset(new zstr::istreambuf(streamBuf2_.get())); } // initialize the underlying istream this->init(streamBuf1_.get()); } InputFileStream::~InputFileStream() { #ifdef __unix__ // (pipe syntax is only supported on UNIX-like OS) if (pipe_) pclose(pipe_); // non-NULL if pipe syntax was used #endif } bool InputFileStream::empty() { return this->peek() == std::ifstream::traits_type::eof(); } void InputFileStream::setbufsize(size_t size) { rdbuf()->pubsetbuf(0, 0); readBuf_.resize(size); rdbuf()->pubsetbuf(readBuf_.data(), readBuf_.size()); } std::string InputFileStream::getFileName() const { return file_.string(); } // wrapper around std::getline() that handles Windows input files with extra CR // chars at the line end std::istream &getline(std::istream &in, std::string &line) { std::getline(in, line); // bad() seems to be correct here. Should not abort on EOF. ABORT_IF(in.bad(), "Error reading from stream"); // strip terminal CR if present if(in && !line.empty() && line.back() == in.widen('\r')) line.pop_back(); return in; } OutputFileStream::OutputFileStream(const std::string &file) : std::ostream(NULL), file_(file) { streamBuf1_.reset(new std::filebuf()); auto ret = static_cast(streamBuf1_.get())->open(file.c_str(), std::ios::out | std::ios_base::binary); ABORT_IF(!ret, "File cannot be opened", file); ABORT_IF(ret != streamBuf1_.get(), "Return value is not equal to streambuf pointer, that is weird"); if(file_.extension() == marian::filesystem::Path(".gz")) { streamBuf2_.reset(new zstr::ostreambuf(streamBuf1_.get())); this->init(streamBuf2_.get()); } else { this->init(streamBuf1_.get()); } } OutputFileStream::OutputFileStream() : std::ostream(NULL) {} OutputFileStream::~OutputFileStream() { this->flush(); } std::string OutputFileStream::getFileName() const { return file_.string(); } TemporaryFile::TemporaryFile(const std::string &base, bool earlyUnlink) : OutputFileStream(), unlink_(earlyUnlink) { std::string baseTemp(base); NormalizeTempPrefix(baseTemp); MakeTemp(baseTemp); inSteam_ = UPtr(new io::InputFileStream(file_.string())); if(unlink_) { ABORT_IF(remove(file_.string().c_str()), "Error while deleting '{}'", file_.string()); } } TemporaryFile::~TemporaryFile() { if(!unlink_) // We do not check for errors here as this is the destructor and we cannot really fix an error anyway. remove(file_.string().c_str()), "Error while deleting '{}'", file_.string(); } void TemporaryFile::NormalizeTempPrefix(std::string &base) const { if(base.empty()) return; #ifdef _MSC_VER if(base.substr(0, 4) == "/tmp") base = getenv("TMP"); #else if(base[base.size() - 1] == '/') return; struct stat sb; // It's fine for it to not exist. if(stat(base.c_str(), &sb) == -1) return; if(S_ISDIR(sb.st_mode)) base += '/'; #endif } void TemporaryFile::MakeTemp(const std::string &base) { #ifdef _MSC_VER char *name = tempnam(base.c_str(), "marian."); ABORT_IF(name == NULL, "Error while making a temporary based on '{}'", base); int oflag = _O_RDWR | _O_CREAT | _O_EXCL; if(unlink_) oflag |= _O_TEMPORARY; int fd = open(name, oflag, _S_IREAD | _S_IWRITE); ABORT_IF(fd == -1, "Error while making a temporary based on '{}'", base); file_ = std::string(name); #else // create temp file std::string name(base); name += "marian.XXXXXX"; name.push_back(0); int fd = mkstemp(&name[0]); ABORT_IF(fd == -1, "Error creating temp file {}", name); file_ = name; #endif // open again with c++ streamBuf1_.reset(new std::filebuf()); auto ret = static_cast(streamBuf1_.get())->open(name, std::ios::out | std::ios_base::binary); ABORT_IF(!streamBuf1_, "File {} cannot be temp opened", name); ABORT_IF(ret != streamBuf1_.get(), "Return value ({}) is not equal to streambuf pointer ({}), that is weird.", (size_t)ret, (size_t)streamBuf1_.get()); this->init(streamBuf1_.get()); // close original file descriptor ABORT_IF(close(fd), "Can't close file descriptor", name); #ifdef _MSC_VER free(name); #endif } UPtr TemporaryFile::getInputStream() { return std::move(inSteam_); } std::string TemporaryFile::getFileName() const { return file_.string(); } } // namespace io } // namespace marian