.. _program_listing_file_src_common_binary.cpp:

Program Listing for File binary.cpp
===================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_common_binary.cpp>` (``src/common/binary.cpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #include "common/binary.h"
   #include "common/definitions.h"
   #include "common/file_stream.h"
   #include "common/io_item.h"
   #include "common/types.h"
   #include "tensors/cpu/integer_common.h"

   #include <string>

   namespace marian {
   namespace io {
   namespace binary {

   // Fixed-size record describing one serialized item (name/type/shape/data sizes).
   struct Header {
     uint64_t nameLength;
     uint64_t type;
     uint64_t shapeLength;
     uint64_t dataLength;
   };

   // cast current void pointer to T pointer and move forward by num elements
   template <typename T>
   const T* get(const void*& current, uint64_t num = 1) {
     const T* ptr = (const T*)current;
     current = (const T*)current + num;
     return ptr;
   }

   void loadItems(const void* current, std::vector<io::Item>& items, bool mapped) {
     uint64_t binaryFileVersion = *get<uint64_t>(current);
     ABORT_IF(binaryFileVersion != BINARY_FILE_VERSION,
              "Binary file versions do not match: {} (file) != {} (expected)",
              binaryFileVersion,
              BINARY_FILE_VERSION);

     uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow
     const Header* headers = get<Header>(current, numHeaders); // read that many headers

     // prepopulate items with meta data from headers
     items.resize(numHeaders);
     for(int i = 0; i < numHeaders; ++i) {
       items[i].type = (Type)headers[i].type;
       items[i].name = get<char>(current, headers[i].nameLength);
       items[i].mapped = mapped;
     }

     // read in actual shape and data
     for(int i = 0; i < numHeaders; ++i) {
       uint64_t len = headers[i].shapeLength;
       items[i].shape.resize(len);
       const int* arr = get<int>(current, len); // read shape
       std::copy(arr, arr + len, items[i].shape.begin()); // copy to Item::shape
     }

     // move by offset bytes, aligned to 256-bytes boundary
     uint64_t offset = *get<uint64_t>(current);
     get<char>(current, offset);

     for(int i = 0; i < numHeaders; ++i) {
       // For intgemm AVX512 and AVX512VNNI have the same arrangement, but the VNNI algorithm is faster.
       // Change the type to the fastest one supported.
       if (items[i].type == Type::intgemm8avx512) {
         items[i].type = cpu::integer::getIntgemmType(Type::intgemm8);
       }
       if(items[i].mapped) { // memory-mapped, hence only set pointer
         // @TODO: verify this actually works for the hardware-specific ones like intgemm8avx2
         ABORT_IF(items[i].type == Type::intgemm8 || items[i].type == Type::intgemm16, "mmap format not supported for hardware non-specific intgemm matrices");
         items[i].ptr = get<char>(current, headers[i].dataLength);
       } else { // reading into item data
         uint64_t len = headers[i].dataLength;
         items[i].bytes.resize(len);
         const char* ptr = get<char>(current, len);
         // Intgemm8/16 matrices in binary model are just quantized, however they also need to be reordered
         // Reordering depends on the architecture (SSE/AVX2/AVX512) so we read in the quantized matrices and
         // then reorder them before adding them as a parameter in the graph.
         if (matchType<intgemm8>(items[i].type)) {
           items[i].type = cpu::integer::getIntgemmType(Type::intgemm8);
           cpu::integer::prepareAndTransposeB<Type::intgemm8>(items[i], ptr);
         } else if (matchType<intgemm16>(items[i].type)) {
           items[i].type = cpu::integer::getIntgemmType(Type::intgemm16);
           cpu::integer::prepareAndTransposeB<Type::intgemm16>(items[i], ptr);
         } else {
           std::copy(ptr, ptr + len, items[i].bytes.begin());
         }
       }
     }
   }

   void loadItems(const std::string& fileName, std::vector<io::Item>& items) {
     // Read file into buffer
     uint64_t fileSize = filesystem::fileSize(fileName);
     std::vector<char> buf(fileSize);
   // @TODO: check this again:
   #if 1 // for some reason, the #else branch fails with "file not found" in the *read* operation (open succeeds)
     FILE *f = fopen(fileName.c_str(), "rb");
     ABORT_IF(f == nullptr, "Error {} ('{}') opening file '{}'", errno, strerror(errno), fileName);
     auto rc = fread(buf.data(), sizeof(*buf.data()), buf.size(), f);
     ABORT_IF(rc != buf.size(), "Error {} ('{}') reading file '{}'", errno, strerror(errno), fileName);
     fclose(f);
   #else
     io::InputFileStream in(fileName);
     in.read(buf.data(), buf.size());
   #endif

     // Load items from buffer without mapping
     loadItems(buf.data(), items, false);
   }

   io::Item getItem(const void* current, const std::string& varName) {
     std::vector<io::Item> items;
     loadItems(current, items);

     for(auto& item : items)
       if(item.name == varName)
         return item;

     return io::Item();
   }

   io::Item getItem(const std::string& fileName, const std::string& varName) {
     std::vector<io::Item> items;
     loadItems(fileName, items);

     for(auto& item : items)
       if(item.name == varName)
         return item;

     return io::Item();
   }

   void saveItems(const std::string& fileName,
                  const std::vector<io::Item>& items) {
     io::OutputFileStream out(fileName);
     uint64_t pos = 0;

     uint64_t binaryFileVersion = BINARY_FILE_VERSION;
     pos += out.write(&binaryFileVersion);

     std::vector<Header> headers;
     for(const auto& item : items) {
       headers.push_back(Header{item.name.size() + 1,
                                (uint64_t)item.type,
                                item.shape.size(),
                                item.bytes.size()}); // binary item size with padding, will be 256-byte-aligned
     }

     uint64_t headerSize = headers.size();
     pos += out.write(&headerSize);
     pos += out.write(headers.data(), headers.size());

     // Write out all names
     for(const auto& item : items) {
       pos += out.write(item.name.data(), item.name.size() + 1);
     }
     // Write out all shapes
     for(const auto& item : items) {
       pos += out.write(item.shape.data(), item.shape.size());
     }

     // align to next 256-byte boundary
     uint64_t nextpos = ((pos + sizeof(uint64_t)) / 256 + 1) * 256;
     uint64_t offset = nextpos - pos - sizeof(uint64_t);

     pos += out.write(&offset);
     for(uint64_t i = 0; i < offset; i++) {
       char padding = 0;
       pos += out.write(&padding);
     }

     // Write out all values
     for(const auto& item : items)
       pos += out.write(item.data(), item.bytes.size()); // writes out data with padding, keeps 256-byte boundary.

     // Amazingly this is binary-compatible with V1 and aligned and
     // non-aligned models can be read with the same procedure.
     // No version-bump required. Gets 5-8% of speed back when mmapped.
   }

   } // namespace binary
   } // namespace io
   } // namespace marian