Program Listing for File binary.cpp
↰ Return to documentation for file (src/common/binary.cpp)
#include "common/binary.h"
#include "common/definitions.h"
#include "common/file_stream.h"
#include "common/io_item.h"
#include "common/types.h"
#include "tensors/cpu/integer_common.h"
#include <string>
namespace marian {
namespace io {
namespace binary {
struct Header {
uint64_t nameLength;
uint64_t type;
uint64_t shapeLength;
uint64_t dataLength;
};
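// Sketch of the on-disk layout implied by loadItems()/saveItems() below
// (all counts come from the Header entries):
//   uint64_t binaryFileVersion          // must equal BINARY_FILE_VERSION
//   uint64_t numHeaders                 // number of items in the file
//   Header   headers[numHeaders]
//   char     names[]                    // numHeaders null-terminated strings, nameLength bytes each
//   int      shapes[]                   // shapeLength ints per item
//   uint64_t offset                     // number of padding bytes that follow
//   char     padding[offset]            // zeros; aligns the data section to a 256-byte boundary
//   char     data[]                     // dataLength bytes per item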
// cast current void pointer to T pointer and move forward by num elements
template <typename T>
const T* get(const void*& current, uint64_t num = 1) {
const T* ptr = (const T*)current;
current = (const T*)current + num;
return ptr;
}
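// Usage sketch for get<T>(): each call reinterprets the cursor, returns it as
// const T*, and advances the cursor by num elements, e.g.
//   const void* cur = buffer;                 // `buffer` is a hypothetical model blob
//   uint64_t version = *get<uint64_t>(cur);   // read one uint64_t, advance 8 bytes
//   const Header* hdrs = get<Header>(cur, n); // read n Headers, advance n * sizeof(Header) bytes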
void loadItems(const void* current, std::vector<io::Item>& items, bool mapped) {
uint64_t binaryFileVersion = *get<uint64_t>(current);
ABORT_IF(binaryFileVersion != BINARY_FILE_VERSION,
"Binary file versions do not match: {} (file) != {} (expected)",
binaryFileVersion,
BINARY_FILE_VERSION);
uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow
const Header* headers = get<Header>(current, numHeaders); // read that many headers
// prepopulate items with meta data from headers
items.resize(numHeaders);
for(int i = 0; i < numHeaders; ++i) {
items[i].type = (Type)headers[i].type;
items[i].name = get<char>(current, headers[i].nameLength);
items[i].mapped = mapped;
}
// read in actual shape and data
for(int i = 0; i < numHeaders; ++i) {
uint64_t len = headers[i].shapeLength;
items[i].shape.resize(len);
const int* arr = get<int>(current, len); // read shape
std::copy(arr, arr + len, items[i].shape.begin()); // copy to Item::shape
}
// skip offset bytes of padding so that the data section starts on a 256-byte boundary
uint64_t offset = *get<uint64_t>(current);
get<char>(current, offset);
for(int i = 0; i < numHeaders; ++i) {
// For intgemm, AVX512 and AVX512VNNI use the same arrangement, but the VNNI algorithm is faster.
// Change the type to the fastest one supported.
if (items[i].type == Type::intgemm8avx512) {
items[i].type = cpu::integer::getIntgemmType(Type::intgemm8);
}
if(items[i].mapped) { // memory-mapped, hence only set pointer
// @TODO: verify this actually works for the hardware-specific ones like intgemm8avx2
ABORT_IF(items[i].type == Type::intgemm8 || items[i].type == Type::intgemm16, "mmap format not supported for hardware non-specific intgemm matrices");
items[i].ptr = get<char>(current, headers[i].dataLength);
} else { // reading into item data
uint64_t len = headers[i].dataLength;
items[i].bytes.resize(len);
const char* ptr = get<char>(current, len);
// Intgemm8/16 matrices in the binary model are only quantized; they still need to be reordered.
// The reordering depends on the architecture (SSE/AVX2/AVX512), so we read in the quantized matrices
// and then reorder them before adding them as parameters to the graph.
if (matchType<intgemm8>(items[i].type)) {
items[i].type = cpu::integer::getIntgemmType(Type::intgemm8);
cpu::integer::prepareAndTransposeB<Type::intgemm8>(items[i], ptr);
} else if (matchType<intgemm16>(items[i].type)) {
items[i].type = cpu::integer::getIntgemmType(Type::intgemm16);
cpu::integer::prepareAndTransposeB<Type::intgemm16>(items[i], ptr);
} else {
std::copy(ptr, ptr + len, items[i].bytes.begin());
}
}
}
}
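// Usage sketch (caller-side, names are hypothetical): with mapped=true the
// buffer must outlive the items, e.g. a memory mapping held elsewhere, since
// each Item::ptr then points into that buffer instead of the data being copied
// into Item::bytes.
//   std::vector<io::Item> items;
//   loadItems(mappedModelPtr, items, /*mapped=*/true);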
void loadItems(const std::string& fileName, std::vector<io::Item>& items) {
// Read file into buffer
uint64_t fileSize = filesystem::fileSize(fileName);
std::vector<char> buf(fileSize);
// @TODO: check this again:
#if 1 // for some reason, the #else branch fails with "file not found" in the *read* operation (open succeeds)
FILE *f = fopen(fileName.c_str(), "rb");
ABORT_IF(f == nullptr, "Error {} ('{}') opening file '{}'", errno, strerror(errno), fileName);
auto rc = fread(buf.data(), sizeof(*buf.data()), buf.size(), f);
ABORT_IF(rc != buf.size(), "Error {} ('{}') reading file '{}'", errno, strerror(errno), fileName);
fclose(f);
#else
io::InputFileStream in(fileName);
in.read(buf.data(), buf.size());
#endif
// Load items from buffer without mapping
loadItems(buf.data(), items, false);
}
io::Item getItem(const void* current, const std::string& varName) {
std::vector<io::Item> items;
loadItems(current, items);
for(auto& item : items)
if(item.name == varName)
return item;
return io::Item();
}
io::Item getItem(const std::string& fileName, const std::string& varName) {
std::vector<io::Item> items;
loadItems(fileName, items);
for(auto& item : items)
if(item.name == varName)
return item;
return io::Item();
}
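// Usage sketch (path and parameter name are hypothetical): getItem() parses the
// whole file via loadItems() and returns the first item whose name matches, or
// a default-constructed (empty) Item if none does.
//   io::Item emb = getItem("model.bin", "Wemb");
//   if(!emb.name.empty()) { /* emb.shape, emb.type and emb.bytes describe the tensor */ }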
void saveItems(const std::string& fileName,
const std::vector<io::Item>& items) {
io::OutputFileStream out(fileName);
uint64_t pos = 0;
uint64_t binaryFileVersion = BINARY_FILE_VERSION;
pos += out.write(&binaryFileVersion);
std::vector<Header> headers;
for(const auto& item : items) {
headers.push_back(Header{item.name.size() + 1,
(uint64_t)item.type,
item.shape.size(),
item.bytes.size()}); // binary item size with padding, will be 256-byte-aligned
}
uint64_t headerSize = headers.size();
pos += out.write(&headerSize);
pos += out.write(headers.data(), headers.size());
// Write out all names
for(const auto& item : items) {
pos += out.write(item.name.data(), item.name.size() + 1);
}
// Write out all shapes
for(const auto& item : items) {
pos += out.write(item.shape.data(), item.shape.size());
}
// align to next 256-byte boundary
uint64_t nextpos = ((pos + sizeof(uint64_t)) / 256 + 1) * 256;
uint64_t offset = nextpos - pos - sizeof(uint64_t);
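// Worked example of the alignment arithmetic: if pos == 1000, then
// nextpos = ((1000 + 8) / 256 + 1) * 256 = 1024 and offset = 1024 - 1000 - 8 = 16,
// so after the 8-byte offset value and 16 zero bytes the data section starts at
// byte 1024. Note that if pos + 8 already sits on a 256-byte boundary, a full
// extra 256 bytes of padding are written.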
pos += out.write(&offset);
for(uint64_t i = 0; i < offset; i++) {
char padding = 0;
pos += out.write(&padding);
}
// Write out all values
for(const auto& item : items)
pos += out.write(item.data(), item.bytes.size()); // writes out data with padding, keeps 256-byte boundary.
// Amazingly, this is binary-compatible with V1: aligned and non-aligned models can be read
// with the same procedure, so no version bump is required. Gets 5-8% of speed back when mmapped.
}
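// Round-trip usage sketch (file names are hypothetical): items obtained from
// loadItems() can be written back out with saveItems(), producing a model file
// with the layout sketched above.
//   std::vector<io::Item> items;
//   loadItems("model.bin", items);
//   saveItems("model.copy.bin", items);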
} // namespace binary
} // namespace io
} // namespace marian