Program Listing for File backend.h¶
↰ Return to documentation for file (src/tensors/cpu/backend.h
)
#pragma once
#include <functional>
#include <random>
#include "common/config.h"
#include "tensors/backend.h"
namespace marian {
namespace cpu {
class Backend : public marian::Backend {
protected:
bool optimized_{false};
GemmType gemmType_{GemmType::Float32};
float quantizeRange_{0.f};
public:
Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) {}
void setDevice() override {}
void synchronize() override {}
// for CPU & inference only, sets to use optimized code for inference. Does nothing for GPU.
void setOptimized(bool optimize) override { optimized_ = optimize; }
bool isOptimized() override { return optimized_; }
// for CPU only, selects different GEMM types for the inference. Does nothing for GPU.
void setGemmType(std::string gemmType) override {
if (gemmType == "auto") gemmType_ = GemmType::Auto;
else if (gemmType == "float32") gemmType_ = GemmType::Float32;
#if USE_FBGEMM
else if (gemmType == "packed16") gemmType_ = GemmType::FbFp16Packed;
else if (gemmType.find("packed8") == 0) gemmType_ = GemmType::FbInt8Packed;
#endif // USE_FBGEMM
else ABORT("Unknown GEMM type - '{}'", gemmType);
}
GemmType getGemmType() override { return gemmType_; }
// for CPU, sets quantization range of weight matrices for the inference.
// for GPU, there's no quantization. so, it does nothing.
void setQuantizeRange(float range) override { quantizeRange_ = range; }
float getQuantizeRange() override { return quantizeRange_; }
};
} // namespace cpu
} // namespace marian