// Program listing for file src/optimizers/quantizer.h
#pragma once
#include "common/options.h"
#include "functional/functional.h"
#include "graph/expression_graph.h"
#include "tensors/backend.h"
#include "tensors/tensor.h"
#include "tensors/tensor_allocator.h"
#include "tensors/tensor_operators.h"
namespace marian {
/**
 * Quantizes all the parameters in a model graph.
 *
 * The required error-feedback mechanism is handled internally by this class.
 *
 * Example:
 *   auto mq = New<ModelQuantizer>(options_);
 *   mq->quantize(graph_);
 *
 * Parameters in graph_ are quantized every time quantize() is called. The
 * internal error-residual is also updated on each quantize() call; therefore,
 * always use the same ModelQuantizer object to quantize the same graph.
 */
class ModelQuantizer {
public:
  /**
   * Reads the quantization settings from the given options.
   *
   * @param options Must provide "quantize-bits" and
   *                "quantize-optimization-steps" (read as size_t), and
   *                "quantize-biases" and "quantize-log-based" (read as bool).
   */
  ModelQuantizer(Ptr<Options> options)
      : bits_{options->get<size_t>("quantize-bits")},
        optSteps_{options->get<size_t>("quantize-optimization-steps")},
        quantBias_{options->get<bool>("quantize-biases")},
        logQuant_{options->get<bool>("quantize-log-based")} {}

  /**
   * Quantizes the parameters of the given graph and updates the internal
   * error-residual. Defined out of line.
   *
   * @param graph Graph whose parameters are quantized.
   */
  void quantize(Ptr<ExpressionGraph> graph);

protected:
  /**
   * Quantization routine for a single tensor. Defined out of line.
   * NOTE(review): presumably invoked by quantize() per parameter tensor --
   * confirm against quantizer.cpp.
   */
  void quantizeImpl(Tensor t);

  // Settings read from the options in the constructor.
  size_t bits_;      // value of "quantize-bits"
  size_t optSteps_;  // value of "quantize-optimization-steps"
  bool quantBias_;   // value of "quantize-biases"
  bool logQuant_;    // value of "quantize-log-based"

  // Not set by the constructor's initializer list, so it is given a default
  // here to guarantee the flag never holds an indeterminate value.
  // NOTE(review): 'true' assumes the flag means "no error-residual has been
  // accumulated yet" -- confirm against its use in quantizer.cpp.
  bool isFirstError_{true};

  std::vector<Ptr<TensorAllocator>> allocators_;

  Tensor errorResidual_;  // Tensor to store the error-residual
  Tensor delta_;          // temporary Tensor for storing q to calculate optimal S
  Tensor tempVar_;        // single element Tensor for Reduce swap variable
};
} // namespace marian