.. _program_listing_file_src_tensors_cpu_fbgemm_packed_gemm.h:

Program Listing for File packed_gemm.h
======================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_tensors_cpu_fbgemm_packed_gemm.h>` (``src/tensors/cpu/fbgemm/packed_gemm.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #pragma once

   #include "tensors/tensor.h"

   namespace marian {
   namespace cpu {
   namespace variant { // Variants of GEMM implementations

   // Returns the byte size of a packed fp16 matrix. The size is computed by
   // fbgemm's internal logic, which accounts for padding and layout differences.
   // Packing with fp16 currently targets only AVX2 instruction sets.
   // See '3rd_party/fbgemm/include/fbgemm/FbgemmFP16.h'.
   // shape: shape of the tensor to be packed
   // transpose: whether the matrix is transposed
   // packsize (out): the size of the packed matrix in bytes
   void fbgemmPacked16PackInfo(const marian::Shape& shape,
                               const bool transpose,
                               /*out*/uint64_t& packsize);

   // Same as above, but additionally returns the packing geometry through
   // extra output parameters. Packing with fp16 currently targets only AVX2
   // instruction sets.
   // See '3rd_party/fbgemm/include/fbgemm/FbgemmFP16.h'.
   // shape: shape of the tensor to be packed
   // transpose: whether the matrix is transposed
   // nrow (out): the number of rows
   // ncol (out): the number of columns
   // kernel_ncol_blocks (out): the number of column blocks
   // brow (out): the number of rows in a block
   // bcol (out): the number of columns in a block
   // last_brow (out): the number of rows in the last block
   // nbrow (out): row index in a block
   // nbcol (out): column index in a block
   // packsize (out): the size of the packed matrix in bytes
   void fbgemmPacked16PackInfo(const marian::Shape& shape,
                               const bool transpose,
                               /*out*/int& nrow,
                               /*out*/int& ncol,
                               /*out*/int& kernel_ncol_blocks,
                               /*out*/int& brow,
                               /*out*/int& bcol,
                               /*out*/int& last_brow,
                               /*out*/int& nbrow,
                               /*out*/int& nbcol,
                               /*out*/uint64_t& packsize); // @TODO: change to size_t where appropriate

   // Returns the byte size of a packed int8 matrix. The size is computed by
   // fbgemm's internal logic, which accounts for padding and layout differences.
   // See '3rd_party/fbgemm/src/PackBMatrix.cc'.
   // shape: shape of the tensor to be packed
   // packType: type to be packed - packed8avx2 or packed8avx512
   // transpose: whether the matrix is transposed
   // nrow (out): the number of rows
   // ncol (out): the number of columns
   // packsize (out): the size of the packed matrix in bytes
   void fbgemmPacked8PackInfo(const marian::Shape& shape,
                              const marian::Type packType,
                              const bool transpose,
                              /*out*/int& nrow,
                              /*out*/int& ncol,
                              /*out*/uint64_t& packsize);

   // Packs an fp16 matrix into a cache-efficient block format.
   // out: output tensor - packed format
   // inData: input tensor data - pointer to float data
   // transpose: whether the matrix is transposed
   // nrow: the number of rows
   // ncol: the number of columns
   // kernel_ncol_blocks: the number of column blocks
   // brow: the number of rows in a block
   // bcol: the number of columns in a block
   // last_brow: the number of rows in the last block
   // nbrow: row index in a block
   // nbcol: column index in a block
   // packsize: the size of the packed matrix
   //           (the number of fp16 elements + padding (1024) + extra temporary memory (256))
   void fbgemmPacked16Pack(marian::Tensor out,
                           const float* inData,
                           const bool transpose,
                           const int nrow,
                           const int ncol,
                           const int kernel_ncol_blocks,
                           const int brow,
                           const int bcol,
                           const int last_brow,
                           const int nbrow,
                           const int nbcol,
                           const uint64_t packsize); // @TODO: change to size_t where appropriate

   // Packs a matrix into a cache-efficient block format, quantizing it to
   // int8 in the process.
   // out: output tensor - packed and quantized into int8
   // inData: input tensor data - pointer to float data
   // packType: type to be packed - packed8avx2 or packed8avx512
   // transpose: whether the matrix is transposed
   // nrow: the number of rows
   // ncol: the number of columns
   // packsize: the size of the packed matrix
   //           (the size of the int8 packed B from fbgemm::PackAWithQuantRowOffset
   //           + quantization scale, offset and zero point)
   // quantRangeStdDevs: the range of the original float data to be quantized,
   //           in multiples of the standard deviation; the default value 0.0f
   //           means min/max quantization.
   //           Only half of the normal int8 range, [-64, 63], is used to avoid
   //           overflow during the accumulation in the VPMADDUBSW instruction:
   //           https://intel.github.io/mkl-dnn/dev_guide_int8_computations.html
   //           (e.g. 3.f means the original tensor is quantized from
   //           [mean - 3.f * standard deviation, mean + 3.f * standard deviation]
   //           to [-64, 63]; a worked illustration follows this listing)
   void fbgemmPacked8Pack(marian::Tensor out,
                          const float* inData,
                          const marian::Type packType,
                          const bool transpose,
                          const int nrow,
                          const int ncol,
                          const uint64_t packsize,
                          const float quantRangeStdDevs = 0.f); // @TODO: change to size_t where appropriate

   // GEMM operation on a packed B matrix.
   // C: output matrix
   // A: A matrix
   // B: B matrix (packed)
   // bias: bias matrix
   // m: the number of rows in A and C
   // n: the number of columns in B and C
   // transA: transpose of A matrix
   // B is already packed, so transB is not needed.
   void fbgemmPacked16Gemm(marian::Tensor C,
                           const marian::Tensor A,
                           const marian::Tensor B,
                           const marian::Tensor bias,
                           const size_t m,
                           const size_t n,
                           const int transA = 0);

   // GEMM operation on a packed B matrix in 8-bit integers.
   // C: output matrix
   // A: A matrix
   // B: B matrix (packed)
   // m: the number of rows in A and C
   // n: the number of columns in B and C
   // k: the number of columns in A and rows in B
   // transA: transpose of A matrix
   // transB: transpose of B matrix
   void fbgemmPacked8Gemm(Type packType,
                          marian::Tensor C,
                          const marian::Tensor A,
                          const marian::Tensor B,
                          const size_t m,
                          const size_t n,
                          const size_t k,
                          const int transA = 0,
                          const int transB = 0);

   } // namespace variant
   } // namespace cpu
   } // namespace marian
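
Taken together, these declarations describe a pack-once, multiply-many workflow: query the packed size, allocate a buffer of that size, pack (and for int8, quantize) B once, then reuse the packed B across GEMM calls. The sketch below shows one plausible int8 call sequence; it is not part of the header, ``newPackedTensor`` is a hypothetical allocator standing in for marian's tensor allocation machinery, and ``Type::packed8avx2`` is assumed to be one of the available pack types.

.. code-block:: cpp

   #include "tensors/cpu/fbgemm/packed_gemm.h"

   using namespace marian;

   // Hypothetical helper: allocates a Tensor holding `bytes` bytes for the
   // packed representation. Not part of the header above.
   Tensor newPackedTensor(uint64_t bytes);

   // Pack (and quantize) B once, so it can be reused across GEMM calls.
   Tensor packInt8B(const float* bData, const Shape& bShape) {
     int nrow = 0, ncol = 0;
     uint64_t packsize = 0;

     // 1. Ask fbgemm how large the packed buffer must be.
     cpu::variant::fbgemmPacked8PackInfo(bShape, Type::packed8avx2,
                                         /*transpose=*/false,
                                         nrow, ncol, packsize);

     // 2. Allocate `packsize` bytes and pack + quantize B into them
     //    (default quantRangeStdDevs = 0.f, i.e. min/max quantization).
     Tensor packedB = newPackedTensor(packsize);
     cpu::variant::fbgemmPacked8Pack(packedB, bData, Type::packed8avx2,
                                     /*transpose=*/false,
                                     nrow, ncol, packsize);
     return packedB;
   }

   // 3. Multiply with the packed B; transA/transB keep their defaults (0).
   void gemmInt8(Tensor C, const Tensor A, const Tensor packedB,
                 size_t m, size_t n, size_t k) {
     cpu::variant::fbgemmPacked8Gemm(Type::packed8avx2, C, A, packedB, m, n, k);
   }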
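
The ``quantRangeStdDevs`` comment above compresses a fair amount of arithmetic. As a worked illustration of that comment only (an assumption about the math, not fbgemm's actual implementation), the clipping range and the scale onto the half int8 range ``[-64, 63]`` could be derived like this:

.. code-block:: cpp

   // Illustration only: derives the float range selected by quantRangeStdDevs
   // and the scale mapping it onto [-64, 63]. This mirrors the comment on
   // fbgemmPacked8Pack; the real packing code may differ in detail.
   struct QuantRange { float lo, hi, scale; };

   QuantRange quantRangeSketch(float mean, float stddev,
                               float dataMin, float dataMax,
                               float quantRangeStdDevs) {
     QuantRange r;
     if (quantRangeStdDevs == 0.f) {
       // Default: min/max quantization over the full observed range.
       r.lo = dataMin;
       r.hi = dataMax;
     } else {
       // e.g. 3.f clips to [mean - 3 * stddev, mean + 3 * stddev].
       r.lo = mean - quantRangeStdDevs * stddev;
       r.hi = mean + quantRangeStdDevs * stddev;
     }
     // 127 quantization steps span [-64, 63]; the half range leaves headroom
     // so VPMADDUBSW accumulation cannot overflow.
     r.scale = (r.hi - r.lo) / 127.f;
     return r;
   }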