Program Listing for File tensor_operators.h¶
↰ Return to documentation for file (src/tensors/tensor_operators.h
)
#pragma once
#include "common/definitions.h"
#include "tensors/allocator.h"
#include "tensors/tensor.h"
#include "tensors/dispatch.h"
#include "functional/shape.h"
#include "functional/tensor.h"
#include "functional/tmp.h"
#ifdef CUDA_FOUND
#include "tensors/gpu/add.h"
#include "tensors/gpu/algorithm.h"
#include "tensors/gpu/element.h"
#include "tensors/gpu/prod.h"
#endif
#include "tensors/cpu/add.h"
#include "tensors/cpu/element.h"
#include <algorithm>
namespace marian {
template <typename InIt, typename OutIt>
void copy(Ptr<Backend>& backend, const InIt beg, const InIt end, OutIt it) {
#ifdef CUDA_FOUND
if(backend->getDeviceId().type == DeviceType::gpu)
gpu::copy(backend, beg, end, it);
else
std::copy(beg, end, it);
#else
backend;
std::copy(beg, end, it);
#endif
}
DISPATCH2(CopyCast, marian::Tensor, const marian::Tensor);
DISPATCH2(AddCast, marian::Tensor, const marian::Tensor);
DISPATCH4(IsNaN, const Tensor, Ptr<Allocator>, bool&, bool&);
#ifdef CUDA_FOUND
namespace gpu {
bool SanitizeGradient(marian::Tensor in, Ptr<Allocator> allocator, bool pruneNaN, bool clipInf);
}
#endif
namespace cpu {
bool SanitizeGradient(marian::Tensor in, Ptr<Allocator> allocator, bool pruneNaN, bool clipInf);
}
static inline bool SanitizeGradient(marian::Tensor in, Ptr<Allocator> allocator, bool pruneNaN, bool clipInf) {
#ifdef CUDA_FOUND
if(in->getBackend()->getDeviceId().type == DeviceType::gpu)
return gpu::SanitizeGradient(in, allocator, pruneNaN, clipInf);
else
#endif
return cpu::SanitizeGradient(in, allocator, pruneNaN, clipInf);
}
template <class Functor, class... Tensors>
void Element(Functor functor, marian::Tensor out, Tensors... tensors) {
#ifdef CUDA_FOUND
if(out->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::Element(functor, out, tensors...);
else
#endif
cpu::Element(functor, out, tensors...);
}
template <class Functor, class... Tensors>
void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
#ifdef CUDA_FOUND
if(out->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::Add(functor, scale, out, tensors...);
else
#endif
cpu::Aggregate(functor, /*aggInit=*/0.0f, functional::_1 + functional::_2, scale, out, tensors...);
}
template <class Functor, class... Tensors>
void Add(Functor functor, marian::Tensor out, Tensors... tensors) {
Add(functor, /*scale=*/1.f, out, tensors...);
}
template <class Functor, class AggFunctor, class... Tensors>
void Aggregate(Functor functor, float aggInit, AggFunctor aggFunctor, marian::Tensor out, Tensors... tensors) {
#ifdef CUDA_FOUND
if(out->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::Aggregate(functor, aggInit, aggFunctor, 1.0f, out, tensors...);
else
#endif
cpu::Aggregate(functor, aggInit, aggFunctor, 1.0f, out, tensors...);
}
template <class Functor, class... Tensors>
void Reduce(Functor functor,
float scale,
marian::Tensor out,
Tensors... tensors) {
out->set(0.f);
Add(functor, scale, out, tensors...);
}
template <class Functor, class... Tensors>
void Reduce(Functor functor, marian::Tensor out, Tensors... tensors) {
out->set(0.f);
Add(functor, out, tensors...);
}
template <class Functor, class AggFunctor, class... Tensors>
void Reduce(Functor functor, AggFunctor aggFunctor, float aggInit,
marian::Tensor out,
Tensors... tensors) {
out->set(aggInit);
Aggregate(functor, aggInit, aggFunctor, out, tensors...);
}
// clang-format off
DISPATCH7(Prod, marian::Tensor, const marian::Tensor&, const marian::Tensor&, bool, bool, float, float)
DISPATCH8(Prod, marian::Tensor, const marian::Tensor&, const marian::Tensor&, bool, bool, float, float, Type) // overloading since we want the default to for computeType be C->type() which difficult otherwise.
DISPATCH8(ProdBatched, marian::Tensor, Ptr<Allocator>, const marian::Tensor, const marian::Tensor, bool, bool, float, float)
DISPATCH8(ProdBatchedLegacy, marian::Tensor, Ptr<Allocator>, const marian::Tensor, const marian::Tensor, bool, bool, float, float)
DISPATCH9(CSRProd, marian::Tensor, Ptr<Allocator>, const marian::Tensor&, const marian::Tensor&, const marian::Tensor&, const marian::Tensor&, bool, bool, float)
DISPATCH10(Affine, marian::Tensor, Ptr<Allocator>, const marian::Tensor&, const marian::Tensor&, const marian::Tensor&, bool, bool, float, float, bool)
DISPATCH2(Softmax, marian::Tensor, marian::Tensor)
DISPATCH3(SoftmaxGrad, marian::Tensor, marian::Tensor, marian::Tensor)
DISPATCH2(LogSoftmax, marian::Tensor, marian::Tensor)
DISPATCH3(LogSoftmaxGrad, marian::Tensor, marian::Tensor, marian::Tensor)
DISPATCH4(CrossEntropyPick, marian::Tensor, marian::Tensor, marian::Tensor, float)
DISPATCH5(CrossEntropyPickBackward, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
DISPATCH3(TransposeND, marian::Tensor, marian::Tensor, const std::vector<int>&)
DISPATCH3(TransposeNDGrad, marian::Tensor, marian::Tensor, const std::vector<int>&)
DISPATCH5(Shift, marian::Tensor, marian::Tensor, marian::Shape, float, bool)
DISPATCH4(ShiftGrad, marian::Tensor, marian::Tensor, marian::Shape, bool)
DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int)
// clang-format on
// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1.
static inline void Bernoulli(Tensor resultTensor, float keepProb, float scale = 1.f, float shift = 0.f) {
// in-place uniform distribution
auto rnd = resultTensor->getBackend()->getRandomGenerator();
rnd->uniform(resultTensor, 0.f, 1.f); // temporarily mis-use this to hold the random numbers
using namespace functional;
Element(_1 = (_1 < keepProb) * scale + shift, resultTensor);
}
static inline void Dropout(Tensor tensor, float dropProb) {
float keepProb = 1.f - dropProb;
float scale = 1.f / keepProb;
Bernoulli(tensor, keepProb, scale, /*shift=*/0.f);
}
DISPATCH2(SinusoidalPositionEmbeddings, marian::Tensor, int);
#ifdef CUDA_FOUND
namespace gpu {
void Deconcatenate(std::vector<marian::Tensor>& outputs,
const marian::Tensor in,
int ax);
}
#endif
namespace cpu {
void Deconcatenate(std::vector<marian::Tensor>& outputs,
const marian::Tensor in,
int ax);
}
static inline void Deconcatenate(std::vector<marian::Tensor>& outputs,
const marian::Tensor in,
int ax) {
#ifdef CUDA_FOUND
if(in->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::Deconcatenate(outputs, in, ax);
else
#endif
cpu::Deconcatenate(outputs, in, ax);
}
// clang-format off
DISPATCH5(LayerNormalization, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
#ifdef CUDA_FOUND
namespace gpu {
void LayerNormalizationGrad(Ptr<Allocator> allocator,
Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
Tensor adj,
Tensor y,
Tensor x,
Tensor gamma,
Tensor beta,
float eps);
}
#endif
namespace cpu {
void LayerNormalizationGrad(Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
Tensor adj,
Tensor y,
Tensor x,
Tensor gamma,
Tensor beta,
float eps);
}
static inline void LayerNormalizationGrad(
Ptr<Allocator> allocator,
Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
Tensor adj,
Tensor y,
Tensor x,
Tensor gamma,
Tensor beta,
float eps) {
#ifdef CUDA_FOUND
if(gradX->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::LayerNormalizationGrad(allocator, gradX, gradGamma, gradBeta, adj, y, x, gamma, beta, eps);
else
#endif
cpu::LayerNormalizationGrad(gradX, gradGamma, gradBeta, adj, y, x, gamma, beta, eps);
}
// clang-format off
DISPATCH5(RMSNormalization, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
#ifdef CUDA_FOUND
namespace gpu {
void RMSNormalizationGrad(Ptr<Allocator> allocator,
Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
Tensor adj,
Tensor y,
Tensor x,
Tensor gamma,
Tensor beta,
float eps);
}
#endif
namespace cpu {
void RMSNormalizationGrad(Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
Tensor adj,
Tensor y,
Tensor x,
Tensor gamma,
Tensor beta,
float eps);
}
static inline void RMSNormalizationGrad(
Ptr<Allocator> allocator,
Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,
Tensor adj,
Tensor y,
Tensor x,
Tensor gamma,
Tensor beta,
float eps) {
#ifdef CUDA_FOUND
if(gradX->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::RMSNormalizationGrad(allocator, gradX, gradGamma, gradBeta, adj, y, x, gamma, beta, eps);
else
#endif
cpu::RMSNormalizationGrad(gradX, gradGamma, gradBeta, adj, y, x, gamma, beta, eps);
}
DISPATCH4(HighwayForward, marian::Tensor, const marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH7(HighwayBackward, marian::Tensor, marian::Tensor, marian::Tensor, const marian::Tensor, const marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH3(CopyRows, marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH3(PasteRows, marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH3(CopyCols, marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH3(PasteCols, marian::Tensor, const marian::Tensor, const marian::Tensor)
DISPATCH4(Select, marian::Tensor, const marian::Tensor, const marian::Tensor, int)
#ifdef CUDA_FOUND
namespace gpu {
template <bool add>
void Insert(Tensor out, const Tensor in, const Tensor indices, int axis);
}
#endif
namespace cpu {
template <bool add>
void Insert(Tensor out, const Tensor in, const Tensor indices, int axis);
}
template <bool add>
static inline void Insert(Tensor out, const Tensor in, const Tensor indices, int axis) {
#ifdef CUDA_FOUND
if(out->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::Insert<add>(out, in, indices, axis);
else
#endif
cpu::Insert<add>(out, in, indices, axis);
}
DISPATCH7(TopK, marian::Tensor, marian::Tensor, Ptr<Allocator>, const marian::Tensor, int, int, bool);
DISPATCH2(LSTMCellForward, marian::Tensor, std::vector<marian::Tensor>)
DISPATCH2(LSTMOutputForward, marian::Tensor, std::vector<marian::Tensor>);
// clang-format on
#ifdef CUDA_FOUND
namespace gpu {
void LSTMCellBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj);
}
#endif
namespace cpu {
void LSTMCellBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj);
}
static inline void LSTMCellBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj) {
#ifdef CUDA_FOUND
if(adj->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::LSTMCellBackward(outputs, inputs, adj);
else
#endif
cpu::LSTMCellBackward(outputs, inputs, adj);
}
#ifdef CUDA_FOUND
namespace gpu {
void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj);
}
#endif
namespace cpu {
void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj);
}
static inline void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj) {
#ifdef CUDA_FOUND
if(adj->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::LSTMOutputBackward(outputs, inputs, adj);
else
#endif
cpu::LSTMOutputBackward(outputs, inputs, adj);
}
DISPATCH3(GRUFastForward, marian::Tensor, std::vector<marian::Tensor>, bool)
#ifdef CUDA_FOUND
namespace gpu {
void GRUFastBackward(Ptr<Allocator> allocator,
std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj,
bool final);
}
#endif
namespace cpu {
void GRUFastBackward(Ptr<Allocator> allocator,
std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj,
bool final);
}
static inline void GRUFastBackward(Ptr<Allocator> allocator,
std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj,
bool final = false) {
#ifdef CUDA_FOUND
if(adj->getBackend()->getDeviceId().type == DeviceType::gpu)
gpu::GRUFastBackward(allocator, outputs, inputs, adj, final);
else
#endif
cpu::GRUFastBackward(allocator, outputs, inputs, adj, final);
}
// clang-format off
DISPATCH4(Att, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
DISPATCH7(AttBack, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
// clang-format on
#ifdef CUDA_FOUND
namespace gpu {
float L2Norm(marian::Tensor in, Ptr<Allocator> allocator);
}
#endif
namespace cpu {
float L2Norm(marian::Tensor in, Ptr<Allocator> allocator);
}
static inline float L2Norm(marian::Tensor in, Ptr<Allocator> allocator) {
#ifdef CUDA_FOUND
if(in->getBackend()->getDeviceId().type == DeviceType::gpu)
return gpu::L2Norm(in, allocator);
else
#endif
return cpu::L2Norm(in, allocator);
}
// clang-format off
DISPATCH5(PoolingWithMaskingForward, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
DISPATCH6(PoolingWithMaskingBackward, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
// clang-format on
} // namespace marian