.. _program_listing_file_src_tensors_cpu_add.h:

Program Listing for File add.h
==============================

|exhale_lsh| :ref:`Return to documentation for file <file_src_tensors_cpu_add.h>` (``src/tensors/cpu/add.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   /* All or part of this file was contributed by Intel under license:
    *   Copyright (C) 2017-2018 Intel Corporation
    *   SPDX-License-Identifier: MIT
    */

   #pragma once

   #include "functional/functional.h"
   #include "functional/shape.h"
   #include "functional/tensor.h"
   #include "functional/tmp.h"
   #include "tensors/tensor.h"

   namespace marian {
   namespace cpu {

   // Generic aggregation: the output may have any broadcast-compatible shape;
   // collapsed axes are reduced with aggFunctor via functional::loops.
   template <size_t K, class Functor, class AggFunctor>
   void gAggregateGeneric(Functor functor,
                          float aggInit,
                          AggFunctor aggFunctor,
                          const functional::Shape full,
                          functional::Tensor<float> out,
                          functional::Array<functional::Tensor<float>, K> ins,
                          float scale = 1.0) {
     int outLength = out.shape().elements();
     bool same = outLength == full.elements();
     for(size_t i = 0; i < K; ++i)
       same = same && outLength == ins[i].shape().elements();

     constexpr size_t N = functional::Shape::size();
     functional::Array<int, N> len;
     for(int i = 0; i < N; ++i)
       len[i] = full[i] / out.shape()[i];

     functional::Array<int, N> dims;
     for(int index = 0; index < outLength; ++index) {
       if(same) {
         out[index] = aggFunctor(out[index], functional::apply(functor, ins, index) * scale);
       } else {
         out.shape().dims(index, dims);
         out[index] = aggFunctor(out[index],
                                 functional::loops(functor, aggInit, aggFunctor, ins, len, dims) * scale);
       }
     }
   }

   // Aggregation when the output already has the full broadcast shape:
   // no reduction is needed, only optional index broadcasting of the inputs.
   template <size_t K, class Functor, class AggFunctor>
   void gAggregateEqual(Functor functor,
                        AggFunctor aggFunctor,
                        functional::Tensor<float> out,
                        functional::Array<functional::Tensor<float>, K> ins,
                        float scale,
                        bool broadcast) {
     int length = out.shape().elements();
     functional::Array<int, functional::Shape::size()> dims;

     for(int index = 0; index < length; ++index) {
       functional::Array<int, K> indices;
       indices.fill(index);

       if(broadcast) {
         out.shape().dims(index, dims);
         for(size_t i = 0; i < K; ++i)
           indices[i] = ins[i].shape().bindex(dims);
       }

       out[index] = aggFunctor(out[index], functional::apply(functor, ins, indices) * scale);
     }
   }

   // Row-wise reduction: collapses the last axis of the full shape into a
   // per-row aggregate, combined with aggFunctor starting from aggInit.
   template <size_t K, class Functor, class AggFunctor>
   void gAggregateReduce(Functor functor,
                         float aggInit,
                         AggFunctor aggFunctor,
                         const functional::Shape full,
                         functional::Tensor<float> out,
                         functional::Array<functional::Tensor<float>, K> ins,
                         float scale = 1.0) {
     int rows = full.elements() / full.back();
     int cols = full.back();

     bool same = true;
     for(size_t i = 0; i < K; ++i)
       same = same && ins[i].shape().elements() == full.elements();

     for(int j = 0; j < rows; ++j) {
       float colSum = aggInit;
       if(same) {
         for(int id = 0; id < cols; ++id)
           colSum = aggFunctor(colSum, functional::apply(functor, ins, j * cols + id));
       } else {
         functional::Array<int, functional::Shape::size()> dims;
         for(int id = 0; id < cols; ++id) {
           full.dims(j * cols + id, dims);
           functional::Array<int, K> indices;
           for(size_t i = 0; i < K; ++i)
             indices[i] = ins[i].shape().bindex(dims);
           colSum = aggFunctor(colSum, functional::apply(functor, ins, indices));
         }
       }
       out[j] = aggFunctor(out[j], colSum * scale);
     }
   }
   // Dispatches to one of the kernels above depending on how the output shape
   // relates to the broadcast of all argument shapes.
   template <class Functor, class AggFunctor, class... Tensors>
   void Aggregate(Functor functor,
                  float aggInit,
                  AggFunctor aggFunctor,
                  float scale,
                  marian::Tensor out,
                  Tensors... tensors) {
     auto full = marian::Shape::broadcast({out, tensors...});

     //int length = out->shape().elements();

     constexpr size_t K = sizeof...(Tensors);

     functional::Tensor<float> gOut = out;
     functional::Array<functional::Tensor<float>, K> gIns = {tensors...};

     if(full.back() != 1 && out->shape().back() == 1) {
       //size_t m = full.elements() / length;
       //size_t k = full.back();
       cpu::gAggregateReduce(functor, aggInit, aggFunctor, full, gOut, gIns, scale);
     } else if(out->shape() == full) {
       bool broadcast = false;
       for(size_t i = 0; i < K; ++i)
         broadcast = broadcast || gOut.shape() != gIns[i].shape();
       cpu::gAggregateEqual(functor, aggFunctor, gOut, gIns, scale, broadcast);
     } else {
       cpu::gAggregateGeneric(functor, aggInit, aggFunctor, full, gOut, gIns, scale);
     }
   }

   }  // namespace cpu
   }  // namespace marian
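
``Aggregate`` is the entry point of this header: it inspects the shapes and dispatches to one of the three kernels above. As a rough, non-authoritative sketch of how a caller might use it (``addScaledProduct`` is a hypothetical helper, not part of this file; it assumes marian's ``functional`` placeholders ``_1``/``_2`` and CPU tensors allocated elsewhere):

.. code-block:: cpp

   #include "tensors/cpu/add.h"
   #include "functional/functional.h"

   // Hypothetical example: accumulate scale * (a * b) into `out`, reducing over
   // axes where `out` has extent 1 but the broadcast of the inputs does not.
   static void addScaledProduct(marian::Tensor out,
                                marian::Tensor a,
                                marian::Tensor b,
                                float scale) {
     using namespace marian::functional;
     marian::cpu::Aggregate(_1 * _2,   // element-wise functor applied to the inputs
                            0.0f,      // aggregation identity (aggInit)
                            _1 + _2,   // aggregation functor: summation
                            scale,     // scaling applied to each aggregated value
                            out,
                            a, b);
   }

If ``out`` already has the full broadcast shape, such a call takes the ``gAggregateEqual`` path; if the last axis of ``out`` is collapsed to 1 while the broadcast shape is not, it takes ``gAggregateReduce``; any other shape combination falls back to ``gAggregateGeneric``.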