Program Listing for File model_lenet.h

Return to documentation for file (src/examples/mnist/model_lenet.h)

#pragma once

#include "examples/mnist/model.h"
#include "layers/convolution.h"

namespace marian {
namespace models {

class MnistLeNet : public MnistFeedForwardNet {
public:
  template <class... Args>
  MnistLeNet(Ptr<Options> options, Args... args)
      : MnistFeedForwardNet(options, args...) {}

  virtual void clear(Ptr<ExpressionGraph> graph) override { graph->clear(); }

protected:
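  // Build the LeNet-style computation graph for one batch of MNIST images and
  // return the top-level training or inference node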
  virtual Expr apply(Ptr<ExpressionGraph> g,
                     Ptr<data::Batch> batch,
                     bool inference = false) override {
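    // Sizes of the fully connected layers; the nominal input width (784) is
    // replaced below by the width of the flattened convolution output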
    const std::vector<int> dims = {784, 128, 10};

    // Start with an empty expression graph
    clear(g);

    // Create an input node of shape batchSize x 1 x 28 x 28 and populate it
    // with the training images
    auto features
        = std::static_pointer_cast<data::DataBatch>(batch)->features();
    auto x = g->constant({(int)batch->size(), 1, 28, 28},
                         inits::fromVector(features));

    // Construct hidden layers

    // clang-format off
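    // Two stacked convolution layers: 32 and then 64 kernels of size 3x3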
    auto conv_1 = convolution(g)
                    ("prefix", "conv_1")
                    ("kernel-dims", std::make_pair(3,3))
                    ("kernel-num", 32)
                    .apply(x);

    auto conv_2 = convolution(g)
                    ("prefix", "conv_2")
                    ("kernel-dims", std::make_pair(3,3))
                    ("kernel-num", 64)
                    .apply(conv_1);
    // clang-format on

    // Apply a ReLU activation to the convolution output, followed by max pooling
    auto relued = relu(conv_2);
    auto pool = max_pooling(relued, 2, 2, 1, 1, 1, 1);

    // Flatten the pooled feature maps into a batchSize x features matrix
    // and apply dropout
    auto flatten
        = reshape(pool,
                  {pool->shape()[0],
                   pool->shape()[1] * pool->shape()[2] * pool->shape()[3]});
    auto drop1 = dropout(flatten, 0.25);
    // Containers for the fully connected layers and their parameters
    std::vector<Expr> layers, weights, biases;

    for(size_t i = 0; i < dims.size() - 1; ++i) {
      int in = dims[i];
      int out = dims[i + 1];

      if(i == 0) {
        // Use the dropout-wrapped, flattened convolution output as the first
        // layer, and take its width as the actual input dimension
        layers.emplace_back(drop1);
        in = drop1->shape()[1];
      } else {
        // Multiply the previous layer by the weight matrix weights[i-1],
        // add the bias vector biases[i-1], and apply a rectified linear
        // (ReLU) activation
        layers.emplace_back(
            relu(affine(layers.back(), weights.back(), biases.back())));
      }

      // Construct a weight node for the outgoing connections from layer i
      weights.emplace_back(
          g->param("W" + std::to_string(i), {in, out}, inits::uniform()));

      // Construct a bias node, initialized to zero
      biases.emplace_back(
          g->param("b" + std::to_string(i), {1, out}, inits::zeros()));
    }

    // Apply dropout to the last hidden layer, then the final affine transform
    auto last
        = affine(dropout(layers.back(), 0.5), weights.back(), biases.back());

    if(!inference) {
      // Create a label node of shape batchSize x 1 and populate it with the
      // training labels
      auto labels = std::static_pointer_cast<data::DataBatch>(batch)->labels();
      auto y = g->constant({(int)batch->size(), 1}, inits::fromVector(labels));

      // Define a top-level node for training
      return mean(cross_entropy(last, y), /*axis =*/ 0);
    } else {
      // Define a top-level node for inference
      return logsoftmax(last);
    }
  }
};
}  // namespace models
}  // namespace marian