Program Listing for File iris.cpp
↰ Return to documentation for file (src/examples/iris/iris.cpp)
#include <vector>
#include "common/filesystem.h"
#include "common/config.h"
#include "examples/iris/helper.cpp"
#include "marian.h"
using namespace marian;
using namespace data;
// Upper bound of the epoch counter in the training loop of main()
constexpr size_t MAX_EPOCHS = 200;
/**
 * Builds a feed-forward dense network for the Iris example on `graph`.
 *
 * The graph is cleared first, so every call replaces the expressions created
 * by a previous call. The network consists of an input of shape
 * {N, NUM_FEATURES}, a tanh hidden layer with 5 units and a linear output
 * layer with NUM_LABELS units. NUM_FEATURES and NUM_LABELS are defined
 * outside this listing.
 *
 * @param graph       expression graph on which the network is built
 * @param inputData   flattened feature values; the number of examples N is
 *                    inputData.size() / NUM_FEATURES (integer division)
 * @param outputData  label indices (e.g. [1, 0, 2, ...]); only read when
 *                    `train` is true
 * @param train       selects which expression is returned
 * @return the mean cross-entropy cost if `train` is true, otherwise the
 *         log-softmax of the output layer (log-probabilities, not
 *         probabilities)
 *
 * Both vectors are taken by const reference so that the data set is not
 * copied on every call (the training loop calls this once per epoch).
 * NOTE(review): the caller runs the graph after this function has returned;
 * this presumes inits::fromVector and graph->indices keep their own copy of
 * the data. The former by-value signature relied on the same assumption.
 */
Expr buildIrisClassifier(Ptr<ExpressionGraph> graph,
                         const std::vector<float>& inputData,
                         const std::vector<IndexType>& outputData = {},
                         bool train = false) {
  // The number of input examples; the cast makes the size_t -> int conversion explicit
  const int N = static_cast<int>(inputData.size() / NUM_FEATURES);

  graph->clear();

  // Define the input layer
  auto x = graph->constant({N, NUM_FEATURES}, inits::fromVector(inputData));

  // Define the hidden layer
  auto W1 = graph->param("W1", {NUM_FEATURES, 5}, inits::uniform(-0.1f, 0.1f));
  auto b1 = graph->param("b1", {1, 5}, inits::zeros());
  auto h = tanh(affine(x, W1, b1));

  // Define the output layer
  auto W2 = graph->param("W2", {5, NUM_LABELS}, inits::uniform(-0.1f, 0.1f));
  auto b2 = graph->param("b2", {1, NUM_LABELS}, inits::zeros());
  auto o = affine(h, W2, b2);

  if(!train) {
    // Inference: return log-softmax scores of the output layer
    return logsoftmax(o);
  }

  auto y = graph->indices(outputData);
  /* Define cross entropy cost on the output layer.
   * It can be also defined directly as:
   *   -mean(sum(logsoftmax(o) * y, axis=1), axis=0)
   * But then `y` requires to be a one-hot-vector, i.e. [0,1,0, 1,0,0, 0,0,1,
   * ...] instead of [1, 0, 2, ...].
   */
  return mean(cross_entropy(o, y), /*axis =*/ 0);
}
int main() {
// Initialize global settings
createLoggers();
// Disable randomness by setting a fixed seed for random number generator
Config::seed = 123456;
// Get path do data set
std::string dataPath
= (filesystem::Path(std::string(__FILE__)).parentPath() / filesystem::Path(std::string("iris.data"))).string();
// Read data set (all 150 examples)
std::vector<float> trainX;
std::vector<IndexType> trainY;
readIrisData(dataPath, trainX, trainY);
// Split shuffled data into training data (120 examples) and test data (rest
// 30 examples)
shuffleData(trainX, trainY);
std::vector<float> testX(trainX.end() - 30 * NUM_FEATURES, trainX.end());
trainX.resize(120 * NUM_FEATURES);
std::vector<IndexType> testY(trainY.end() - 30, trainY.end());
trainY.resize(120);
{
// Create network graph
auto graph = New<ExpressionGraph>();
// Set general options
#ifdef CUDA_FOUND
auto deviceType = DeviceType::gpu;
#else
auto deviceType = DeviceType::cpu;
#endif
graph->setDevice({0, deviceType});
graph->reserveWorkspaceMB(128);
// Choose optimizer (Sgd, Adagrad, Adam) and initial learning rate
auto opt = Optimizer(New<Options>("optimizer", "adam", "learn-rate", 0.005));
for(size_t epoch = 1; epoch <= MAX_EPOCHS; ++epoch) {
// Shuffle data in each epochs
shuffleData(trainX, trainY);
// Build classifier
auto cost = buildIrisClassifier(graph, trainX, trainY, true);
// Train classifier and update weights
graph->forward();
graph->backward();
opt->update(graph, /*mbSize=*/0);
if(epoch % 10 == 0)
std::cout << "Epoch: " << epoch << " Cost: " << cost->scalar()
<< std::endl;
}
// Build classifier with test data
auto probs = buildIrisClassifier(graph, testX);
// Print probabilities for debugging. The `debug` function has to be called
// prior to computations in the network.
// debug(probs, "Classifier probabilities")
// Run classifier
graph->forward();
// Extract predictions
std::vector<float> preds(testY.size());
probs->val()->get(preds);
std::cout << "Accuracy: " << calculateAccuracy(preds, testY) << std::endl;
}
return 0;
}