.. _program_listing_file_src_tensors_gpu_backend.h:

Program Listing for File backend.h
==================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_tensors_gpu_backend.h>` (``src/tensors/gpu/backend.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #pragma once

   #include "common/config.h"
   #include "tensors/backend.h"  // note: this is one folder up
   #include "tensors/gpu/cuda_helpers.h"
   #include "common/logging.h"

   #include <cublas_v2.h>
   #include <cuda.h>
   #include <cusparse.h>
   #include <curand.h>

   namespace marian {
   namespace gpu {

   // @TODO: in the future this should probably become a fully fledged CudaInfo class with many attributes
   struct CudaCompute {
     int major;
     int minor;
   };

   class Backend : public marian::Backend {
   private:
     void setCudaComputeCapability() {
       CUDA_CHECK(cudaDeviceGetAttribute(&compute_.major, cudaDevAttrComputeCapabilityMajor, (int)deviceId_.no));
       CUDA_CHECK(cudaDeviceGetAttribute(&compute_.minor, cudaDevAttrComputeCapabilityMinor, (int)deviceId_.no));
     }

   public:
     Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) {
       setDevice();
       setCudaComputeCapability();
     }

     ~Backend() {
       setDevice();
       if(cusparseHandle_) {
         cusparseDestroy(cusparseHandle_);
         cusparseHandle_ = 0;
       }
       if(cublasHandle_) {
         cublasDestroy(cublasHandle_);
         cublasHandle_ = 0;
       }
     }

     void setDevice() override { CUDA_CHECK(cudaSetDevice((int)deviceId_.no)); }

     void synchronize() override { CUDA_CHECK(cudaStreamSynchronize(0)); }

     cublasHandle_t getCublasHandle() {
       if(!cublasHandle_) { // lazy initialization here to avoid memory usage when unused
         setDevice();
         cublasCreate(&cublasHandle_);
       }
       return cublasHandle_;
     }

     cusparseHandle_t getCusparseHandle() {
       if(!cusparseHandle_) { // lazy initialization here to avoid memory usage when unused
         setDevice();
         cusparseCreate(&cusparseHandle_);
       }
       return cusparseHandle_;
     }

     // for CPU, enables optimized code paths for inference.
     // for GPU, this is not applicable; isOptimized() always returns false.
     void setOptimized(bool optimize) override {
       LOG_ONCE(info, "setOptimized() not supported for GPU_{}", optimize);
     }

     bool isOptimized() override {
       LOG_ONCE(info, "isOptimized() not supported for GPU");
       return false;
     }

     // for CPU, selects different GEMM types for inference.
     // for GPU, there is no GEMM type, so this does nothing.
     void setGemmType(std::string gemmType) override {
       LOG_ONCE(info, "setGemmType() not supported for GPU_{}", gemmType);
     }

     GemmType getGemmType() override {
       LOG_ONCE(info, "getGemmType() not supported for GPU");
       return GemmType::Float32;
     }

     // for CPU, sets the quantization range of weight matrices for inference.
     // for GPU, there is no quantization, so this does nothing.
     void setQuantizeRange(float range) override {
       LOG_ONCE(info, "setQuantizeRange() not supported for GPU_{}", range);
     }

     float getQuantizeRange() override {
       LOG_ONCE(info, "getQuantizeRange() not supported for GPU");
       return 0.f;
     }

     CudaCompute getCudaComputeCapability() { return compute_; }

   private:
     cublasHandle_t cublasHandle_{0};     // make sure it's 0, so it can be initialized lazily
     cusparseHandle_t cusparseHandle_{0}; // as above

     CudaCompute compute_;
   };
   }  // namespace gpu
   }  // namespace marian
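
A minimal usage sketch (not part of the generated listing above): constructing the GPU backend for device 0 and reading back its compute capability. Only ``Backend(DeviceId, size_t)``, ``getCudaComputeCapability()`` and ``getCublasHandle()`` appear in the listing; the ``DeviceId``/``DeviceType`` construction is an assumption based on how those types are used elsewhere in Marian.

.. code-block:: cpp

   #include <cstdio>

   #include "tensors/gpu/backend.h"

   int main() {
     // Assumed construction of a GPU DeviceId for device 0; DeviceId and
     // DeviceType are defined elsewhere in Marian, not in this listing.
     marian::DeviceId deviceId(0, marian::DeviceType::gpu);

     // The constructor selects the CUDA device and queries its compute capability.
     marian::gpu::Backend backend(deviceId, /*seed=*/1234);

     marian::gpu::CudaCompute cc = backend.getCudaComputeCapability();
     std::printf("compute capability: %d.%d\n", cc.major, cc.minor);

     // The cuBLAS handle is created lazily on first use and destroyed
     // in the backend's destructor.
     cublasHandle_t cublas = backend.getCublasHandle();
     (void)cublas;
     return 0;
   }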