.. _program_listing_file_src_tensors_gpu_backend.h:

Program Listing for File backend.h
==================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_tensors_gpu_backend.h>` (``src/tensors/gpu/backend.h``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #pragma once

   #include "common/config.h"
   #include "tensors/backend.h"  // note: this is one folder up
   #include "tensors/gpu/cuda_helpers.h"
   #include "common/logging.h"

   #include <cublas_v2.h>
   #include <cuda.h>
   #include <cusparse.h>
   #include <curand.h>

   namespace marian {
   namespace gpu {

   // @TODO: in the future this should probably become a fully fledged CudaInfo class with many attributes
   struct CudaCompute {
     int major;
     int minor;
   };

   class Backend : public marian::Backend {
   private:
     void setCudaComputeCapability() {
       CUDA_CHECK(cudaDeviceGetAttribute(&compute_.major, cudaDevAttrComputeCapabilityMajor, (int)deviceId_.no));
       CUDA_CHECK(cudaDeviceGetAttribute(&compute_.minor, cudaDevAttrComputeCapabilityMinor, (int)deviceId_.no));
     }

   public:
     Backend(DeviceId deviceId, size_t seed) : marian::Backend(deviceId, seed) {
       setDevice();
       setCudaComputeCapability();
     }

     ~Backend() {
       setDevice();
       if(cusparseHandle_) {
         cusparseDestroy(cusparseHandle_);
         cusparseHandle_ = 0;
       }
       if(cublasHandle_) {
         cublasDestroy(cublasHandle_);
         cublasHandle_ = 0;
       }
     }

     void setDevice() override { CUDA_CHECK(cudaSetDevice((int)deviceId_.no)); }

     void synchronize() override { CUDA_CHECK(cudaStreamSynchronize(0)); }

     cublasHandle_t getCublasHandle() {
       if(!cublasHandle_) { // lazy initialization here to avoid memory usage when unused
         setDevice();
         cublasCreate(&cublasHandle_);
       }
       return cublasHandle_;
     }

     cusparseHandle_t getCusparseHandle() {
       if(!cusparseHandle_) { // lazy initialization here to avoid memory usage when unused
         setDevice();
         cusparseCreate(&cusparseHandle_);
       }
       return cusparseHandle_;
     }

     // for CPU, enables optimized code paths for inference.
     // for GPU, this is not applicable; isOptimized() always returns false.
     void setOptimized(bool optimize) override {
       LOG_ONCE(info, "setOptimized() not supported for GPU_{}", optimize);
     }

     bool isOptimized() override {
       LOG_ONCE(info, "isOptimized() not supported for GPU");
       return false;
     }

     // for CPU, selects different GEMM types for inference.
     // for GPU, there is no GEMM type, so this does nothing.
     void setGemmType(std::string gemmType) override {
       LOG_ONCE(info, "setGemmType() not supported for GPU_{}", gemmType);
     }

     GemmType getGemmType() override {
       LOG_ONCE(info, "getGemmType() not supported for GPU");
       return GemmType::Float32;
     }

     // for CPU, sets the quantization range of weight matrices for inference.
     // for GPU, there is no quantization, so this does nothing.
     void setQuantizeRange(float range) override {
       LOG_ONCE(info, "setQuantizeRange() not supported for GPU_{}", range);
     }

     float getQuantizeRange() override {
       LOG_ONCE(info, "getQuantizeRange() not supported for GPU");
       return 0.f;
     }

     CudaCompute getCudaComputeCapability() { return compute_; }

   private:
     cublasHandle_t cublasHandle_{0};     // make sure it's 0, so it can be initialized lazily
     cusparseHandle_t cusparseHandle_{0}; // as above

     CudaCompute compute_;
   };
   }  // namespace gpu
   }  // namespace marian
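
A minimal usage sketch (not part of the generated listing above): constructing the GPU backend for device 0 and reading back its compute capability. Only ``Backend(DeviceId, size_t)``, ``getCudaComputeCapability()`` and ``getCublasHandle()`` appear in the listing; the ``DeviceId``/``DeviceType`` construction is an assumption based on how those types are used elsewhere in Marian.

.. code-block:: cpp

   #include <cstdio>

   #include "tensors/gpu/backend.h"

   int main() {
     // Assumed construction of a GPU DeviceId for device 0; DeviceId and
     // DeviceType are defined elsewhere in Marian, not in this listing.
     marian::DeviceId deviceId(0, marian::DeviceType::gpu);

     // The constructor selects the CUDA device and queries its compute capability.
     marian::gpu::Backend backend(deviceId, /*seed=*/1234);

     marian::gpu::CudaCompute cc = backend.getCudaComputeCapability();
     std::printf("compute capability: %d.%d\n", cc.major, cc.minor);

     // The cuBLAS handle is created lazily on first use and destroyed
     // in the backend's destructor.
     cublasHandle_t cublas = backend.getCublasHandle();
     (void)cublas;
     return 0;
   }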