CudaInfo(): 列印 CUDA 裝置的相關能力

undefined

程式碼:

 

#include "cuda_runtime.h"

#include "device_launch_parameters.h"

 

#include <stdio.h>

 

void CudaInfo();

 

// Program entry point.
// Prints the CUDA device report via CudaInfo(), then waits for one character
// on stdin (presumably to keep the console window open -- confirm intent)
// before exiting with status 0.
int main() {
    CudaInfo();
    getchar();
    return 0;
}

 

// Reports a failed CUDA runtime call on stdout.
// Returns true when `status` is cudaSuccess, false otherwise (after printing
// the numeric code and the runtime's error string for `what`).
static bool cudaInfoOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    printf("%s returned %d\n-> %s\n", what, static_cast<int>(status),
           cudaGetErrorString(status));
    return false;
}

// Prints the capabilities of every CUDA device visible to the runtime.
//
// For each device the report contains: name, driver/runtime version, compute
// capability, memory sizes, clock rates, texture limits, launch limits,
// feature flags, PCI location and compute mode. All output goes to stdout.
//
// Takes no parameters and returns nothing. If the device count cannot be
// obtained, the error is printed followed by "Result = FAIL" and the function
// returns. A device whose selection or property query fails is reported and
// skipped; the remaining devices are still listed.
void CudaInfo()
{
    int deviceCount = 0;
    cudaError_t error_id = cudaGetDeviceCount(&deviceCount);

    if (error_id != cudaSuccess) {
        printf("cudaGetDeviceCount returned %d\n-> %s\n",
               static_cast<int>(error_id), cudaGetErrorString(error_id));
        printf("Result = FAIL\n");
        return;
    }

    // cudaGetDeviceCount reports 0 if there are no CUDA capable devices.
    if (deviceCount == 0) {
        printf("There are no available device(s) that support CUDA\n");
    }
    else {
        printf("Detected %d CUDA Capable device(s)\n", deviceCount);
    }

    // The driver/runtime versions do not depend on the selected device, so
    // query them once. On failure the value stays 0 and is printed as 0.0.
    int driverVersion = 0, runtimeVersion = 0;
    if (!cudaInfoOk(cudaDriverGetVersion(&driverVersion),
                    "cudaDriverGetVersion")) {
        driverVersion = 0;
    }
    if (!cudaInfoOk(cudaRuntimeGetVersion(&runtimeVersion),
                    "cudaRuntimeGetVersion")) {
        runtimeVersion = 0;
    }

    // Human-readable names indexed by cudaDeviceProp::computeMode; the last
    // entry doubles as the fallback for values outside the table.
    static const char *const sComputeMode[] = {
        "Default (multiple host threads can use ::cudaSetDevice() with device "
        "simultaneously)",
        "Exclusive (only one host thread in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Prohibited (no host thread can use ::cudaSetDevice() with this "
        "device)",
        "Exclusive Process (many threads in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Unknown" };
    const int computeModeCount =
        static_cast<int>(sizeof(sComputeMode) / sizeof(sComputeMode[0]));

    for (int dev = 0; dev < deviceCount; ++dev) {
        // Skip a device that cannot be selected or queried instead of
        // printing an uninitialized cudaDeviceProp.
        if (!cudaInfoOk(cudaSetDevice(dev), "cudaSetDevice")) {
            continue;
        }
        cudaDeviceProp deviceProp;
        if (!cudaInfoOk(cudaGetDeviceProperties(&deviceProp, dev),
                        "cudaGetDeviceProperties")) {
            continue;
        }

        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);

        // Console log
        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n",
               driverVersion / 1000, (driverVersion % 100) / 10,
               runtimeVersion / 1000, (runtimeVersion % 100) / 10);
        printf("  CUDA Capability Major/Minor version number:    %d.%d\n",
               deviceProp.major, deviceProp.minor);

        // size_t values are cast to unsigned long long and printed with %llu
        // so the format matches the argument on both LP64 and LLP64 targets.
        printf("  Total amount of global memory:                 %.0f MBytes "
               "(%llu bytes)\n",
               static_cast<float>(deviceProp.totalGlobalMem / 1048576.0f),
               (unsigned long long)deviceProp.totalGlobalMem);

        /*
        printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n",
        deviceProp.multiProcessorCount,
        _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor),
        _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) *
        deviceProp.multiProcessorCount);
        */
        printf(
            "  GPU Max Clock rate:                            %.0f MHz (%0.2f "
            "GHz)\n",
            deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);

#if CUDART_VERSION >= 5000
        // This is supported in CUDA 5.0 (runtime API device properties)
        printf("  Memory Clock rate:                             %.0f Mhz\n",
               deviceProp.memoryClockRate * 1e-3f);
        printf("  Memory Bus Width:                              %d-bit\n",
               deviceProp.memoryBusWidth);

        if (deviceProp.l2CacheSize) {
            printf("  L2 Cache Size:                                 %d bytes\n",
                   deviceProp.l2CacheSize);
        }

#else
        // This only available in CUDA 4.0-4.2 (but these were only exposed in the
        // CUDA Driver API)
        // NOTE(review): getCudaAttribute and the CU_DEVICE_ATTRIBUTE_* names are
        // not provided by the headers included in the visible part of this file;
        // this branch presumably needs the samples' helper header -- confirm.
        int memoryClock;
        getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
                              dev);
        printf("  Memory Clock rate:                             %.0f Mhz\n",
               memoryClock * 1e-3f);
        int memBusWidth;
        getCudaAttribute<int>(&memBusWidth,
                              CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
        printf("  Memory Bus Width:                              %d-bit\n",
               memBusWidth);
        int L2CacheSize;
        getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);

        if (L2CacheSize) {
            printf("  L2 Cache Size:                                 %d bytes\n",
                   L2CacheSize);
        }

#endif

        printf(
            "  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, "
            "%d), 3D=(%d, %d, %d)\n",
            deviceProp.maxTexture1D, deviceProp.maxTexture2D[0],
            deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0],
            deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
        printf(
            "  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",
            deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
        printf(
            "  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d "
            "layers\n",
            deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1],
            deviceProp.maxTexture2DLayered[2]);

        printf("  Total amount of constant memory:               %llu bytes\n",
               (unsigned long long)deviceProp.totalConstMem);
        printf("  Total amount of shared memory per block:       %llu bytes\n",
               (unsigned long long)deviceProp.sharedMemPerBlock);
        printf("  Total number of registers available per block: %d\n",
               deviceProp.regsPerBlock);
        printf("  Warp size:                                     %d\n",
               deviceProp.warpSize);
        printf("  Maximum number of threads per multiprocessor:  %d\n",
               deviceProp.maxThreadsPerMultiProcessor);
        printf("  Maximum number of threads per block:           %d\n",
               deviceProp.maxThreadsPerBlock);
        printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
               deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n",
               deviceProp.maxGridSize[0], deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf("  Maximum memory pitch:                          %llu bytes\n",
               (unsigned long long)deviceProp.memPitch);
        printf("  Texture alignment:                             %llu bytes\n",
               (unsigned long long)deviceProp.textureAlignment);
        printf(
            "  Concurrent copy and kernel execution:          %s with %d copy "
            "engine(s)\n",
            (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
        printf("  Run time limit on kernels:                     %s\n",
               deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
        printf("  Integrated GPU sharing Host Memory:            %s\n",
               deviceProp.integrated ? "Yes" : "No");
        printf("  Support host page-locked memory mapping:       %s\n",
               deviceProp.canMapHostMemory ? "Yes" : "No");
        printf("  Alignment requirement for Surfaces:            %s\n",
               deviceProp.surfaceAlignment ? "Yes" : "No");
        printf("  Device has ECC support:                        %s\n",
               deviceProp.ECCEnabled ? "Enabled" : "Disabled");
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        printf("  CUDA Device Driver Mode (TCC or WDDM):         %s\n",
               deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)"
               : "WDDM (Windows Display Driver Model)");
#endif
        printf("  Device supports Unified Addressing (UVA):      %s\n",
               deviceProp.unifiedAddressing ? "Yes" : "No");
        printf("  Device supports Compute Preemption:            %s\n",
               deviceProp.computePreemptionSupported ? "Yes" : "No");
        printf("  Supports Cooperative Kernel Launch:            %s\n",
               deviceProp.cooperativeLaunch ? "Yes" : "No");
        printf("  Supports MultiDevice Co-op Kernel Launch:      %s\n",
               deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
        printf("  Device PCI Domain ID / Bus ID / location ID:   %d / %d / %d\n",
               deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);

        // Guard the table lookup: an out-of-range mode maps to "Unknown"
        // rather than reading past the array or passing NULL to %s.
        int modeIndex = deviceProp.computeMode;
        if (modeIndex < 0 || modeIndex >= computeModeCount) {
            modeIndex = computeModeCount - 1;
        }
        printf("  Compute Mode:\n");
        printf("     < %s >\n", sComputeMode[modeIndex]);
    }
}

 

 

 

 

 

arrow
arrow
    全站熱搜
    創作者介紹
    創作者 ghostyguo 的頭像
    ghostyguo

    No More Codes

    ghostyguo 發表在 痞客邦 留言(0) 人氣()