取得 CUDA 版的 OpenCV 後, 使用 OpenCV 前, 先建立環境變數 OPENCV_DIR, 使其指向 opencv 目錄 ( C:\opencv\build\ ):

undefined

建立名稱為 OpenCvExample 的 CUDA 專案, CUDA精靈會自動建立一個 kernel.cu 檔 :

undefined

在專案中增加 main.cpp  與 OpenCvExample.h檔:

undefined

專案預設編譯為 x64 版本, 因此請在專案屬性的「VC++ 目錄」中, 將 include 目錄設定為 $(OPENCV_DIR)\include, 將 lib 目錄設定為 $(OPENCV_DIR)\x64\vc15\lib :

undefined

undefined

 

設定 VC 的連結程式庫名稱, 加入  opencv_world343d.lib (Debug版) 或 opencv_world343.lib (Release版):

undefined

 

本例只有一個很簡單的複製功能, 主要是展示如何在 kernel 存取 Mat 物件的影像資料 data,  這樣便能利用 OpenCV 的檔案讀寫功能來進行檔案處理, 配合改變 kernel 函數即可做出不同的影像處理結果. 專案程式的函數說明: 

 

 

main()

  1. 建立 srcMat 並讀取影像檔
  2. 建立 destMat 準備放置處理結果
  3. 呼叫在 kernel.cu的處理函數 CopyImage()
  4. 顯示處理結果

CopyImage()

  1. 準備 CUDA device memory
  2. 呼叫 kernel 函數 copy()
  3. 複製處理結果至輸出 dest

__global__ void copy()

平行處理複製來源影像 src至輸出影像 dest

 

 

3個程式檔案的內容如下 :

 

OpenCvExample.h :

#pragma once

 

#include "cuda_runtime.h"

#include "device_launch_parameters.h"

#include <opencv2/core.hpp>

#include <opencv2/imgcodecs.hpp>

#include <opencv2/highgui.hpp>

#include "opencv2/imgproc.hpp"

#include <opencv2/core/cuda.hpp>

#include <Windows.h>

 

using namespace cv;

using namespace cuda;

 

// Device kernel (defined in kernel.cu). Each thread uses its global 1D index
// as a pixel number and copies `channels` consecutive bytes from src to dest,
// starting at byte offset pixel * channels. dest and src are dereferenced on
// the device, so they must be device-accessible pointers. The kernel does no
// bounds checking, so the launch must supply exactly one thread per pixel.
__global__ void copy(byte *dest, byte *src, int channels);

// Host entry point (defined in kernel.cu). Uploads the pixel data of *src to
// the GPU, runs a copy kernel over it and downloads the result into
// dest->data. Callers should pass a dest that is already allocated with the
// same size and type as src. Failures are reported on stderr only; the
// function returns void, so the caller cannot detect them.
void CopyImage(Mat *dest, Mat *src);

 

kernel.cu :

#include "OpenCvExample.h"

 

#define WindowRadius   1

 

// Copies one pixel per thread from src to dest.
//
// Each thread treats its global 1D index (blockIdx.x * blockDim.x +
// threadIdx.x) as a pixel number and copies `channels` consecutive bytes
// starting at byte offset pixel * channels.
//
// Parameters:
//   dest     - device buffer that receives the bytes
//   src      - device buffer that is read
//   channels - number of bytes copied per thread (one byte per channel)
//
// Launch requirements: 1D grid and 1D blocks. There is no bounds check, so
// gridDim.x * blockDim.x must equal the number of pixels held by both
// buffers; surplus threads would read and write past the end.
__global__ void copy(byte *dest, byte *src, int channels)
{
    // Widen to size_t before multiplying: in 32-bit arithmetic the byte
    // offset wraps (and turns negative once stored in an int) for buffers of
    // 2 GiB or more.
    const size_t first = ((size_t)blockIdx.x * blockDim.x + threadIdx.x) * (size_t)channels;

    for (int c = 0; c < channels; c++)
    {
        dest[first + c] = src[first + c];
    }
}

 

void CopyImage(Mat *dest, Mat *src)

{

     byte *dev_src = 0;

     byte *dev_dest = 0;

     size_t pixelBytes = src->total() *src->elemSize();

 

     cudaError_t cudaStatus;

 

 

     // Choose which GPU to run on

     cudaStatus = cudaSetDevice(0);

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaSetDevice failed!");

         return;

     }

 

     // Allocate GPU buffers

 

     cudaStatus = cudaMalloc((void**)&dev_src, pixelBytes);

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaMalloc failed!");

         return;

     }

     cudaStatus = cudaMalloc((void**)&dev_dest, pixelBytes);

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaMalloc failed!");

         return;

     }

 

     // Copy from host memory to GPU buffers.

     cudaStatus = cudaMemcpy(dev_src, src->data, pixelBytes, cudaMemcpyHostToDevice);

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaMemcpy failed!");

         return;

     }

 

     int Channel = src->channels();

     copy<< <src->rows, src->cols >> > (dev_dest, dev_src, Channel);

 

     // Check for any errors launching the kernel

     cudaStatus = cudaGetLastError();

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));

         return;

     }

 

     // cudaDeviceSynchronize waits for the kernel to finish

     cudaStatus = cudaDeviceSynchronize();

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);

         return;

     }

 

     // Copy output from GPU buffer to host memory.

     cudaStatus = cudaMemcpy(dest->data, dev_dest, pixelBytes, cudaMemcpyDeviceToHost);

     if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaMemcpy failed!");

         return;

     }

 

     //check result

     /*

     bool ok = true;

     uint32_t Width = src->cols;

     uint32_t Height = src->rows;

     for (uint32_t x = 0; x < Width; x++)

     {

         for (uint32_t y = 0; y < Height; y++)

         {

              for (int c = 0; c < Channel; c++) {

                  if (dest->data[(y*Width + x)*Channel + c] != src->data[(y*Width + x)*Channel + c])

                  {

                       ok = false;

                       break;

                  }

              }

         }

     }

     fprintf(stderr, "ok=%s\n",ok?"ok":"fail");

     */

 

     // All done, reset the device

     cudaStatus = cudaDeviceReset(); if (cudaStatus != cudaSuccess) {

         fprintf(stderr, "cudaDeviceReset failed!");

         return;

     }

}

 

main.cpp :

 

#include <stdio.h>

#include <iostream>

#include "OpenCvExample.h"

 

using namespace std;

 

int main(int argc, char** argv)

{

     cudaError_t cudaStatus;

 

     char *filename = "D:\\LENA.JPG";

 

     // Load source image

     Mat srcMat = imread(filename, IMREAD_COLOR); // GRAYSCALE); // Read the file and convert to grayscale

     if (srcMat.empty()) // Check for invalid input

     {

         cout << "Could not open or find the image" << std::endl;

         return 1;

     }

 

     //create output Mat of the same size as srcMt

     Mat destMat;

     destMat.create(srcMat.rows, srcMat.cols, srcMat.type());

 

     // Run the parallel algorithm which should be in the .cu file

     CopyImage(&destMat, &srcMat);

 

     namedWindow("Source window", WINDOW_AUTOSIZE); // Create a window for display.

     imshow("Source window", srcMat); // Show our image inside it.

 

     namedWindow("Dest window", WINDOW_AUTOSIZE); // Create a window for display.

     imshow("Dest window", destMat); // Show our image inside it.

 

     waitKey(0); // Wait for a keystroke in the window

 

     return 0;

}

 

 

 執行結果畫面 :

undefined

 

 

arrow
arrow
    全站熱搜

    ghostyguo 發表在 痞客邦 留言(0) 人氣()