取得 CUDA 版的 OpenCV 後, 使用 OpenCV 前, 先建立環境變數 OPENCV_DIR, 使其指向 opencv 目錄 ( C:\opencv\build\ ):
建立名稱為 OpenCvExample 的 CUDA 專案, CUDA精靈會自動建立一個 kernel.cu 檔 :
在專案中增加 main.cpp 與 OpenCvExample.h檔:
專案預設編譯為 x64 版本, 因此在專案屬性的 VC++ 目錄中, 將 include 目錄設定為 $(OPENCV_DIR)\include, lib 目錄設定為 $(OPENCV_DIR)\x64\vc15\lib :
設定 VC 的連結程式庫名稱, 加入 opencv_world343d.lib (Debug版) 或 opencv_world343.lib (Release版):
本例只有一個很簡單的複製功能, 主要是展示如何在 kernel 存取 Mat 物件的影像資料 data, 這樣便能利用 OpenCV 的檔案讀寫功能來進行檔案處理, 配合改變 kernel 函數即可做出不同的影像處理結果. 專案程式的函數說明:
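| 函數 | 說明 |
| --- | --- |
| main() | 讀取影像檔, 建立與來源影像相同大小的輸出 Mat, 呼叫 CopyImage(), 再以視窗顯示來源影像與輸出影像 |
| CopyImage() | 配置 GPU 記憶體, 將來源影像資料複製到 GPU, 啟動 copy kernel, 再將結果複製回輸出 Mat |
| __global__ void copy() | 平行處理複製來源影像 src 至輸出影像 dest |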
3個程式檔案的內容如下 :
OpenCvExample.h :
#pragma once
#include "cuda_runtime.h" #include "device_launch_parameters.h" #include <opencv2/core.hpp> #include <opencv2/imgcodecs.hpp> #include <opencv2/highgui.hpp> #include "opencv2/imgproc.hpp" #include <opencv2/core/cuda.hpp> #include <Windows.h>
using namespace cv; using namespace cuda;
// Kernel (defined in kernel.cu): each thread copies `channels` consecutive bytes
// from src to dest, starting at byte offset (global thread index * channels).
// Both pointers are dereferenced on the device.
__global__ void copy(byte *dest, byte *src, int channels);
// Host wrapper (defined in kernel.cu): uploads the pixel data of *src to the GPU,
// runs the copy kernel on it, and downloads the result into the pixel data of *dest.
void CopyImage(Mat *dest, Mat *src); |
kernel.cu :
#include "OpenCvExample.h"
#define WindowRadius 1
// Copies one pixel per thread from src to dest.
//
// Launch layout: 1-D grid of 1-D blocks. The thread with global index
// i = blockIdx.x * blockDim.x + threadIdx.x copies the `channels` bytes
// starting at byte offset i * channels.
//
// Preconditions:
//   - dest and src are device-accessible buffers of at least
//     gridDim.x * blockDim.x * channels bytes each.
//   - There is NO bounds guard (the signature carries no element count), so
//     the launch must supply exactly as many threads as there are pixels to
//     copy; surplus threads would read and write past the buffers.
//
// dest     : destination buffer (written)
// src      : source buffer (read only)
// channels : number of bytes copied per thread
__global__ void copy(byte *dest, byte *src, int channels)
{
    // Compute the offset in size_t: the 32-bit product used previously wraps
    // once pixel index * channels no longer fits, sending threads to wrong addresses.
    const size_t pixel = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t first = pixel * (size_t)channels;

    for (int c = 0; c < channels; c++) {
        dest[first + c] = src[first + c];
    }
}
// Threads per block used when launching copy(): a multiple of the warp size and
// safely below the 1024-threads-per-block hardware limit, whatever the image width.
static const unsigned int kCopyThreadsPerBlock = 256;

// Reports a failed CUDA runtime call on stderr. Returns true when status is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Device half of CopyImage: allocates the two device buffers, uploads hostSrc,
// runs copy() over every pixel, and downloads the result into hostDest.
// The buffers are returned through devDest/devSrc (possibly only partly
// allocated on failure); the caller owns them and must cudaFree both.
// Returns true only if every step succeeded.
static bool runCopyOnDevice(byte *hostDest, byte *hostSrc,
                            size_t pixelCount, int bytesPerPixel,
                            byte **devDest, byte **devSrc)
{
    const size_t pixelBytes = pixelCount * (size_t)bytesPerPixel;

    // Choose which GPU to run on.
    if (!cudaSucceeded(cudaSetDevice(0), "cudaSetDevice")) {
        return false;
    }

    // Allocate GPU buffers.
    if (!cudaSucceeded(cudaMalloc((void**)devSrc, pixelBytes), "cudaMalloc (source)")) {
        return false;
    }
    if (!cudaSucceeded(cudaMalloc((void**)devDest, pixelBytes), "cudaMalloc (destination)")) {
        return false;
    }

    // Copy from host memory to GPU buffer.
    if (!cudaSucceeded(cudaMemcpy(*devSrc, hostSrc, pixelBytes, cudaMemcpyHostToDevice),
                       "cudaMemcpy (host to device)")) {
        return false;
    }

    // copy() has no bounds guard, so the launches must cover exactly pixelCount
    // threads: one launch of full blocks plus one single-block launch for the
    // remainder. (The previous <<<rows, cols>>> launch put `cols` threads in one
    // block and therefore failed for any image wider than 1024 pixels.)
    const size_t fullBlocks = pixelCount / kCopyThreadsPerBlock;
    const unsigned int tailThreads = (unsigned int)(pixelCount % kCopyThreadsPerBlock);

    if (fullBlocks > 0x7fffffffu) {  // maximum gridDim.x
        fprintf(stderr, "CopyImage: image too large for a single 1-D grid\n");
        return false;
    }

    if (fullBlocks > 0) {
        copy<<<(unsigned int)fullBlocks, kCopyThreadsPerBlock>>>(*devDest, *devSrc, bytesPerPixel);
        // Check for any errors launching the kernel.
        if (!cudaSucceeded(cudaGetLastError(), "copy kernel launch")) {
            return false;
        }
    }
    if (tailThreads > 0) {
        const size_t offset = fullBlocks * kCopyThreadsPerBlock * (size_t)bytesPerPixel;
        copy<<<1, tailThreads>>>(*devDest + offset, *devSrc + offset, bytesPerPixel);
        if (!cudaSucceeded(cudaGetLastError(), "copy kernel launch (tail)")) {
            return false;
        }
    }

    // cudaDeviceSynchronize waits for the kernel to finish and surfaces any
    // error raised while it was executing.
    if (!cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize after copy kernel")) {
        return false;
    }

    // Copy output from GPU buffer to host memory.
    return cudaSucceeded(cudaMemcpy(hostDest, *devDest, pixelBytes, cudaMemcpyDeviceToHost),
                         "cudaMemcpy (device to host)");
}

// Copies the pixel data of *src into *dest by way of the GPU.
//
// dest : output image; (re)allocated to the size and type of *src if it does
//        not already match. Must not be NULL.
// src  : input image; must be a non-empty, continuous 2-D Mat.
//
// Errors are reported on stderr and the function returns early; in that case the
// pixel contents of *dest are unspecified. Device buffers are released on every
// path. The CUDA context is deliberately NOT reset here (cudaDeviceReset would
// destroy every other allocation in the process); do that once at program exit
// if a profiler needs it.
//
// To verify the result on the host, compare dest->data with src->data over
// src->total() * src->elemSize() bytes (e.g. with memcmp).
void CopyImage(Mat *dest, Mat *src)
{
    if (dest == NULL || src == NULL || src->empty() || src->dims != 2) {
        fprintf(stderr, "CopyImage: source must be a non-empty 2-D image and dest must not be NULL\n");
        return;
    }
    // The raw cudaMemcpy below treats Mat::data as one gap-free byte range.
    if (!src->isContinuous()) {
        fprintf(stderr, "CopyImage: source image is not continuous in memory\n");
        return;
    }

    // No-op when dest already has the right size and type; otherwise allocates,
    // so the download can never overrun a too-small destination.
    dest->create(src->rows, src->cols, src->type());
    if (!dest->isContinuous()) {
        fprintf(stderr, "CopyImage: destination image is not continuous in memory\n");
        return;
    }

    byte *dev_src = 0;
    byte *dev_dest = 0;

    // elemSize() is the full byte size of one pixel (channels * bytes per channel),
    // so images deeper than 8 bits per channel are copied completely.
    runCopyOnDevice(dest->data, src->data, src->total(), (int)src->elemSize(),
                    &dev_dest, &dev_src);

    // Release device memory on success and failure alike; cudaFree(0) is a no-op.
    cudaSucceeded(cudaFree(dev_dest), "cudaFree (destination)");
    cudaSucceeded(cudaFree(dev_src), "cudaFree (source)");
}
main.cpp :
#include <stdio.h> #include <iostream> #include "OpenCvExample.h"
using namespace std;
int main(int argc, char** argv) { cudaError_t cudaStatus;
char *filename = "D:\\LENA.JPG";
// Load source image Mat srcMat = imread(filename, IMREAD_COLOR); // GRAYSCALE); // Read the file and convert to grayscale if (srcMat.empty()) // Check for invalid input { cout << "Could not open or find the image" << std::endl; return 1; }
//create output Mat of the same size as srcMt Mat destMat; destMat.create(srcMat.rows, srcMat.cols, srcMat.type());
// Run the parallel algorithm which should be in the .cu file CopyImage(&destMat, &srcMat);
namedWindow("Source window", WINDOW_AUTOSIZE); // Create a window for display. imshow("Source window", srcMat); // Show our image inside it.
namedWindow("Dest window", WINDOW_AUTOSIZE); // Create a window for display. imshow("Dest window", destMat); // Show our image inside it.
waitKey(0); // Wait for a keystroke in the window
return 0; }
|
執行結果畫面 :
留言列表