[CUDA] 與 OpenCV 共舞－No More Codes

取得 CUDA 版的 OpenCV 後, 使用 OpenCV 前, 先建立環境變數 OPENCV_DIR, 使其指向 opencv 目錄 ( C:\opencv\build\ ):

undefined

建立名稱為 OpenCvExample 的 CUDA 專案, CUDA精靈會自動建立一個 kernel.cu 檔 :

undefined

在專案中增加 main.cpp 與 OpenCvExample.h檔:

undefined

專案預設編譯為 x64 版本, 因此在設定專案 VC++ 的 include 與 lib 目錄屬性時, 請將 include 目錄設定為 $(OPENCV_DIR)\include, lib 目錄設定為 $(OPENCV_DIR)\x64\vc15\lib :

undefined

設定 VC 的連結程式庫名稱, 加入 opencv_world343d.lib (Debug版) 或 opencv_world343.lib (Release版):

undefined

本例只有一個很簡單的複製功能, 主要是展示如何在 kernel 存取 Mat 物件的影像資料 data, 這樣便能利用 OpenCV 的檔案讀寫功能來進行檔案處理, 配合改變 kernel 函數即可做出不同的影像處理結果. 專案程式的函數說明:

函數	說明
main()	(1) 建立 srcMat 並讀取影像檔; (2) 建立 destMat 準備放置處理結果; (3) 呼叫在 kernel.cu 的處理函數 CopyImage(); (4) 顯示處理結果
CopyImage()	(1) 準備 CUDA 的 device memory; (2) 呼叫 kernel 函數 copy(); (3) 複製處理結果至輸出 dest
__global__ void copy()	平行處理複製來源影像 src至輸出影像 dest

3個程式檔案的內容如下 :

OpenCvExample.h :

#pragma once

#include "cuda_runtime.h"

#include "device_launch_parameters.h"

#include <opencv2/core.hpp>

#include <opencv2/imgcodecs.hpp>

#include <opencv2/highgui.hpp>

#include "opencv2/imgproc.hpp"

#include <opencv2/core/cuda.hpp>

#include <Windows.h>

using namespace cv;

using namespace cuda;

// Kernel (defined in kernel.cu): each thread copies `channels` consecutive
// bytes from src to dest, starting at byte offset
// (blockIdx.x * blockDim.x + threadIdx.x) * channels.
// It takes no size argument, so the launch must not create more threads than
// the buffers have `channels`-byte groups.
__global__ void copy(byte *dest, byte *src, int channels);

// Host wrapper (defined in kernel.cu): uploads the pixel data of *src to the
// GPU, runs the copy kernel on it, and downloads the result into dest->data.
// Errors are reported on stderr; nothing is returned to the caller.
void CopyImage(Mat *dest, Mat *src);

kernel.cu :

#include "OpenCvExample.h"

#define WindowRadius 1

/*
 * copy - element-wise device copy, one thread per group of `channels` bytes.
 *
 * dest     : device buffer that receives the bytes.
 * src      : device buffer the bytes are read from.
 * channels : number of consecutive bytes each thread copies.
 *
 * Launch layout: 1-D grid of 1-D blocks. Thread t (its global index
 * blockIdx.x * blockDim.x + threadIdx.x) handles bytes
 * [t * channels, t * channels + channels).
 *
 * Precondition: the kernel receives no element count and therefore performs
 * no bounds check; gridDim.x * blockDim.x must not exceed the number of
 * `channels`-byte groups held by both buffers.
 */
__global__ void copy(byte *dest, byte *src, int channels)
{
    // Compute the byte offset in size_t so it cannot wrap for large buffers.
    const size_t p = ((size_t)blockIdx.x * blockDim.x + threadIdx.x) * (size_t)channels;

    for (int c = 0; c < channels; c++)
    {
        dest[p + c] = src[p + c];
    }
}

// Releases both device buffers. cudaFree(nullptr) is a no-op, so this can be
// called from any exit path no matter how far allocation got.
static void releaseDeviceBuffers(byte *dev_dest, byte *dev_src)
{
    cudaFree(dev_dest);
    cudaFree(dev_src);
}

/*
 * CopyImage - copies the pixels of *src into *dest by way of the GPU.
 *
 * Steps: select device 0, allocate two device buffers, upload src->data,
 * launch the copy kernel with one block per image row and one thread per
 * pixel of that row, download the result into dest->data, compare dest with
 * src on the host and print "ok=ok" / "ok=fail" to stderr, then free the
 * buffers and reset the device.
 *
 * dest : pre-allocated output image; must hold exactly as many bytes as *src.
 * src  : input image (expected to be a regular 2-D Mat, since rows/cols are
 *        used as the launch configuration).
 *
 * Both Mats must be continuous because their data is transferred as a single
 * flat buffer. Failures are reported on stderr and the function returns
 * without touching *dest further; device buffers are released on every path.
 *
 * Note: src->cols becomes the block size, so images wider than the device's
 * maximum threads per block make the launch fail; that failure is reported
 * through the cudaGetLastError() check below.
 */
void CopyImage(Mat *dest, Mat *src)
{
    if (dest == nullptr || src == nullptr || src->empty()) {
        fprintf(stderr, "CopyImage: source image is missing or empty!\n");
        return;
    }

    const size_t pixelBytes = src->total() * src->elemSize();

    // Mat::data is only a single flat block of pixelBytes when the Mat is
    // continuous, and dest must be able to receive all of it.
    if (dest->total() * dest->elemSize() != pixelBytes ||
        !src->isContinuous() || !dest->isContinuous()) {
        fprintf(stderr, "CopyImage: src and dest must be continuous and hold the same number of bytes!\n");
        return;
    }

    byte *dev_src = 0;
    byte *dev_dest = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!");
        return;
    }

    // Allocate GPU buffers
    cudaStatus = cudaMalloc((void**)&dev_src, pixelBytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        releaseDeviceBuffers(dev_dest, dev_src);
        return;
    }

    cudaStatus = cudaMalloc((void**)&dev_dest, pixelBytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        releaseDeviceBuffers(dev_dest, dev_src);
        return;
    }

    // Copy from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_src, src->data, pixelBytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        releaseDeviceBuffers(dev_dest, dev_src);
        return;
    }

    // Bytes handled by each thread. elemSize() equals channels() for 8-bit
    // images and stays correct for deeper pixel formats.
    const int bytesPerPixel = (int)src->elemSize();

    // One block per row, one thread per pixel in the row: exactly
    // rows * cols threads, which the kernel requires (it has no bounds check).
    copy<<<src->rows, src->cols>>>(dev_dest, dev_src, bytesPerPixel);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "copy kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        releaseDeviceBuffers(dev_dest, dev_src);
        return;
    }

    // cudaDeviceSynchronize waits for the kernel to finish and surfaces any
    // error that occurred while it was running.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching copy kernel!\n", cudaStatus);
        releaseDeviceBuffers(dev_dest, dev_src);
        return;
    }

    // Copy output from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(dest->data, dev_dest, pixelBytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        releaseDeviceBuffers(dev_dest, dev_src);
        return;
    }

    // Check result: both buffers are flat, so compare byte by byte and stop
    // at the first mismatch.
    bool ok = true;
    for (size_t i = 0; i < pixelBytes; i++) {
        if (dest->data[i] != src->data[i]) {
            ok = false;
            break;
        }
    }
    fprintf(stderr, "ok=%s\n", ok ? "ok" : "fail");

    // All done: free the buffers, then reset the device.
    releaseDeviceBuffers(dev_dest, dev_src);

    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return;
    }
}

main.cpp :

#include <stdio.h>

#include <iostream>

#include "OpenCvExample.h"

using namespace std;

/*
 * Entry point: loads a colour image, copies it through the GPU with
 * CopyImage(), and shows the source and the copy in two windows.
 *
 * Usage: OpenCvExample [image_path]
 *   image_path - optional; defaults to D:\LENA.JPG when omitted.
 *
 * Returns 1 if the image cannot be loaded, 0 otherwise.
 */
int main(int argc, char** argv)
{
    // String literals are const; take the path from the command line when given.
    const char *filename = (argc > 1) ? argv[1] : "D:\\LENA.JPG";

    // Load the source image as a 3-channel colour image
    Mat srcMat = imread(filename, IMREAD_COLOR);

    if (srcMat.empty()) // Check for invalid input
    {
        cout << "Could not open or find the image" << std::endl;
        return 1;
    }

    // Create an output Mat of the same size and type as srcMat
    Mat destMat;
    destMat.create(srcMat.rows, srcMat.cols, srcMat.type());

    // Run the parallel algorithm, which lives in the .cu file
    CopyImage(&destMat, &srcMat);

    namedWindow("Source window", WINDOW_AUTOSIZE); // Create a window for display.
    imshow("Source window", srcMat);               // Show the input image in it.

    namedWindow("Dest window", WINDOW_AUTOSIZE);   // Create a window for display.
    imshow("Dest window", destMat);                // Show the GPU copy in it.

    waitKey(0); // Wait for a keystroke in the window

    return 0;
}

執行結果畫面 :

undefined

ghostyguo

No More Codes

ghostyguo 發表在痞客邦留言(0) 人氣()

E-mail轉寄

No More Codes

歡迎光臨ghostyguo在痞客邦的小天地

[CUDA] 與 OpenCV 共舞

歷史上的今天

留言列表

文章分類

雜項記錄 (3)

硬體設計 (1)

訊號處理 (4)

Raspberry Pi (1)

程式設計 (19)

熱門文章

最新留言

參觀人氣

QR Code

文章搜尋

最新文章