博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
GPU demo + makefile
阅读量:5084 次
发布时间:2019-06-13

本文共 3569 字,大约阅读时间需要 11 分钟。

makefile

# Build rules for the CUDA cash-flow demo: a static library (NPV.a), a
# stand-alone GPU test program (TestGpu) and two cash-flow executables.
#
# NOTE(review): this file was reconstructed from a copy whose line breaks were
# lost. The flattened text reads "oTestGpu" where the TestGpu rule begins; the
# stray "o" is dropped here -- confirm the target name against the original.

NVCC=nvcc
INCLUDE=-I/usr/local/cuda-9.2/include
# Further CUDA libraries that were listed but left disabled: -lcudart -lcublas
LIB=-lcudadevrt
# -rdc=true emits relocatable device code so that device functions can be
# linked across object files (see the -dlink step for link.o below).
CUDA_FLAG=-rdc=true  -gencode=arch=compute_35,code=\"sm_35,compute_35\"

# Objects compiled from a .cu source of the same base name.
CUDA_OBJS:=cashflow_3.o qss_date4c_cu.o Testcase_CashFlow_cuda.o

#all:TestGpu

# Static library bundling the device-linked object with the two CUDA objects.
NPV.a:link.o cashflow_3.o qss_date4c_cu.o
	ar crv $@ $^

# Single-source GPU test program.
TestGpu:TestGPU.cu
	$(NVCC) -o $@ $(INCLUDE) $^

# Executables. Libraries follow the objects so the linker can resolve the
# symbols those objects reference.
cashflow_3:qss_date4c_cu.o cashflow_3.o
	$(NVCC) -o $@ $(INCLUDE) $(CUDA_FLAG) $^ $(LIB)

Testcashflow_3:qss_date4c_cu.o cashflow_3.o Testcase_CashFlow_cuda.o excel.o
	$(NVCC) -o $@ $(INCLUDE) $(CUDA_FLAG) $^ $(LIB)

# Device-link step: resolves the relocatable device code of the listed objects
# into one object that can be archived into NPV.a.
link.o:cashflow_3.o qss_date4c_cu.o
	$(NVCC) $(INCLUDE) $(CUDA_FLAG) $(LIB) -dlink $^ -o $@

# Compile each CUDA source to its object. The previous per-file rules ran
# "nvcc -c $@ ... $^", which handed the (not yet existing) object file to nvcc
# as an input and never named the output; -c $< -o $@ states both explicitly.
$(CUDA_OBJS): %.o: %.cu
	$(NVCC) $(INCLUDE) $(CUDA_FLAG) -c $< -o $@

# Host-only C++ helper, built with the host C++ compiler (g++ by default).
excel.o:excel.cc
	$(CXX) -c -std=c++11 $< -o $@

# clean is a command, not a file; $(RM) is "rm -f", so it succeeds even when
# there are no object files to remove.
.PHONY: clean
clean:
	$(RM) *.o

# Handy commands for inspecting the GPU on the build machine:
#lspci | grep -i nvidia
#lspci -v -s 09:00.0
#nvidia-smi
#

 

GPU demo

// Helper function for using CUDA to add vectors in parallel.cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size){    int *dev_a = 0;    int *dev_b = 0;    int *dev_c = 0;    cudaError_t cudaStatus;    // Choose which GPU to run on, change this on a multi-GPU system.    cudaStatus = cudaSetDevice(0);    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");        goto Error;    }    // Allocate GPU buffers for three vectors (two input, one output)    .    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaMalloc failed!");        goto Error;    }    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaMalloc failed!");        goto Error;    }    cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaMalloc failed!");        goto Error;    }    // Copy input vectors from host memory to GPU buffers.    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaMemcpy failed!");        goto Error;    }    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaMemcpy failed!");        goto Error;    }    // Launch a kernel on the GPU with one thread for each element.    addKernel <<<1, size >>> (dev_c, dev_a, dev_b);    // Check for any errors launching the kernel    cudaStatus = cudaGetLastError();    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));        goto Error;    }    // cudaDeviceSynchronize waits for the kernel to finish, and returns    // any errors encountered during the launch.    
cudaStatus = cudaDeviceSynchronize();    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);        goto Error;    }    // Copy output vector from GPU buffer to host memory.    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);    if (cudaStatus != cudaSuccess) {        fprintf(stderr, "cudaMemcpy failed!");        goto Error;    }Error:    cudaFree(dev_c);    cudaFree(dev_a);    cudaFree(dev_b);    return cudaStatus;}

 

posted on 2019-06-14 16:20 阅读(...) 评论(...)

转载于:https://www.cnblogs.com/liuguoyao514257665/p/11023937.html

你可能感兴趣的文章
python的列表与shell的数组
查看>>
关于TFS2010使用常见问题
查看>>
软件工程团队作业3
查看>>
python标准库——queue模块 的queue类(单向队列)
查看>>
火狐、谷歌、IE关于document.body.scrollTop和document.documentElement.scrollTop 以及值为0的问题...
查看>>
深入理解JVM读书笔记--字节码执行引擎
查看>>
vue-搜索功能-实时监听搜索框的输入,N毫秒请求一次数据
查看>>
批处理 windows 服务的安装与卸载
查看>>
React文档翻译 (快速入门)
查看>>
nodejs fs路径
查看>>
动态规划算法之最大子段和
查看>>
linux c:关联变量的双for循环
查看>>
深入浅出理解zend framework(三)
查看>>
python语句----->if语句,while语句,for循环
查看>>
javascript之数组操作
查看>>
LinkedList源码分析
查看>>
TF-IDF原理
查看>>
用JS制作博客页面背景随滚动渐变的效果
查看>>
JavaScript的迭代函数与迭代函数的实现
查看>>
一步步教你学会browserify
查看>>