在Ubuntu下,使用C++进行GPU加速计算通常涉及以下几个步骤:
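1. 选择GPU加速库:常用的有NVIDIA的CUDA和跨平台的OpenCL,本文以CUDA为例。
2. 安装GPU加速库:安装CUDA Toolkit并配置环境变量。
3. 编写C++代码:编写在GPU上执行的核函数,以及负责内存分配和数据拷贝的主机端代码。
4. 编译和运行:使用nvcc编译代码并运行生成的可执行文件。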
以下是一个简单的示例,展示如何在Ubuntu下使用CUDA和C++进行GPU加速计算:
下载CUDA Toolkit:
wget https://developer.download.nvidia.com/compute/cuda/11.4.2/local_installers/cuda_11.4.2_470.57.02_linux.run
运行安装程序:
sudo sh cuda_11.4.2_470.57.02_linux.run
按照提示完成安装,并设置环境变量:
echo 'export PATH=/usr/local/cuda-11.4/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc
创建一个名为 vector_add.cu 的文件,内容如下:
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <cuda_runtime.h>
/**
 * Element-wise vector addition: C[i] = A[i] + B[i] for every i in [0, numElements).
 *
 * Launch layout: 1-D grid of 1-D blocks, no shared memory. Each thread starts
 * at its flat global index and then advances by the total number of launched
 * threads, so every element is processed for any 1-D launch configuration,
 * including a grid that is smaller than numElements.
 *
 * @param A            first input vector, read by device code
 * @param B            second input vector, read by device code
 * @param C            output vector, written by device code
 * @param numElements  number of elements to process; values <= 0 do nothing
 */
__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) {
    // Signed 64-bit arithmetic keeps the index from wrapping near INT_MAX and
    // keeps a non-positive numElements a no-op (the loop condition is false).
    const long long count = numElements;
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count; i += stride) {
        C[i] = A[i] + B[i];
    }
}
// Prints a diagnostic to stderr when a CUDA runtime call failed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << std::endl;
    return false;
}

/**
 * Adds two vectors of 256 random floats on the GPU and checks the result
 * against the same sum computed on the host.
 *
 * Prints "Test PASSED" on success. On any CUDA error or on a verification
 * mismatch a message is written to stderr, all buffers are still released,
 * and the process exits with EXIT_FAILURE.
 *
 * @return EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise
 */
int main() {
    const int numElements = 256;
    const size_t size = numElements * sizeof(float);

    // Host buffers; std::vector releases them on every exit path.
    std::vector<float> h_A(numElements), h_B(numElements), h_C(numElements);
    for (int i = 0; i < numElements; ++i) {
        h_A[i] = rand() / (float)RAND_MAX;
        h_B[i] = rand() / (float)RAND_MAX;
    }

    // Device buffers start as nullptr so the cleanup below is safe even when
    // an allocation fails part-way through.
    float *d_A = nullptr;
    float *d_B = nullptr;
    float *d_C = nullptr;

    // Allocate device memory and upload the inputs; && stops at the first failure.
    bool ok = cudaSucceeded(cudaMalloc(&d_A, size), "cudaMalloc(d_A)") &&
              cudaSucceeded(cudaMalloc(&d_B, size), "cudaMalloc(d_B)") &&
              cudaSucceeded(cudaMalloc(&d_C, size), "cudaMalloc(d_C)") &&
              cudaSucceeded(cudaMemcpy(d_A, h_A.data(), size, cudaMemcpyHostToDevice),
                            "cudaMemcpy(d_A)") &&
              cudaSucceeded(cudaMemcpy(d_B, h_B.data(), size, cudaMemcpyHostToDevice),
                            "cudaMemcpy(d_B)");

    if (ok) {
        // Ceil-division so a partial last block still covers the tail.
        const int threadsPerBlock = 256;
        const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
        vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);

        // A launch does not return a status: cudaGetLastError() reports bad
        // launch configurations, and the blocking copy that follows reports
        // faults raised while the kernel was executing.
        ok = cudaSucceeded(cudaGetLastError(), "vectorAdd launch") &&
             cudaSucceeded(cudaMemcpy(h_C.data(), d_C, size, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(h_C)");
    }

    if (ok) {
        // Compare against a host reference using an absolute tolerance.
        for (int i = 0; i < numElements; ++i) {
            if (std::fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) {
                std::cerr << "Result verification failed at element "<< i << std::endl;
                ok = false;
                break;
            }
        }
    }

    if (ok) {
        std::cout << "Test PASSED" << std::endl;
    }

    // Release device memory on every path; cudaFree(nullptr) is a no-op.
    ok = cudaSucceeded(cudaFree(d_A), "cudaFree(d_A)") && ok;
    ok = cudaSucceeded(cudaFree(d_B), "cudaFree(d_B)") && ok;
    ok = cudaSucceeded(cudaFree(d_C), "cudaFree(d_C)") && ok;

    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
使用 nvcc 编译器编译代码:
nvcc vector_add.cu -o vector_add
运行生成的可执行文件:
./vector_add
这个示例展示了如何在Ubuntu下使用CUDA和C++进行GPU加速计算。你可以根据具体需求调整代码和编译选项。