# --- Prerequisites -------------------------------------------------------
# Verify Python 3 is present; git, wget and build-essential are required
# when compiling from source.
python3 --version

# Update system packages.
sudo apt update && sudo apt upgrade -y   # Ubuntu/Debian
sudo yum update -y                       # CentOS

# Install Python 3, pip and build tooling.
sudo apt install -y python3 python3-pip python3-dev   # Ubuntu/Debian
sudo yum install -y python3 python3-pip gcc           # CentOS

# Install the NVIDIA driver (version 535 as an example).
sudo apt install -y nvidia-driver-535

# Verify the CUDA toolkit and driver installation.
nvcc --version   # CUDA toolkit version
nvidia-smi       # driver status and visible GPUs

# --- cuDNN installation --------------------------------------------------
# Extract the cuDNN archive and copy headers/libraries into the CUDA tree.
# NB: the archive is .tar.xz, so use -J (xz), not -z (gzip).
tar -xJvf cudnn-linux-x86_64-8.9.7.29_cuda12-archive.tar.xz
sudo cp cudnn-*-archive/include/cudnn*.h /usr/local/cuda/include
sudo cp -P cudnn-*-archive/lib/libcudnn* /usr/local/cuda/lib64/
sudo chmod a+r /usr/local/cuda/include/cudnn*.h /usr/local/cuda/lib64/libcudnn*

# Add the CUDA paths to ~/.bashrc so they persist across shells.
echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc
# Create and activate a conda virtual environment (avoids dependency conflicts).
conda create -n pytorch_env python=3.8 -y
conda activate pytorch_env

# Install PyTorch (pick the variant matching your CUDA setup).
# CPU-only build (no GPU acceleration):
conda install pytorch torchvision torchaudio cpuonly -c pytorch
# GPU build (CUDA 11.8 as an example). For CUDA 11.8 the pytorch channel
# ships the `pytorch-cuda` meta-package, not `cudatoolkit`:
conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
# Create a virtual environment (optional but recommended)
python3 -m venv pytorch_env
source pytorch_env/bin/activate
# Install PyTorch (choose according to your CUDA version)
# CPU-only build
pip install torch torchvision torchaudio
# GPU build (CUDA 11.8 as an example; wheels come from the PyTorch index)
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
# Quick sanity check that PyTorch imports and sees the GPU.
import torch
print("PyTorch版本:", torch.__version__)
print("CUDA是否可用:", torch.cuda.is_available())  # should print True for a GPU build
print("GPU设备数量:", torch.cuda.device_count())  # number of visible GPUs
若输出显示版本号且 torch.cuda.is_available() 返回 True,则安装成功。
import torch
import torchvision.models as models

# Load a pretrained ResNet-50. The `weights=` API replaces the
# `pretrained=True` flag, which is deprecated since torchvision 0.13.
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
model.eval()  # inference mode: freezes dropout / batch-norm statistics

# Example input matching the model's expected (N, C, H, W) shape.
dummy_input = torch.rand(1, 3, 224, 224)

# Convert to TorchScript. Tracing records the ops executed for this input;
# scripting (commented out below) also supports data-dependent control flow.
scripted_model = torch.jit.trace(model, dummy_input)
# scripted_model = torch.jit.script(model)  # more flexible: supports control flow

# Save the serialized model.
scripted_model.save("resnet50_scripted.pt")
print("模型已保存至 resnet50_scripted.pt")
TorchServe是PyTorch官方提供的模型服务工具,支持REST API与gRPC接口。
# Install TorchServe and the model archiver tool.
pip install torchserve torch-model-archiver

# The archiver errors out if the export directory is missing — create it first.
mkdir -p model_store

# Package the TorchScript model into a .mar archive for TorchServe.
torch-model-archiver --model-name resnet50 \
  --version 1.0 \
  --serialized-file resnet50_scripted.pt \
  --handler image_classifier \
  --extra-files index_to_name.json \
  --export-path model_store
参数说明:
- --model-name:模型名称;
- --serialized-file:TorchScript模型文件路径;
- --handler:处理请求的处理器(如 image_classifier 用于图像分类);
- --extra-files:额外文件(如类别映射表 index_to_name.json)。

启动TorchServe服务:

torchserve --start --model-store model_store --models resnet50=resnet50.mar

- --model-store:模型存储目录;
- --models:模型名称与MAR文件的映射。

测试推理接口:

curl -X POST http://localhost:8080/predictions/resnet50 -T test_image.jpg

返回结果为JSON格式的预测类别与概率。
# Install Docker
sudo apt install -y docker.io
sudo systemctl start docker
sudo systemctl enable docker
# Install the NVIDIA Container Toolkit (GPU access inside containers)
# NOTE(review): the nvidia-docker2 package/repo below is the legacy install
# path; NVIDIA's current docs recommend the nvidia-container-toolkit package
# from the libnvidia-container repo — confirm before production use.
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt update
sudo apt install -y nvidia-docker2
sudo systemctl restart docker
# Official PyTorch runtime image with CUDA 11.8 and cuDNN 8 preinstalled
FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-runtime
# Working directory inside the container
WORKDIR /app
# Copy the TorchScript model and the serving code
COPY resnet50_scripted.pt /app/model.pt
COPY app.py /app/
# Install the web framework dependency
RUN pip install flask
# Port the Flask app listens on
EXPOSE 5000
# Start the inference service
CMD ["python", "app.py"]
from flask import Flask, request, jsonify
import torch
import torchvision.transforms as transforms
from PIL import Image

app = Flask(__name__)

# Load the TorchScript model baked into the image (see Dockerfile).
model = torch.jit.load("/app/model.pt")
model.eval()

# Image preprocessing: the standard ImageNet resize/crop/normalize pipeline.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Class-index -> label mapping.
# NOTE(review): a ResNet-50 pretrained on ImageNet outputs 1000 classes; this
# 3-entry example list will raise IndexError for most predictions — replace it
# with the full label list of the deployed model.
classes = ["cat", "dog", "bird"]  # example classes


@app.route("/predict", methods=["POST"])
def predict():
    """Classify one uploaded image; returns {"class": ..., "confidence": ...}."""
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400
    file = request.files["file"]
    image = Image.open(file.stream).convert("RGB")
    input_tensor = transform(image).unsqueeze(0)  # add batch dimension
    with torch.no_grad():
        output = model(input_tensor)
        # Softmax converts raw logits to probabilities so "confidence" is a
        # value in [0, 1] (the original returned an unnormalized logit).
        probs = torch.softmax(output, dim=1)
        confidence, predicted = torch.max(probs, 1)
    class_name = classes[predicted.item()]
    return jsonify({"class": class_name, "confidence": float(confidence.item())})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
# Build the image
docker build -t pytorch-resnet50 .
# Run the container (with GPU support)
# NOTE(review): app.py loads /app/model.pt baked into the image; this optional
# mount targets /app/models, which nothing reads — confirm the intended path.
docker run --gpus all -p 5000:5000 -v /path/to/models:/app/models pytorch-resnet50
# Test the API locally
curl -X POST -F "file=@test_image.jpg" http://localhost:5000/predict
参数说明:--gpus all 允许容器访问GPU;-v 挂载本地模型目录至容器(可选)。

常见问题排查:

1. RuntimeError: CUDA error: no kernel image is available for execution on the device
   原因:PyTorch编译时的CUDA版本与系统环境不匹配。解决:确保安装PyTorch时指定的CUDA版本与系统安装的CUDA版本一致(如PyTorch安装CUDA 11.8版本,系统需安装CUDA 11.8)。

2. ImportError: libcudart.so.11.8: cannot open shared object file
   原因:CUDA运行时库不在动态链接路径中。解决:检查并正确设置 LD_LIBRARY_PATH(包含 /usr/local/cuda/lib64),或重新安装对应版本的CUDA。

3. RuntimeError: PytorchStreamReader failed reading zip archive
   原因:模型文件损坏或未完整保存/传输。解决:重新导出或重新拷贝模型文件。

通过以上步骤,你可以在Linux系统上完成PyTorch的部署,并实现模型的高性能推理服务。根据实际需求选择合适的部署方式(如TorchServe适合大规模API服务,Docker适合隔离环境),即可快速上线PyTorch应用。