在 Ubuntu 上使用 PyTorch 开展深度学习研究的实操指南
一、环境准备与安装
二、数据、模型与训练的最小可运行示例
代码示例:
# Feed the inline script to the Python interpreter through a quoted heredoc.
# NOTE: the delimiter must use plain ASCII quotes ('PY'), not curly quotes,
# or the shell will not recognize the heredoc; quoting also disables
# variable expansion inside the script body.
python - <<'PY'
# Core PyTorch: tensors, layers, optimizers, and data-loading utilities.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# MNIST preprocessing: convert PIL images to tensors, then normalize with the
# dataset-wide mean/std (0.1307 / 0.3081) so inputs are roughly zero-centered.
# NOTE: paths must use plain ASCII quotes ('./data'); curly quotes are a
# Python syntax error.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Download the train/test splits on first use and cache them under ./data.
train_ds = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_ds = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Mini-batch loaders: shuffle only for training; evaluation order is irrelevant,
# so the test loader uses large batches for throughput.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_ds, batch_size=1000, shuffle=False, num_workers=2)
class MLP(nn.Module):
    """Minimal two-layer perceptron for 10-class MNIST classification."""

    def __init__(self):
        # Bug fix: the original declared `def init` and called `self.init()`
        # (underscores were stripped). nn.Module subclasses must define
        # `__init__` and call `super().__init__()` before registering layers.
        super().__init__()
        self.flatten = nn.Flatten()      # (N, 1, 28, 28) -> (N, 784)
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)    # raw logits, one per digit class

    def forward(self, x):
        """Return class logits of shape (N, 10) for a batch of images x."""
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        return self.fc2(x)
# Use the GPU when available, otherwise fall back to CPU.
# NOTE: string literals need plain ASCII quotes ("cuda"); the original's
# curly quotes are a Python syntax error.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLP().to(device)
# CrossEntropyLoss consumes raw logits directly (it applies log-softmax itself).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Train for 5 epochs over the full training set.
for epoch in range(5):
    model.train()                      # enable training-mode layer behavior
    running_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()          # clear gradients from the previous step
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()    # .item() extracts the Python float
    print(f"Epoch {epoch+1}: train loss {running_loss/len(train_loader):.4f}")
# Evaluate accuracy on the held-out test set.
# Bug fix: the original lines lost their indentation, which makes the loop
# body an IndentationError; the structure below restores it.
model.eval()                            # disable dropout/batch-norm updates
correct = total = 0
with torch.no_grad():                   # skip autograd bookkeeping during eval
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb).argmax(dim=1)  # predicted class = index of max logit
        total += yb.size(0)
        correct += (pred == yb).sum().item()
print(f"Test accuracy: {100*correct/total:.2f}%")
PY
三、GPU 训练与性能优化要点
四、进阶研究与工程化实践
五、常见问题排查清单