PyTorch
问题
PyTorch 的核心概念有哪些?如何构建和训练一个神经网络?
答案
Tensor 与自动求导
import torch

# Create a tensor that participates in autograd.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
# y = x^2 + 2x + 1, written in its factored form (x + 1)^2.
y = (x + 1) ** 2
# Reduce to a scalar, then backpropagate through the graph.
loss = y.sum()
loss.backward()
print(x.grad)  # dy/dx = 2x + 2 -> [4.0, 6.0, 8.0]
模型构建
import torch.nn as nn

class TextClassifier(nn.Module):
    """Bidirectional-LSTM text classifier.

    Embeds token ids, encodes the sequence with a single-layer
    bidirectional LSTM, and classifies from the concatenated final
    hidden states of the forward and backward directions.
    """

    def __init__(self, vocab_size: int, embed_dim: int, num_classes: int,
                 hidden_dim: int = 128) -> None:
        """
        Args:
            vocab_size: size of the embedding vocabulary.
            embed_dim: embedding dimension.
            num_classes: number of output classes.
            hidden_dim: per-direction LSTM hidden size. Defaults to 128,
                matching the previously hard-coded value.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.encoder = nn.LSTM(embed_dim, hidden_dim, batch_first=True,
                               bidirectional=True)
        # The two directions are concatenated -> 2 * hidden_dim features.
        self.classifier = nn.Sequential(
            nn.Linear(2 * hidden_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a batch of token-id sequences to class logits.

        Args:
            x: LongTensor of shape (batch, seq_len).

        Returns:
            Logits of shape (batch, num_classes).
        """
        embedded = self.embedding(x)             # (batch, seq_len, embed_dim)
        _, (hidden, _) = self.encoder(embedded)  # hidden: (2, batch, hidden_dim)
        # Concatenate the last forward (-2) and backward (-1) hidden states;
        # negative indices keep this correct even if num_layers is increased.
        hidden = torch.cat([hidden[-2], hidden[-1]], dim=1)  # (batch, 2*hidden_dim)
        return self.classifier(hidden)
训练循环
# Train on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TextClassifier(vocab_size=10000, embed_dim=128, num_classes=3).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
criterion = nn.CrossEntropyLoss()

for epoch in range(10):
    # --- training phase ---
    model.train()
    total_loss = 0.0
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        loss = criterion(model(batch_x), batch_y)
        loss.backward()
        # Clip the global gradient norm to stabilize LSTM training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()

    # --- validation phase ---
    model.eval()
    with torch.no_grad():
        correct = 0
        for x, y in val_loader:
            preds = model(x.to(device)).argmax(1)
            correct += (preds == y.to(device)).sum().item()
Dataset 与 DataLoader
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    """Minimal map-style dataset pairing raw text samples with labels."""

    def __init__(self, texts, labels):
        """
        Args:
            texts: sequence of samples.
            labels: sequence of targets, same length as ``texts``.

        Raises:
            ValueError: if the two sequences differ in length.
        """
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Returns one (sample, target) pair; DataLoader's default collate
        # function batches these pairs.
        return self.texts[idx], self.labels[idx]
# Wrap the dataset in a DataLoader for shuffled mini-batch iteration.
train_loader = DataLoader(
    MyDataset(texts, labels),
    batch_size=32,
    shuffle=True,     # reshuffle the data every epoch
    num_workers=4,    # multi-process data loading
    pin_memory=True,  # enable when training on GPU to speed host-to-device copies
)
常见面试问题
Q1: PyTorch 和 TensorFlow 的区别?
答案:
| 特性 | PyTorch | TensorFlow |
|---|---|---|
| 计算图 | 动态(define-by-run) | 静态 + Eager mode |
| 调试 | 原生 Python 调试 | 需 tf.debugging |
| 部署 | TorchScript、ONNX | SavedModel、TF Serving |
| 学术界 | 主流(>80% 论文) | 份额下降 |
| 工业界 | 快速增长 | 仍有大量存量 |
Q2: model.train() 和 model.eval() 的区别?
答案:
model.train():启用 Dropout 和 BatchNorm 的训练行为。
model.eval():关闭 Dropout,BatchNorm 使用全局(running)统计量。
model.eval() 不会自动禁止梯度计算,仍需 torch.no_grad() 节省内存。
Q3: 如何保存和加载模型?
答案:
# Save: prefer persisting only the state_dict (weights), not the whole module.
torch.save(model.state_dict(), "model.pth")

# Load: rebuild the architecture, then restore the weights.
model = TextClassifier(...)
# weights_only=True restricts unpickling to tensors/primitive containers,
# which is safer when loading checkpoints from untrusted sources.
model.load_state_dict(
    torch.load("model.pth", map_location=device, weights_only=True)
)
model.eval()  # inference mode: disables Dropout, BatchNorm uses running stats
Q4: 如何做混合精度训练?
答案:
from torch.amp import autocast, GradScaler

# GradScaler rescales the loss so small FP16 gradients don't underflow.
scaler = GradScaler()
for batch_x, batch_y in train_loader:
    # Move the batch to the training device (the original snippet skipped
    # this, unlike the plain training loop above).
    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
    optimizer.zero_grad()
    with autocast(device_type="cuda"):  # forward pass in reduced precision
        output = model(batch_x)
        loss = criterion(output, batch_y)
    scaler.scale(loss).backward()  # backward on the scaled loss
    scaler.step(optimizer)         # unscales gradients, then steps the optimizer
    scaler.update()                # adapt the scale factor for the next step
混合精度可以减少一半显存,训练速度提升 50%-100%。