import matplotlib.pyplot as plt
import numpy as np
import torch
from torchvision.datasets import mnist
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from torch import nn
# Hyperparameters
train_batch_size = 64
test_batch_size = 128
num_epochs = 20
lr = 0.01         # initial learning rate for SGD
momentum = 0.5    # SGD momentum
# MNIST pipeline: convert images to tensors and normalize to roughly [-1, 1]
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize([0.5], [0.5])])
train_dataset = mnist.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = mnist.MNIST('./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)
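# Optional sanity check (a quick sketch, safe to skip): preview one batch to
# confirm the loader yields normalized 1x28x28 images with the right labels.
images, labels = next(iter(train_loader))
fig, axes = plt.subplots(1, 6, figsize=(9, 2))
for ax, image, label in zip(axes, images, labels):
    ax.imshow(image.squeeze().numpy(), cmap='gray')
    ax.set_title(int(label))
    ax.axis('off')
plt.show()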
class Net(nn.Module):
    """Three fully connected layers, each followed by batch normalization."""
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.BatchNorm1d(n_hidden_1))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.BatchNorm1d(n_hidden_2))
        self.layer3 = nn.Sequential(nn.Linear(n_hidden_2, out_dim), nn.BatchNorm1d(out_dim))

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = self.layer3(x)  # raw logits; CrossEntropyLoss applies log-softmax itself
        return x
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Net(28 * 28, 300, 100, 10)
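# Optional shape check (a quick sketch, safe to skip): a dummy batch of four
# flattened images should produce one logit per class.
model.eval()  # eval mode so BatchNorm1d uses its running statistics
with torch.no_grad():
    print(model(torch.randn(4, 28 * 28)).shape)  # expected: torch.Size([4, 10])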
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
# Per-epoch metric history
losses = []
acces = []
eval_losses = []
eval_acces = []
model.to(device)
for epoch in range(num_epochs):
    train_loss = 0
    train_acc = 0
    model.train()

    # Decay the learning rate by a factor of 10 every 5 epochs (not at epoch 0)
    if epoch > 0 and epoch % 5 == 0:
        optimizer.param_groups[0]['lr'] *= 0.1
    for img, label in train_loader:
        img = img.to(device)
        label = label.to(device)
        img = img.view(img.size(0), -1)  # flatten 28x28 images to 784-dim vectors

        # Forward pass
        out = model(img)
        loss = criterion(out, label)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        # Accuracy on this batch
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / img.shape[0]
        train_acc += acc
    losses.append(train_loss / len(train_loader))
    acces.append(train_acc / len(train_loader))
    # Evaluate on the test set; no gradients are needed here
    eval_loss = 0
    eval_acc = 0
    model.eval()
    with torch.no_grad():
        for img, label in test_loader:
            img = img.to(device)
            label = label.to(device)
            img = img.view(img.size(0), -1)

            out = model(img)
            loss = criterion(out, label)
            eval_loss += loss.item()

            _, pred = out.max(1)
            num_correct = (pred == label).sum().item()
            acc = num_correct / img.shape[0]
            eval_acc += acc
    eval_losses.append(eval_loss / len(test_loader))
    eval_acces.append(eval_acc / len(test_loader))
    print(f'epoch: {epoch}, Train Loss: {train_loss / len(train_loader):.4f}, '
          f'Train Acc: {train_acc / len(train_loader):.4f}, '
          f'Test Loss: {eval_loss / len(test_loader):.4f}, '
          f'Test Acc: {eval_acc / len(test_loader):.4f}')
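# Note: the manual decay inside the loop matches what
# torch.optim.lr_scheduler.StepLR provides. A sketch of that alternative
# (same 10x-every-5-epochs schedule), shown for reference only:
#
#     scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
#     for epoch in range(num_epochs):
#         ...train and evaluate as above...
#         scheduler.step()  # advance the schedule once per epoch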
plt.title('Train Loss')
plt.plot(np.arange(len(losses)), losses)
plt.legend(['Train Loss'], loc='upper right')
plt.show()
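# A natural follow-up (a sketch using the lists collected above): plot the test
# metrics the same way to spot over- or under-fitting across epochs.
plt.title('Test Metrics')
plt.plot(np.arange(len(eval_losses)), eval_losses)
plt.plot(np.arange(len(eval_acces)), eval_acces)
plt.legend(['Test Loss', 'Test Acc'], loc='upper right')
plt.show()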