手写数字识别器


手写数字识别器

通过 MNIST 数据集训练得到一个手写数字分类器。要求设计一个至少包含 2 个卷积层和池化层的卷积神经网络,卷积核的尺寸不小于 5*5,要求训练后得到的网络在测试集上的准确率不低于 96%(要求在网络中使用 dropout)。

1.准备数据

# Standard library
import time

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
# ---- Data preparation --------------------------------------------------
# Normalize with the MNIST global mean/std after converting to tensors.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.1307, ], [0.3081, ]),
])

train_data = datasets.MNIST(root='./', train=True,
                            transform=mnist_transform, download=True)
test_data = datasets.MNIST(root='./', train=False,
                           transform=mnist_transform)

# Hold out 20% of the (shuffled) training indices for validation.
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(0.2 * num_train))
valid_idx, train_idx = indices[:split], indices[split:]

# Samplers restrict each loader to its own disjoint index subset.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = DataLoader(dataset=train_data, batch_size=64, sampler=train_sampler)
valid_loader = DataLoader(dataset=train_data, batch_size=64, sampler=valid_sampler)
test_loader = DataLoader(dataset=test_data, batch_size=64)

这里增加了验证集,其目的是挑选泛化能力好的模型,并且在训练时保存起来

2.构建模型

两个卷积层和最大池化层,最后要将输出展平,作为全连接的输入,输入的尺寸需要计算

class model(nn.Module):
    """CNN for MNIST classification.

    Two 5x5 convolution layers (padding keeps spatial size), each followed
    by 2x2 max-pooling, then two fully-connected layers with dropout
    applied before the final classifier head.
    """

    # Flattened feature size after two conv+pool stages:
    # 28x28 input halved twice -> 7x7, with 8 output channels.
    _FLAT_FEATURES = (28 * 28) // (4 * 4) * 8

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 4, 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 5, padding=2)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        # conv -> relu -> pool, twice.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten for the fully-connected layers.
        x = x.view(-1, self._FLAT_FEATURES)
        x = F.relu(self.fc1(x))

        # Dropout (default p=0.5) is active only while self.training is True.
        x = F.dropout(x, training=self.training)
        return self.fc2(x)

    def feature_map(self, x):
        """Return the ReLU activations of both conv layers (conv1 pre-pool),
        for visualizing feature maps."""
        map1 = F.relu(self.conv1(x))
        map2 = F.relu(self.conv2(self.pool(map1)))
        return map1, map2

这里运用了 dropout,目的是减少过拟合;feature_map 方法返回卷积层的输出,用于绘制特征图

3.训练模型

def train(train_loader, valid_loader, net, loss_func, opt, epochs):
    """Train `net` for `epochs` epochs, validating after each epoch.

    Saves the state dict to 'model_hnr.pt' whenever the validation loss
    improves, prints per-epoch losses and elapsed time, and finally plots
    the train/validation loss curves.

    Args:
        train_loader, valid_loader: DataLoaders yielding (x, y) batches.
        net: the model (assumed to already live on the GPU).
        loss_func: loss taking (predictions, targets).
        opt: optimizer over net.parameters().
        epochs: number of passes over the training data.
    """
    # np.Inf was removed in NumPy 2.0; np.inf is the canonical spelling.
    valid_loss_min = np.inf
    start = time.time()

    train_loss_list = []
    valid_loss_list = []

    for epoch in range(epochs):

        train_loss = 0.0
        valid_loss = 0.0

        net.train()  # enable dropout
        for x, y in train_loader:
            x = x.cuda()
            y = y.cuda()

            pred = net(x)
            loss = loss_func(pred, y)

            opt.zero_grad()
            loss.backward()
            opt.step()

            # loss.item() is the batch mean; scale by batch size so the
            # per-sample average below is exact even for a short last batch.
            train_loss += loss.item() * x.size(0)

        net.eval()  # disable dropout
        # Validation needs no gradients; no_grad avoids the autograd
        # bookkeeping and its memory cost.
        with torch.no_grad():
            for x, y in valid_loader:

                x = x.cuda()
                y = y.cuda()

                pred = net(x)
                loss = loss_func(pred, y)

                valid_loss += loss.item() * x.size(0)

        # Per-sample average losses over each sampler's subset.
        train_loss = train_loss / len(train_loader.sampler)
        valid_loss = valid_loss / len(valid_loader.sampler)

        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)

        # Report training and validation loss for this epoch.
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))

        # Checkpoint whenever the validation loss improves (model selection).
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            torch.save(net.state_dict(), 'model_hnr.pt')
            valid_loss_min = valid_loss

        print('spend_time: ', time.time() - start)

    plt.plot(np.arange(epochs), train_loss_list, 'r', label='train')
    plt.plot(np.arange(epochs), valid_loss_list, 'b', label='valid')
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.legend()
    plt.show()
cnn = model().cuda() # move the model to the GPU for training
loss_func = nn.CrossEntropyLoss()  # expects raw logits; applies log-softmax internally
opt = torch.optim.SGD(cnn.parameters(), lr = 0.001, momentum=0.9)
epochs = 10
train(train_loader, valid_loader, cnn, loss_func, opt, epochs)

model().cuda() 可以将模型加载到 GPU 上训练,显著提高训练速度

训练损失

4.预测结果

def prediction(test_loader, net):
    """Print the accuracy of `net` over `test_loader` (correct / total).

    Assumes `net` lives on the GPU; batches are moved there before the
    forward pass.
    """
    rights = 0
    length = 0

    net.eval()  # disable dropout for evaluation
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        for x, y in test_loader:

            x = x.cuda()
            y = y.cuda()

            pred = net(x)
            # Call rightness once per batch (the original called it twice,
            # recomputing the comparison for the same predictions).
            batch_rights, batch_size = rightness(pred, y)
            rights += batch_rights
            length += batch_size

    print(rights / length)

def rightness(pred, labels):
    """Return (number of correct predictions, batch size).

    Args:
        pred: (N, C) tensor of per-class scores; the predicted class is the
            argmax over the class dimension.
        labels: (N,) tensor of ground-truth class indices.
    """
    # argmax replaces the legacy `torch.max(pred.data, 1)[1]` idiom;
    # .data access is discouraged in modern PyTorch.
    predicted = pred.argmax(dim=1)
    rights = predicted.eq(labels.view_as(predicted)).sum()
    return rights, len(labels)


# Load the best checkpoint saved by train(). The file name must match the
# one passed to torch.save -- 'model_hnr.pt', not 'model_h.pt' (the original
# mismatch would raise FileNotFoundError).
cnn.load_state_dict(torch.load('model_hnr.pt'))
prediction(test_loader, cnn)
准确率

5.输出特征图

def show(net):
    """Visualize the first 4 conv1 kernels and the feature maps produced by
    both conv layers for one training image.

    Tensors are detached and moved to the CPU before plotting, since
    matplotlib cannot consume GPU tensors.
    """
    # First 4 conv1 kernels (input channel 0).
    kernels = net.conv1.weight.cpu().detach().numpy()
    for i in range(4):
        plt.subplot(1, 4, i + 1)
        plt.imshow(kernels[i, 0, :])
    plt.show()

    # Push one sample (with a batch dimension added) through the conv layers.
    x = train_data[50][0].unsqueeze(0).cuda()
    map1, map2 = net.feature_map(x)

    # Removed a leftover debug print of map1's shape from the loop below.
    for i in range(4):
        plt.subplot(1, 4, i + 1)
        plt.imshow(map1.detach().cpu()[0, i, :].numpy())
    plt.show()

    for i in range(4):
        plt.subplot(1, 4, i + 1)
        plt.imshow(map2.detach().cpu()[0, i, :].numpy())
    plt.show()

show(cnn)
特征图

需要将GPU格式的数据转换为CPU格式的数据,即调用cpu()


Author: Paranoid
Reprint policy: Unless otherwise stated, all articles in this blog are licensed under CC BY 4.0. If reproduced, please indicate the source: Paranoid!
评论
  TOC