猫狗大战


猫狗大战

通过来自 kaggle 上的猫狗数据集,训练一个识别猫狗图片的分类器。要求 设计一个使用 ResNet18 作为主干的卷积神经网络,在迁移网络时采用固定值模 式,要求模型的准确率不低于 90%。猫狗大战数据集训练集有 25000 张,猫狗各 占一半。测试集 12500 张。

1.准备数据

import torch
import torch.nn as nn
import torch. nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
import matplotlib.pyplot as plt
import numpy as np
import time
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import models, datasets, transforms
from PIL import Image

由于给定的数据集,缺少label,故需要自定义数据集,并打上标签,因为数据集里的图片以cat_xx、dog_xx命名,那么就可以根据图片名称来打上label,例如cat:0、dog:1

class MyDataset(Dataset):
    def __init__(self, root_path, transform=None):
        self.label_name = {"Cat": 0, "Dog": 1}
        self.root_path = root_path
        self.transform = transform
        self.get_train_img_info()

    def __getitem__(self, index):
        self.img = Image.open(os.path.join(self.root_path, self.train_img_name[index]))
        if self.transform is not None:
            self.img = self.transform(self.img)
        self.label = self.train_img_label[index]
        return self.img, self.label

    def __len__(self):
        return len(self.train_img_name)

    def get_train_img_info(self):
        self.train_img_name = os.listdir(self.root_path)
        self.train_img_label = [0 if 'cat' in imgname else 1 for imgname in self.train_img_name]

同样划分成训练集、验证集、测试集,并且对数据集进行数据增强,如随机翻转,改变尺寸等

def gen_data():
    transform_train = transforms.Compose([
                                          transforms.RandomHorizontalFlip(),
                                          transforms.RandomGrayscale(),
                                          transforms.Resize([224,224]),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                      ])                                
    
    transform_test = transforms.Compose([
                                          transforms.Resize([224,224]),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                      ])     
    
    train_data = MyDataset(root_path=os.path.join(os.getcwd(), 'datasets/train'), transform=transform_train)
    test_data = MyDataset(root_path=os.path.join(os.getcwd(), 'datasets/test'), transform=transform_test)
    
    
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(0.2 * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    
    
    train_loader = DataLoader(dataset=train_data, batch_size=64, sampler=train_sampler)
    valid_loader = DataLoader(dataset=train_data, batch_size=64, sampler=valid_sampler)

    test_loader = DataLoader(dataset=test_data, batch_size=64)
    
    print(len(test_data))
    img = test_data[10][0]
    plt.imshow(img[0, :])
    print(test_data[10][1])
    
    return train_loader, valid_loader, test_loader

数据集图片如下图所示

数据集中的图片

2.建立模型

应用迁移学习,主干网络选用resnet18,从而只需将resnet18的最后一层全连接层改为输出只有2的全连接层

# 加载resnet18作为预训练模型
net = models.resnet18(pretrained=True)

# 冻结参数
for param in net.parameters():
    param.requires_grad = False

# 改造最后一层,输出改为两个神经元
net.fc = nn.Linear(net.fc.in_features, 2)

# 在GPU上训练
if torch.cuda.is_available():
    net = net.cuda()
    net.fc = net.fc.cuda()

3.训练模型

def train(train_loader, valid_loader, net, loss_func, opt, epochs):
    valid_loss_min = np.Inf    
    start = time.time()
    train_loss_list = []
    valid_loss_list = []
    
    for epoch in range(epochs):
        
        train_loss = 0.0
        valid_loss = 0.0
        
        net.train()
        for x, y in train_loader:
            
            x = x.cuda()
            y = y.cuda()
            
            pred = net(x)
            loss = loss_func(pred, y)
            
            opt.zero_grad()
            loss.backward()
            opt.step()
            
            train_loss += loss.item() * x.size(0)
        
        net.eval()
        for x, y in valid_loader:
            
            x = x.cuda()
            y = y.cuda()
            
            pred = net(x)
            loss = loss_func(pred, y)
            
            valid_loss += loss.item() * x.size(0)

        # 计算平均损失
        train_loss = train_loss/len(train_loader.sampler)
        valid_loss = valid_loss/len(valid_loader.sampler)
        
        train_loss_list.append(train_loss)
        valid_loss_list.append(valid_loss)

        # 显示训练集与验证集的损失函数 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch + 1, train_loss, valid_loss))

        # 如果验证集损失函数减少,就保存模型。
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
            torch.save(net.state_dict(), 'cat_vs_dog.pt')
            valid_loss_min = valid_loss
        print("spend times: ", time.time() - start)
        
    plt.plot(np.arange(epochs), train_loss_list, 'r', label='train')
    plt.plot(np.arange(epochs), valid_loss_list, 'b', label='valid')
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.legend()
    plt.show()
train_loader, valid_loader, test_loader = gen_data()
loss_func = nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.fc.parameters(), lr=0.001)
epochs = 30
train(train_loader, valid_loader, net, loss_func, opt, epochs)
训练误差

4.预测结果

def prediction(test_loader, net):
    rights = 0
    length = 0
    for i, data in enumerate(test_loader):
        x, y = data
        
        x = x.cuda()
        y = y.cuda()
        
        pred = net(x)
        rights += rightness(pred, y)[0]
        length += rightness(pred, y)[1]
        
    print(rights, length, 'acc: {:.6f}'.format(rights / length))

def rightness (pred, labels):
    pred = torch.max(pred.data, 1)[1]
    rights = pred.eq(labels.data.view_as(pred)).sum()
    return rights, len(labels)       
net.load_state_dict(torch.load('cat_vs_dog.pt'))
prediction(test_loader, net)
准确率

Author: Paranoid
Reprint policy: All articles in this blog are used except for special statements CC BY 4.0 reprint policy. If reproduced, please indicate source Paranoid !
评论
  TOC