A CNN Image Classifier Based on PyTorch
A CNN image classifier built with PyTorch, with data preprocessing done in OpenCV.
CNNs (convolutional neural networks) perform remarkably well on images and are widely used in image processing. Interestingly, the network used in AlphaGo does not include any pooling layers.
Here I use a basic binary classifier as an example to work through image classification with a CNN, starting from image preprocessing and moving step by step into training the network.
The code is commented in detail.
Data preprocessing:
# Import libraries
import os
import numpy as np
import cv2

# Define a function that reads an image
def readimg(path):
    # Read the image with OpenCV's cv2.imread()
    img = cv2.imread(path)
    # Resize the image with cv2.resize() so the network trains faster later on
    img = cv2.resize(img, (128, 128))
    return img

# Define a function that rotates the image and saves the results
def save(path, img):
    # Get the image height and width from img.shape
    rows, cols = img.shape[:2]
    # Create the output folder if it does not exist yet
    os.makedirs(path, exist_ok=True)
    # Rotate the image in 1-degree steps, 360 times, to augment the data
    for i in range(0, 360):
        # cv2.getRotationMatrix2D() builds the rotation matrix and cv2.warpAffine() applies it
        # Arguments: rotation center, rotation angle, scale factor
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), i, 1)
        img1 = cv2.warpAffine(img, M, (cols, rows))
        img_name = str(i) + ".jpg"
        # Save the rotated image
        cv2.imwrite(os.path.join(path, img_name), img1)

path = '/Users/jason_zhang/Desktop/'
for j in range(1, 3):
    j = str(j)
    no_img = j + '.jpg'
    img_path = os.path.join(path, no_img)
    new_img_path = os.path.join(path, j)
    img = readimg(img_path)
    save(new_img_path, img)
print("Saving done")
Output:

At this point, the data has been processed and saved into the corresponding folders.
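As a quick sanity check, here is a small sketch (assuming the same desktop paths used above) that counts the files written to each folder:

# Sketch: count the rotated images written by save() (paths assumed as above)
import os

path = '/Users/jason_zhang/Desktop/'
for j in ('1', '2'):
    folder = os.path.join(path, j)
    count = len([f for f in os.listdir(folder) if f.endswith('.jpg')])
    print("Folder {}: {} images".format(j, count))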



Loading the data:
# Import libraries
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset

# Define a function that reads the images in a folder and assigns them a label
def readfile(path, label):
    # List the image names under the folder with os.listdir() and sort them
    image_dir = sorted(os.listdir(path))
    # Allocate the features and labels; the features are image matrices
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    # Use enumerate() to iterate over the files with an index
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img, (128, 128))
        # All images in this folder get the same label
        y[i] = label
    return x, y

img_dir = '/Users/jason_zhang/Desktop'
print("Reading data")
# Read the images and labels
# train_x_* holds the image matrices
# os.path.join() joins the image directory with the folder name
train_x_1, train_y_1 = readfile(os.path.join(img_dir, '1'), 1)
train_x_2, train_y_2 = readfile(os.path.join(img_dir, '2'), 2)
print("Size of data = {}".format(len(train_x_1)))
print("Size of data = {}".format(len(train_x_2)))
print("Reading done")
Output:

# Concatenate the two sets of matrices and labels
train_x = np.concatenate([train_x_1, train_x_2], axis=0)
train_y = np.concatenate([train_y_1, train_y_2], axis=0)
print("Size of data = {}".format(len(train_x)))
Output:

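Before building the dataset, it can help to confirm the array shapes and which labels are present; a small sketch using the variables defined above:

# Sketch: inspect the concatenated data
print(train_x.shape)       # expected: (N, 128, 128, 3)
print(train_y.shape)       # expected: (N,)
print(np.unique(train_y))  # expected: [1 2]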
Creating the dataset:
# Apply data augmentation during training
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(15),      # Randomly rotate the image
    transforms.ToTensor(),              # Convert the matrix to a Tensor and normalize the values to [0, 1]
])

# Define ImgDataset to wrap the data
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):  # Return one sample given its index
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        Y = self.y[index]
        return X, Y

# Load the dataset in batches
batch_size = 32
train_set = ImgDataset(train_x, train_y, train_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
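To confirm that the transform and the DataLoader behave as expected, here is a small sketch that pulls one batch and prints its shape and value range (ToTensor converts each HWC uint8 matrix into a CHW float tensor in [0, 1]):

# Sketch: fetch one batch from the DataLoader and check it
imgs, labels = next(iter(train_loader))
print(imgs.shape)                            # expected: torch.Size([32, 3, 128, 128])
print(imgs.min().item(), imgs.max().item())  # values should lie in [0, 1]
print(labels[:10])                           # a few labels, each 1 or 2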
Defining the Classifier:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # Input dimensions: [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),    # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 4, 4]
        )
        # Define the fully connected layers
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 3)  # 3 output units, since the labels used here are 1 and 2 (class 0 is unused)
        )

    # Define the forward pass
    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)
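Before training, a quick sketch to verify that the layer dimensions line up, by pushing a dummy batch through an untrained model on the CPU:

# Sketch: sanity-check the Classifier with a random input of the expected size
dummy = torch.randn(2, 3, 128, 128)
print(Classifier()(dummy).shape)  # expected: torch.Size([2, 3])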
Training the CNN:
model = Classifier().cuda()  # Move the model to the GPU
loss = nn.CrossEntropyLoss()  # This is a classification task, so use CrossEntropyLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Use Adam as the optimizer
num_epoch = 10

for epoch in range(num_epoch):
    train_acc = 0.0
    train_loss = 0.0
    model.train()
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()  # Zero the gradients, since they accumulate otherwise
        train_pred = model(data[0].cuda())
        data[1] = data[1].type(torch.LongTensor)  # Convert the labels to LongTensor
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()  # Compute the gradients with backward()
        optimizer.step()       # Update the parameters
        # Accumulate the accuracy and loss
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    print('[%03d/%03d] Train Acc: %3.6f Loss: %3.6f' % (epoch + 1, num_epoch, train_acc / len(train_set), train_loss / len(train_set)))
Output:

It is clear that the model has overfitted, but I do not want to tune it any further.
I did not use a validation set or a test set because the goal here was to focus on learning the CNN architecture and its training workflow, on loading and processing the data in mini-batches, and on doing the related image processing with OpenCV.
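One final practical note: the training code above assumes a CUDA GPU is available (.cuda()). As a minimal sketch, the same setup can be made device-agnostic so it falls back to the CPU:

# Sketch: device-agnostic setup; the training loop would then use .to(device)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Classifier().to(device)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Inside the loop: train_pred = model(data[0].to(device)); batch_loss = loss(train_pred, data[1].to(device))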