12猫分类学习笔记二(训练篇)

训练的代码集中在cat_train.py里。直接运行该文件即可进行训练

1 导包

1
2
3
4
5
6
7
import torch
from torch import nn,optim
import torchvision.transforms as transforms
import torchvision
import pandas as pd
import os
from PIL import Image

2 全局变量

1
2
3
4
5
6
7
8
# Number of target categories
num_classes = 12 # number of classification classes
# Filename used to save/load the model checkpoint
cat_model_name = "cat_model.pth"
# Hyperparameters
batch_size = 32 # samples per mini-batch
loss_fn = nn.CrossEntropyLoss() # loss function for multi-class classification
epochs = 3 # number of training epochs

3 transform(数据处理,数据增强)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Training-time transform: preprocessing + data augmentation
transform = transforms.Compose([
transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05),
# transforms.ColorJitter randomly perturbs brightness, contrast, saturation and hue.
# Each value is the relative jitter range, e.g. 0.05: brightness 100 -> (95, 105).
transforms.Resize((256, 256)), # resize to a fixed size
transforms.CenterCrop(196), # deterministic center crop (NOT random, despite the original comment)
transforms.RandomHorizontalFlip(p=0.5), # horizontal flip with probability 0.5
transforms.RandomVerticalFlip(p=0.5), # vertical flip with probability 0.5
transforms.ToTensor(), # convert PIL image to float tensor in [0, 1]
transforms.Normalize(mean=[0.4848, 0.4435, 0.4023], std=[0.2744, 0.2688, 0.2757]), # normalize; presumably dataset statistics — TODO confirm
])

# Evaluation-time transform: same geometry/normalization, no augmentation
eval_transform = transforms.Compose([
transforms.Resize((256, 256)), # resize to a fixed size
transforms.CenterCrop(196), # deterministic center crop
transforms.ToTensor(), # convert PIL image to float tensor in [0, 1]
transforms.Normalize(mean =[0.4848, 0.4435, 0.4023], std=[0.2744, 0.2688, 0.2757]), # same statistics as training
])

注意:本次为了提高准确率,都尝试了很多的方法,这个是我根据直觉,以及网上推荐的一些博客进行选择的。

第二名方案

猫十二分类

这个第二名方案很值得看看,也对基本的处理方法有一定的讲解。我在自己的电脑上,batch_size连64都达不到,如果训练420次,估计得算到天荒地老。

4 神经网络的搭建(Resnet50)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class myRes(nn.Module):
    """ResNet-50 backbone plus a 1000 -> num_classes linear classification head."""

    def __init__(self):
        super(myRes, self).__init__()
        # Transfer learning: download ImageNet weights only when there is no
        # local checkpoint; otherwise the caller loads cat_model.pth itself.
        use_pretrained = not os.path.isfile(cat_model_name)
        self.resnet = torchvision.models.resnet50(pretrained=use_pretrained)
        # Head mapping the 1000 ImageNet logits to num_classes cat classes.
        # (Attribute assignment keeps the same state_dict key, 'add_Linear.*',
        # as the original add_module call, so old checkpoints still load.)
        self.add_Linear = nn.Linear(1000, num_classes)

    def forward(self, x):
        x = self.resnet(x)
        # BUG FIX: the original never applied add_Linear in forward(), so the
        # model emitted 1000 logits instead of num_classes.
        x = self.add_Linear(x)
        return x

5 训练函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def train(train_dataloader, model, loss_fn, optimizer):
    """
    Train the network for one epoch, then re-evaluate it on the training set.

    Args:
        train_dataloader: DataLoader over the training set.
        model: network model.
        loss_fn: loss function.
        optimizer: optimizer.

    Returns:
        (correct, test_loss): training-set accuracy in [0, 1] and mean
        per-batch loss — used later for plotting.
    """
    # Switch to training mode (enables dropout / batch-norm updates)
    model.train(True)
    for images, labels in train_dataloader:
        # Move the batch to the computing device (global `device`, set in __main__)
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        pred = model(images)
        loss = loss_fn(pred, labels)
        # Backpropagate and take an optimizer step
        loss.backward()
        optimizer.step()

    # ---- Evaluate on the training set (note: augmentation still applied) ----
    # BUG FIX: the original duplicated `model.eval()` and
    # `test_loss, correct = 0, 0` (redundant, removed here).
    size = len(train_dataloader.dataset)  # number of training samples
    num_batches = len(train_dataloader)   # number of batches
    model.eval()                          # evaluation mode
    test_loss, correct = 0, 0
    # Disable gradient tracking for evaluation (no_grad, not gradient clipping)
    with torch.no_grad():
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)
            pred = model(images)
            test_loss += loss_fn(pred, labels).item()
            correct += (pred.argmax(1) == labels).sum().item()
    # Average loss per batch, accuracy per sample
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Train_Accuracy: {(100 * correct):>0.1f}%, Train_Avg_loss: {test_loss:>8f} \n")
    return correct, test_loss  # used for plotting

6 验证函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def val(val_dataloader, model, loss_fn):
    """
    Evaluate the network on the validation set.

    Args:
        val_dataloader: DataLoader over the validation set.
        model: network model.
        loss_fn: loss function.

    Returns:
        (accuracy in [0, 1], mean per-batch loss).
    """
    n_samples = len(val_dataloader.dataset)  # validation-set size
    n_batches = len(val_dataloader)          # number of batches

    # Evaluation mode (freezes dropout / batch-norm behavior)
    model.eval()

    total_loss = 0
    n_correct = 0
    # No gradients needed during evaluation
    with torch.no_grad():
        for batch_images, batch_labels in val_dataloader:
            # Move the batch to the computing device (global `device`)
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            outputs = model(batch_images)
            total_loss += loss_fn(outputs, batch_labels).item()
            n_correct += (outputs.argmax(1) == batch_labels).sum().item()

    # Average loss per batch; accuracy per sample
    test_loss = total_loss / n_batches
    correct = n_correct / n_samples
    print(f"Test Error: \n Val_Accuracy: {(100 * correct):>0.1f}%, Val_Avgloss: {test_loss:>8f} \n")
    return correct, test_loss

7 绘图

主要作用是,在每次训练完后,能得到训练集和验证集中的准确率和loss。

代码写的比较冗余,不过问题不大。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def train_correct_plt(epochs, X, Y1, Y2, Y3, Y4):
    """Plot per-epoch accuracy (val/train), then per-epoch loss (val/train)."""
    # --- accuracy figure ---
    plt.figure(figsize=(epochs * 2, 8), dpi=80)
    plt.plot(X, Y1, label="val_correct", color="#FC331D", marker='*', linestyle="-")
    plt.plot(X, Y3, label="train_correct", color="#1D3162", marker='*', linestyle="-")
    # Annotate every data point with its value
    for series in (Y1, Y3):
        for x_, y_ in zip(X, series):
            plt.text(x_, y_, y_, ha='left', va='bottom')
    plt.xticks(X, X)
    # Render the y axis as percentages (values are fractions in [0, 1])
    plt.gca().yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
    plt.ylim(0.0, 1.1)
    plt.title("result")
    plt.xlabel("epoch")
    plt.ylabel("correct")
    plt.legend()
    plt.show()
    plt.clf()  # clear the figure
    plt.cla()  # clear the axes

    # --- loss figure ---
    plt.plot(X, Y2, label="val_loss", color="#F3991F", marker='o', linestyle="-")
    plt.plot(X, Y4, label="train_loss", color="#22a9Fd", marker='o', linestyle="-")
    for series in (Y2, Y4):
        for x_, y_ in zip(X, series):
            plt.text(x_, y_, y_, ha='left', va='bottom')
    plt.title("train_result")
    plt.ylim(0, max(Y2))
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.show()

8 主函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
if __name__ == '__main__':
    # ---------- data ----------
    # Training set. Raw strings so the backslashes are not treated as escapes
    # (the original 'data\cat_12_train_new' only worked because \c is not a
    # recognized escape; it is a DeprecationWarning on modern Python).
    train_folder_path = r'data\cat_12_train_new'  # folder path
    train_dataset = ImageFolder(train_folder_path, transform=transform)
    print(train_dataset)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Validation set
    val_folder_path = r'data\cat_12_val_new'  # folder path
    val_dataset = ImageFolder(val_folder_path, transform=eval_transform)
    print(val_dataset)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

    # Peek at one batch to sanity-check tensor shapes
    for images, labels in val_dataloader:
        print("Shape of X [N, C, H, W]: ", images.shape)
        print("Shape of y: ", labels.shape)
        break

    # ---------- model ----------
    # Train on GPU when available, otherwise CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using {} device".format(device))
    model = myRes().to(device)
    print(model)

    # BUG FIX: the original assigned the optimizer to the name `optim`,
    # shadowing the `torch.optim` module imported at the top of the file.
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # ---------- resume from an existing checkpoint ----------
    if os.path.exists(cat_model_name):
        model.load_state_dict(torch.load(cat_model_name))

    # ----- plotting buffers -----
    X = []   # epoch indices
    Y1 = []  # val accuracy
    Y2 = []  # val loss
    Y3 = []  # train accuracy
    Y4 = []  # train loss

    # ---------- training loop ----------
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        # Train for one epoch, then evaluate on the validation set
        correct_y3, test_loss_y4 = train(train_dataloader, model, loss_fn, optimizer)
        correct_y1, test_loss_y2 = val(val_dataloader, model, loss_fn)

        X.append(t + 1)
        Y1.append(round(correct_y1, 3))
        Y2.append(round(test_loss_y2, 2))
        Y3.append(round(correct_y3, 3))
        Y4.append(round(test_loss_y4, 2))

    # ---------- plots ----------
    train_correct_plt(epochs, X, Y1, Y2, Y3, Y4)
    print("Done!")

    # ---------- extra metrics: parameter count / FLOPs ----------
    in_channels = 3  # channels (RGB)
    side = 196       # spatial size after CenterCrop(196)
    # print(summary(model, (in_channels, side, side), device=device))  # architecture + params
    from thop import profile
    from thop import clever_format

    # BUG FIX: the original passed a tuple where torch.randn expects ints:
    #   torch.randn(batch_size, in_channels, (196, 196), (196, 196))  -> TypeError
    # Also move the dummy input to `device` so profiling works on GPU.
    dummy_input = torch.randn(batch_size, in_channels, side, side).to(device)
    flops, params = profile(model, inputs=(dummy_input,))
    flops, params = clever_format([flops, params], '%.3f')
    print('Flops: ' + flops + ', Params: ' + params)

    # ---------- save the model ----------
    torch.save(model.state_dict(), cat_model_name)

cat_train.py 源代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import torch
from torch import nn,optim
from torch.utils.data import Dataset, DataLoader
from matplotlib import ticker
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision
from torchvision.datasets import ImageFolder, DatasetFolder
import os
from torchsummary import summary # 用于参看模型参数数量

# Number of target categories
num_classes = 12 # number of classification classes
# Filename used to save/load the model checkpoint
cat_model_name = "cat_model.pth"
# Hyperparameters
batch_size = 32 # samples per mini-batch
loss_fn = nn.CrossEntropyLoss() # loss function for multi-class classification
epochs = 1 # number of training epochs

# Training-time transform: preprocessing + data augmentation
transform = transforms.Compose([
transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05),
# transforms.ColorJitter randomly perturbs brightness, contrast, saturation and hue.
# Each value is the relative jitter range, e.g. 0.05: brightness 100 -> (95, 105).
transforms.Resize((256, 256)), # resize to a fixed size
transforms.CenterCrop(196), # deterministic center crop (NOT random, despite the original comment)
transforms.RandomHorizontalFlip(p=0.5), # horizontal flip with probability 0.5
transforms.RandomVerticalFlip(p=0.5), # vertical flip with probability 0.5
transforms.ToTensor(), # convert PIL image to float tensor in [0, 1]
transforms.Normalize(mean=[0.4848, 0.4435, 0.4023], std=[0.2744, 0.2688, 0.2757]), # normalize; presumably dataset statistics — TODO confirm
])

# Evaluation-time transform: same geometry/normalization, no augmentation
eval_transform = transforms.Compose([
transforms.Resize((256, 256)), # resize to a fixed size
transforms.CenterCrop(196), # deterministic center crop
transforms.ToTensor(), # convert PIL image to float tensor in [0, 1]
transforms.Normalize(mean =[0.4848, 0.4435, 0.4023], std=[0.2744, 0.2688, 0.2757]), # same statistics as training
])

class myRes(nn.Module):
    """ResNet-50 backbone plus a 1000 -> num_classes linear classification head."""

    def __init__(self):
        super(myRes, self).__init__()
        # Transfer learning: download ImageNet weights only when there is no
        # local checkpoint; otherwise the caller loads cat_model.pth itself.
        use_pretrained = not os.path.isfile(cat_model_name)
        self.resnet = torchvision.models.resnet50(pretrained=use_pretrained)
        # Head mapping the 1000 ImageNet logits to num_classes cat classes.
        # (Attribute assignment keeps the same state_dict key, 'add_Linear.*',
        # as the original add_module call, so old checkpoints still load.)
        self.add_Linear = nn.Linear(1000, num_classes)

    def forward(self, x):
        x = self.resnet(x)
        # BUG FIX: the original never applied add_Linear in forward(), so the
        # model emitted 1000 logits instead of num_classes.
        x = self.add_Linear(x)
        return x

# 训练函数
# Training function
def train(train_dataloader, model, loss_fn, optimizer):
    """
    Train the network for one epoch, then re-evaluate it on the training set.

    Args:
        train_dataloader: DataLoader over the training set.
        model: network model.
        loss_fn: loss function.
        optimizer: optimizer.

    Returns:
        (correct, test_loss): training-set accuracy in [0, 1] and mean
        per-batch loss — used later for plotting.
    """
    # Switch to training mode (enables dropout / batch-norm updates)
    model.train(True)
    for images, labels in train_dataloader:
        # Move the batch to the computing device (global `device`, set in __main__)
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        pred = model(images)
        loss = loss_fn(pred, labels)
        # Backpropagate and take an optimizer step
        loss.backward()
        optimizer.step()

    # ---- Evaluate on the training set (note: augmentation still applied) ----
    # BUG FIX: the original duplicated `model.eval()` and
    # `test_loss, correct = 0, 0` (redundant, removed here).
    size = len(train_dataloader.dataset)  # number of training samples
    num_batches = len(train_dataloader)   # number of batches
    model.eval()                          # evaluation mode
    test_loss, correct = 0, 0
    # Disable gradient tracking for evaluation (no_grad, not gradient clipping)
    with torch.no_grad():
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)
            pred = model(images)
            test_loss += loss_fn(pred, labels).item()
            correct += (pred.argmax(1) == labels).sum().item()
    # Average loss per batch, accuracy per sample
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Train_Accuracy: {(100 * correct):>0.1f}%, Train_Avg_loss: {test_loss:>8f} \n")
    return correct, test_loss

# 测试函数
# Validation function
def val(val_dataloader, model, loss_fn):
    """
    Evaluate the network on the validation set.

    Args:
        val_dataloader: DataLoader over the validation set.
        model: network model.
        loss_fn: loss function.

    Returns:
        (accuracy in [0, 1], mean per-batch loss).
    """
    n_samples = len(val_dataloader.dataset)  # validation-set size
    n_batches = len(val_dataloader)          # number of batches

    # Evaluation mode (freezes dropout / batch-norm behavior)
    model.eval()

    total_loss = 0
    n_correct = 0
    # No gradients needed during evaluation
    with torch.no_grad():
        for batch_images, batch_labels in val_dataloader:
            # Move the batch to the computing device (global `device`)
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            outputs = model(batch_images)
            total_loss += loss_fn(outputs, batch_labels).item()
            n_correct += (outputs.argmax(1) == batch_labels).sum().item()

    # Average loss per batch; accuracy per sample
    test_loss = total_loss / n_batches
    correct = n_correct / n_samples
    print(f"Test Error: \n Val_Accuracy: {(100 * correct):>0.1f}%, Val_Avgloss: {test_loss:>8f} \n")
    return correct, test_loss



def train_correct_plt(epochs, X, Y1, Y2, Y3, Y4):
    """Plot per-epoch accuracy (val/train), then per-epoch loss (val/train)."""
    # --- accuracy figure ---
    plt.figure(figsize=(epochs * 2, 8), dpi=80)
    plt.plot(X, Y1, label="val_correct", color="#FC331D", marker='*', linestyle="-")
    plt.plot(X, Y3, label="train_correct", color="#1D3162", marker='*', linestyle="-")
    # Annotate every data point with its value
    for series in (Y1, Y3):
        for x_, y_ in zip(X, series):
            plt.text(x_, y_, y_, ha='left', va='bottom')
    plt.xticks(X, X)
    # Render the y axis as percentages (values are fractions in [0, 1])
    plt.gca().yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=1))
    plt.ylim(0.0, 1.1)
    plt.title("result")
    plt.xlabel("epoch")
    plt.ylabel("correct")
    plt.legend()
    plt.show()
    plt.clf()  # clear the figure
    plt.cla()  # clear the axes

    # --- loss figure ---
    plt.plot(X, Y2, label="val_loss", color="#F3991F", marker='o', linestyle="-")
    plt.plot(X, Y4, label="train_loss", color="#22a9Fd", marker='o', linestyle="-")
    for series in (Y2, Y4):
        for x_, y_ in zip(X, series):
            plt.text(x_, y_, y_, ha='left', va='bottom')
    plt.title("train_result")
    plt.ylim(0, max(Y2))
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.show()

if __name__ == '__main__':
    # ---------- data ----------
    # Training set. Raw strings so the backslashes are not treated as escapes
    # (the original 'data\cat_12_train_new' only worked because \c is not a
    # recognized escape; it is a DeprecationWarning on modern Python).
    train_folder_path = r'data\cat_12_train_new'  # folder path
    train_dataset = ImageFolder(train_folder_path, transform=transform)
    print(train_dataset)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Validation set
    val_folder_path = r'data\cat_12_val_new'  # folder path
    val_dataset = ImageFolder(val_folder_path, transform=eval_transform)
    print(val_dataset)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

    # Peek at one batch to sanity-check tensor shapes
    for images, labels in val_dataloader:
        print("Shape of X [N, C, H, W]: ", images.shape)
        print("Shape of y: ", labels.shape)
        break

    # ---------- model ----------
    # Train on GPU when available, otherwise CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using {} device".format(device))
    model = myRes().to(device)
    print(model)

    # BUG FIX: the original assigned the optimizer to the name `optim`,
    # shadowing the `torch.optim` module imported at the top of the file.
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # ---------- resume from an existing checkpoint ----------
    if os.path.exists(cat_model_name):
        model.load_state_dict(torch.load(cat_model_name))

    # ----- plotting buffers -----
    X = []   # epoch indices
    Y1 = []  # val accuracy
    Y2 = []  # val loss
    Y3 = []  # train accuracy
    Y4 = []  # train loss

    # ---------- training loop ----------
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        # Train for one epoch, then evaluate on the validation set
        correct_y3, test_loss_y4 = train(train_dataloader, model, loss_fn, optimizer)
        correct_y1, test_loss_y2 = val(val_dataloader, model, loss_fn)

        X.append(t + 1)
        Y1.append(round(correct_y1, 3))
        Y2.append(round(test_loss_y2, 2))
        Y3.append(round(correct_y3, 3))
        Y4.append(round(test_loss_y4, 2))

    # ---------- plots ----------
    train_correct_plt(epochs, X, Y1, Y2, Y3, Y4)

    # ---------- extra metrics: parameter count / FLOPs ----------
    input_size = (3, 196, 196)
    in_channels = 3  # channels (RGB)
    # print(summary(model, input_size, device=device))  # architecture + params

    # FLOPs estimation — kept disabled. NOTE(review): this snippet builds a
    # fresh myRes() and applies Normalize to a raw randn tensor, so its
    # numbers would only be approximate anyway.
    '''
    from thop import profile

    flops_transform = transforms.Compose([
    transforms.Resize((256, 256)), # 缩放到指定大小
    transforms.CenterCrop(196), # 中心随机裁剪
    transforms.Normalize(mean=[0.4848, 0.4435, 0.4023], std=[0.2744, 0.2688, 0.2757]), # 归一化
    ])

    input_size = (1, 3, 196, 196)
    input = torch.randn(input_size).to()
    transformed_input = flops_transform(input)
    model = myRes()
    flops, params = profile(model, inputs=(transformed_input,))
    print('FLOPs: %.2fG' % (flops / 1e9))
    '''

    # ---------- save the model ----------
    # Save the trained weights (the FLOPs snippet above is disabled, so
    # `model` is still the trained instance here).
    torch.save(model.state_dict(), cat_model_name)
    print("Done!")

参考文献:

第二名方案

猫十二分类

迁移学习+ResNet50

Deep Residual Learning for Image Recognition