概念
代码实践
1. 准备数据
In [1]
!unzip -oq data/data69186/xiaodu_bear.zip #解压数据集
In [2]
import paddle
paddle.seed(8888)
print(paddle.__version__)
import numpy as np
import PIL.Image as Image
from paddle.vision.datasets import DatasetFolder
from paddle.vision import transforms
# Preprocessing pipeline applied to every image before it reaches the network
# (used by Xiaodu.__getitem__ and by infer_img).
_preprocess_steps = [
    # Resize every image to the 224x224 input size the network expects.
    transforms.Resize(size=(224, 224)),
    # HWC -> CHW
    transforms.Transpose(),
    # With mean 0 and std 255 this computes (x - 0) / 255, i.e. scales by 1/255.
    transforms.Normalize(
        mean=[0, 0, 0],
        std=[255, 255, 255],
        to_rgb=True),
]
data_transforms = transforms.Compose(_preprocess_steps)
# Build the dataset
class Xiaodu(DatasetFolder):
    """Image-folder dataset that returns preprocessed images with int64 labels.

    Each item is ``(data_transforms(image), label)`` where ``label`` is a
    NumPy array of shape ``(1,)`` and dtype ``int64`` holding the class index
    taken from ``self.samples``.
    """

    def __init__(self, path):
        super().__init__(path)

    def __getitem__(self, index):
        img_path, label = self.samples[index]
        # Force three RGB channels: grayscale, palette or RGBA files would
        # otherwise produce a channel count that does not match the
        # 3-channel normalisation and the network's first convolution.
        # The ``with`` block also closes the file handle deterministically.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')
        label = np.array([label]).astype(np.int64)
        return data_transforms(img), label
# Training and test datasets, each read from its own directory.
train_dataset = Xiaodu("/home/aistudio/xiaodu_bear/Training")
test_dataset = Xiaodu("/home/aistudio/xiaodu_bear/Test")
2. 搭建网络(高层API版)
In [3]
# Build the network with the Sequential API.
# Three Conv2D -> ReLU -> MaxPool2D stages, a final Conv2D, then Flatten and a
# 2-way Linear classifier that outputs raw logits.
MyCNN = paddle.nn.Sequential(
    paddle.nn.Conv2D(3, 6 ,3),
    paddle.nn.ReLU(),
    paddle.nn.MaxPool2D(2),
    paddle.nn.Conv2D(6, 9 ,3),
    paddle.nn.ReLU(),
    paddle.nn.MaxPool2D(2),
    paddle.nn.Conv2D(9, 6 ,3),
    paddle.nn.ReLU(),
    paddle.nn.MaxPool2D(2),
    paddle.nn.Conv2D(6, 3 ,3),
    paddle.nn.Flatten(),
    # 1728 = 3 channels x 24 x 24, assuming a 224x224 input and the layers'
    # default stride/padding settings.
    paddle.nn.Linear(1728, 2),
)
# Inference variant: wraps the very same MyCNN object (so the weights are
# shared) and appends a Softmax to turn the logits into probabilities.
MyCNN_softmax = paddle.nn.Sequential(
    MyCNN,
    paddle.nn.Softmax(),
)
# Print a summary of both network structures for a (1, 3, 224, 224) input.
paddle.summary(MyCNN, (1, 3, 224, 224))
paddle.summary(MyCNN_softmax, (1, 3, 224, 224))
3. 训练&预估(高层API版)
In [4]
# Select the device before the models are built.
# To train on the CPU instead, use paddle.set_device('cpu').
paddle.set_device('gpu:0')

# Instantiate the models.
inputs = paddle.static.InputSpec(shape=[None, 3, 224, 224], name='inputs')
# The dataset yields one int64 class index per sample (shape [1]), so the
# label spec is [None, 1] / int64 rather than a 2-wide float vector.
labels = paddle.static.InputSpec(shape=[None, 1], dtype='int64', name='labels')
model = paddle.Model(MyCNN, inputs, labels)
# model2 wraps MyCNN_softmax, which contains the same MyCNN layers, so it
# shares the weights trained through `model`.
model2 = paddle.Model(MyCNN_softmax, inputs, labels)

# Training configuration: optimizer, loss function and accuracy metric.
# The optimizer takes the parameters of the model being prepared; these are
# the same parameter objects model2 exposes.
model.prepare(
    paddle.optimizer.Adam(learning_rate=1e-3, parameters=model.parameters()),
    paddle.nn.CrossEntropyLoss(),
    paddle.metric.Accuracy(),
)

# Train the model.
model.fit(train_dataset,
          epochs=8,
          batch_size=20,
          verbose=1)

# Save the model parameters.
model.save('Hapi_MyCNN')  # save for training
model2.save('Hapi_MyCNN', False)  # save for inference

# Evaluate on the test set.
model.evaluate(test_dataset, batch_size=5, verbose=1)
4. 预测
In [5]
import os
import time
import paddle
import zipfile
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
def infer_img(path, model_file_path, use_gpu):
    """Display one image and print the class predicted by a saved model.

    Relies on the module-level ``data_transforms`` for preprocessing and on
    ``test_dataset.classes`` for the class names.

    Args:
        path: Path of the image file to classify.
        model_file_path: Path prefix handed to ``paddle.jit.load``.
        use_gpu: Run on ``gpu:0`` when truthy, otherwise on the CPU.
    """
    # Show the image that is about to be classified.
    shown_img = Image.open(path)
    plt.imshow(shown_img)
    plt.show()

    paddle.set_device('gpu:0' if use_gpu else 'cpu')
    model = paddle.jit.load(model_file_path)
    model.eval()  # evaluation (inference) mode

    # Preprocess the image. Converting to RGB guarantees the three channels
    # the preprocessing pipeline and the network expect, even for grayscale,
    # palette or RGBA files.
    chw = np.array(data_transforms(shown_img.convert('RGB'))).astype('float32')
    batch = chw[np.newaxis, :, :, :]  # add the batch dimension

    label_list = test_dataset.classes

    out = model(paddle.to_tensor(batch))
    print(out[0])
    # NOTE: if the loaded model does not already end in a softmax layer,
    # apply paddle.nn.functional.softmax(out) to obtain probabilities.
    lab = np.argmax(out.numpy())  # index of the largest score
    print("样本: {},被预测为:{}".format(path, label_list[lab]))
    print("*********************************************")
In [6]
# Collect the path of every file found under work/ (walked recursively).
image_path = [
    os.path.join(root, f)
    for root, dirs, files in os.walk('work/')
    for f in files
]
# Run the high-level-API model on each collected file.
for sample_path in image_path:
    infer_img(path=sample_path, use_gpu=True, model_file_path="Hapi_MyCNN")
    time.sleep(0.5)  # short pause so the outputs do not get interleaved
5. 组网 + 训练&预估(低层API对比版)
In [7]
# Subclass-style network definition (inherits from paddle.nn.Layer).
class MyCNN(paddle.nn.Layer):
    """Small CNN with four convolutions and a 2-way linear classifier.

    Args:
        is_infer: When true, ``forward`` applies a softmax to the classifier
            output; otherwise the raw linear output is returned.
    """

    def __init__(self, is_infer: bool = False):
        super().__init__()
        self.is_infer = is_infer

        # Layers are created in the same order in which forward() uses them.
        self.conv1 = paddle.nn.Conv2D(in_channels=3, out_channels=6, kernel_size=3)
        self.pool1 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv2 = paddle.nn.Conv2D(in_channels=6, out_channels=9, kernel_size=3)
        self.pool2 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv3 = paddle.nn.Conv2D(in_channels=9, out_channels=6, kernel_size=3)
        self.pool3 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)

        self.conv4 = paddle.nn.Conv2D(in_channels=6, out_channels=3, kernel_size=3)

        self.flatten = paddle.nn.Flatten()
        self.linear1 = paddle.nn.Linear(in_features=1728, out_features=2)
        self.softmax = paddle.nn.Softmax()

    def forward(self, x):
        """Run the network; the input is expected as (batch_size, 3, 224, 224)."""
        # Three conv -> relu -> pool stages.
        x = self.pool1(paddle.nn.functional.relu(self.conv1(x)))
        x = self.pool2(paddle.nn.functional.relu(self.conv2(x)))
        x = self.pool3(paddle.nn.functional.relu(self.conv3(x)))
        # Final convolution, then flatten for the classifier.
        # (Original author's note: only Paddle Lite 2.8 supports the flatten operator.)
        x = self.flatten(self.conv4(x))
        x = self.linear1(x)
        if self.is_infer:
            x = self.softmax(x)
        return x
# Print the network structure for both the training (is_infer=False) and the
# inference (is_infer=True) variants, using a (1, 3, 224, 224) input.
paddle.summary(MyCNN(is_infer=False), (1, 3, 224, 224))
paddle.summary(MyCNN(is_infer=True), (1, 3, 224, 224))
In [8]
# Training data loader: batches of 20, reshuffled.
train_loader = paddle.io.DataLoader(train_dataset, batch_size=20, shuffle=True)
# Test data loader: batches of 5, in fixed order.
test_loader = paddle.io.DataLoader(test_dataset, batch_size=5, shuffle=False)
# Training procedure
def train(model ,use_gpu, test):
    """Train ``model`` for 8 epochs with Adam and save the resulting state.

    Batches come from the module-level ``train_loader``; when ``test`` is
    truthy the model is also evaluated on ``test_loader`` after every epoch.
    After the last epoch the model and optimizer state dicts are written to
    ``MyCNN.pdparams`` and ``MyCNN.pdopt``.

    Args:
        model: Network to train; must provide ``train()``, ``eval()``,
            ``parameters()`` and ``state_dict()``.
        use_gpu: Run on ``gpu:0`` when truthy, otherwise on the CPU.
        test: Whether to evaluate on the test set after each epoch.
    """
    paddle.set_device('gpu:0' if use_gpu else 'cpu')

    # To resume from a checkpoint, load 'MyCNN.pdparams' with paddle.load and
    # pass the result to model.set_state_dict before training starts.

    cross_entropy = paddle.nn.CrossEntropyLoss()
    opt = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters())

    print('start training ... ')
    epochs_num = 8  # number of epochs
    for pass_num in range(epochs_num):
        model.train()  # training mode
        for batch_id, data in enumerate(train_loader()):
            image_data, label_data = data
            image = paddle.to_tensor(image_data)
            label = paddle.to_tensor(label_data)

            predict = model(image)  # forward pass

            # Loss
            loss = cross_entropy(predict, label)
            avg_loss = paddle.mean(loss)

            # Accuracy
            acc = paddle.metric.accuracy(predict, label)

            if batch_id != 0 and batch_id % 5 == 0:
                print("train_pass:{}, batch_id:{}, train_loss:{}, train_acc:{}".format(pass_num, batch_id, avg_loss.numpy(), acc.numpy()))

            # Back-propagate, update the weights, then reset the gradients.
            avg_loss.backward()
            opt.step()
            opt.clear_grad()

        if test:
            model.eval()
            accuracies = []
            losses = []
            # No gradients are needed while evaluating.
            with paddle.no_grad():
                for batch_id, data in enumerate(test_loader()):
                    image_data, label_data = data
                    image = paddle.to_tensor(image_data)
                    label = paddle.to_tensor(label_data)

                    predict = model(image)
                    loss = cross_entropy(predict, label)
                    acc = paddle.metric.accuracy(predict, label)

                    # reshape(-1)[0] reads the scalar whether the tensor has
                    # shape [1] or is 0-D; plain [0] fails on 0-D tensors.
                    accuracies.append(acc.numpy().reshape(-1)[0])
                    losses.append(loss.numpy().reshape(-1)[0])

            # Average once after the loop (NaN if the test loader was empty),
            # using a separate name so the training loss is not shadowed.
            avg_acc = np.mean(accuracies)
            avg_test_loss = np.mean(losses)
            print("avg_test_loss:" + str(avg_test_loss), "\t\t" "avg_test_accs:" + str(avg_acc))

        print("===========================================================================")

    # Save the dynamic-graph parameters (useful for debugging / resuming).
    paddle.save(model.state_dict(),'MyCNN.pdparams')
    paddle.save(opt.state_dict(),'MyCNN.pdopt')
# Start training on the GPU with a fresh training-mode network; per-epoch
# evaluation on the test set is disabled.
train(model=MyCNN(is_infer=False), use_gpu=True, test=False)
In [9]
# Evaluate with dynamic-graph weights, then export a static-graph model.
def evaluation(use_gpu, model_file_path):
    """Evaluate saved dynamic-graph weights and export them for deployment.

    Loads the state dict at ``model_file_path`` into ``MyCNN(is_infer=True)``,
    prints the mean batch accuracy over the module-level ``test_loader``, then
    converts the model with ``paddle.jit.to_static`` and saves it under the
    prefix ``"MyCNN"``.

    Args:
        use_gpu: Run on ``gpu:0`` when truthy, otherwise on the CPU.
        model_file_path: Path of the saved parameter file.
    """
    paddle.set_device('gpu:0' if use_gpu else 'cpu')

    para_state_dict = paddle.load(model_file_path)
    model = MyCNN(is_infer=True)
    model.set_state_dict(para_state_dict)  # load the trained parameters
    model.eval()

    print('start evaluation .......')
    accs = []
    # No gradients are needed while evaluating.
    with paddle.no_grad():
        for batch_id, data in enumerate(test_loader()):
            image_data, label_data = data
            image = paddle.to_tensor(image_data)
            label = paddle.to_tensor(label_data)

            predict = model(image)
            acc = paddle.metric.accuracy(predict, label)
            # reshape(-1)[0] reads the scalar whether the tensor has shape [1]
            # or is 0-D; plain [0] fails on 0-D tensors.
            accs.append(acc.numpy().reshape(-1)[0])

    # Average once after the loop (NaN if the test loader was empty).
    avg_acc = np.mean(accs)
    print(avg_acc)

    # Save a static-graph model for deployment, with a batch-size-agnostic
    # input specification.
    input_spec = paddle.static.InputSpec(shape=[None, 3, 224, 224], name='img')
    model = paddle.jit.to_static(model, input_spec=[input_spec])
    paddle.jit.save(model, "MyCNN")
# Evaluate the parameters saved in MyCNN.pdparams on the GPU.
evaluation(use_gpu=True, model_file_path="MyCNN.pdparams")
In [10]
# Evaluate a model loaded from the static-graph export.
# NOTE: this rebinds the name `evaluation` used by the earlier dynamic-graph
# version.
def evaluation(use_gpu, model_file_path):
    """Evaluate a static-graph model loaded with ``paddle.jit.load``.

    Prints the mean batch accuracy over the module-level ``test_loader``.

    Args:
        use_gpu: Run on ``gpu:0`` when truthy, otherwise on the CPU.
        model_file_path: Path prefix handed to ``paddle.jit.load``.
    """
    paddle.set_device('gpu:0' if use_gpu else 'cpu')

    model = paddle.jit.load(model_file_path)
    model.eval()

    print('start evaluation .......')
    accs = []
    for batch_id, data in enumerate(test_loader()):
        image_data, label_data = data
        image = paddle.to_tensor(image_data)
        label = paddle.to_tensor(label_data)

        predict = model(image)
        acc = paddle.metric.accuracy(predict, label)
        # reshape(-1)[0] reads the scalar whether the tensor has shape [1] or
        # is 0-D; plain [0] fails on 0-D tensors.
        accs.append(acc.numpy().reshape(-1)[0])

    # Average once after the loop (NaN if the test loader was empty).
    avg_acc = np.mean(accs)
    print(avg_acc)
# Evaluate the exported static-graph model saved under the prefix "MyCNN".
evaluation(use_gpu=True, model_file_path="MyCNN")
In [11]
# Collect the path of every file found under work/ (walked recursively).
image_path = [
    os.path.join(root, f)
    for root, dirs, files in os.walk('work/')
    for f in files
]
# Run the exported low-level-API model on each collected file.
for sample_path in image_path:
    infer_img(path=sample_path, use_gpu=True, model_file_path="MyCNN")
    time.sleep(0.5)  # short pause so the outputs do not get interleaved
6. OPT工具转化Paddle Lite模型
In [13]
!chmod +x opt-v2.8
!./opt-v2.8 --model_file=MyCNN.pdmodel --param_file=MyCNN.pdiparams --optimize_out=bear
更多推荐
paddle产品部署
发布评论