Want to detect the objects you care about in a video and show the detections as a video?

If the answer is "yes", read on.
A previous post covered how to train PaddleDetection on your own dataset.

So how do you present the final detection results in video form?

The approach: extract the video frame by frame and save each frame as an image, feed the images to your trained object detection model, draw the predicted bounding boxes on each image, save the annotated images, and finally stitch them back into a video. In code this boils down to three helper calls, sketched below (the helpers themselves are defined in step 3).
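An outline of that calling sequence, for orientation only (CutVideo2Image, GetObj and CombVideo are defined and tied together in step 3):

# Outline of the pipeline; the three helpers are defined in step 3 below.
CutVideo2Image('4.mp4', 'video2img/input')                        # video -> frames
GetObj('video2img/input', 'video2img/output')                     # frames -> frames with boxes drawn
CombVideo('video2img/output', 'video4_finish.mp4', (1920, 1080))  # annotated frames -> video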
This post mainly draws on the blog https://blog.csdn.net/yzl819819/article/details/104743106
and the video face-detection project https://aistudio.baidu.com/aistudio/projectdetail/757390
1. First, export the trained model and its parameters with PaddleDetection's export_model.py.

# Export the model
!python PaddleDetection/tools/export_model.py -c PaddleDetection/configs/yolov4/yolov4_cspdarknet_voc.yml -o weights=output/yolov4_cspdarknet_voc/best_model.pdparams

This produces three files in the export directory; a quick way to check for them is sketched below.
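A minimal sanity-check sketch: the first two file names (__model__ and __params__) are what the loading code in step 2 expects; infer_cfg.yml is the usual third exported file but is an assumption here and is not used later.

import os

# List the exported artifacts so the loader in step 2 can find them.
export_dir = 'output/yolov4_cspdarknet_voc'
for name in ['__model__', '__params__', 'infer_cfg.yml']:  # infer_cfg.yml assumed; not used below
    print(name, 'found' if os.path.exists(os.path.join(export_dir, name)) else 'missing')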

2. Wrap the logic of PaddleDetection's infer.py into a class, inference(), for predicting on a single image.

import numpy as np
import time
import cv2
import paddle.fluid as fluid
from PIL import Image
from PIL import ImageDraw
# Plotting utilities for displaying object bounding boxes
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.image import imread
import math

# # Helper for drawing a rectangle with matplotlib (kept for reference; PIL is used below instead)
# def draw_rectangle(currentAxis, bbox, edgecolor='red', facecolor='yellow', fill=False, linestyle='-'):
#     # currentAxis: the axes object, obtained via plt.gca()
#     # bbox: bounding box, a list of four values [x1, y1, x2, y2]
#     # edgecolor: border color
#     # facecolor: fill color
#     # fill: whether to fill the rectangle
#     # linestyle: border line style
#     # patches.Rectangle takes the top-left corner plus the width and height of the rectangle
#     rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2]-bbox[0]+1, bbox[3]-bbox[1]+1, linewidth=2,
#                              edgecolor=edgecolor, facecolor=facecolor, fill=fill, linestyle=linestyle)
#     currentAxis.add_patch(rect)

train_parameters = {
    "label_dict": {0:"HS",1:"DS",2:"B",3:"SMac",4:"TelP",5:"HSN",6:"DSN",7:"Number of TelP",8:"Unclear"},
    "use_gpu": True,
    "anchors": [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55],[72, 146], [142, 110], [192, 243], [459, 401]],
    "anchor_mask": [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
    "input_size": [3, 608, 608],    # 原版的边长大小为608,为了提高训练速度和预测速度,此处压缩为448
}

target_size = train_parameters['input_size']
anchors = train_parameters['anchors']
anchor_mask = train_parameters['anchor_mask']
label_dict = train_parameters['label_dict']
print(label_dict[1])  # quick sanity check on the label map

place = fluid.CUDAPlace(0) if train_parameters['use_gpu'] else fluid.CPUPlace()
exe = fluid.Executor(place)
path="output/yolov4_cspdarknet_voc"##存放步骤1导出模型的路径
[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=path, executor=exe,model_filename='__model__', params_filename='__params__')


class inference():
    def __init__(self):
        pass  # the executor and inference program are module-level globals, so there is nothing to set up here

    def draw_bbox_image(self, img, boxes, labels, scores, save_name):
        """
        Draw the predicted bounding boxes on the image and save it.
        :param img: PIL image to draw on
        :param boxes: predicted boxes, each [xmin, ymin, xmax, ymax]
        :param labels: predicted class indices
        :param scores: confidence scores
        :param save_name: path of the output image
        :return:
        """
        draw = ImageDraw.Draw(img)
        # plt.figure(figsize=(10, 10))
        # plt.imshow(img)
        # currentAxis = plt.gca()
        colors = ['red', 'green', 'blue', 'violet', 'yellow', 'darkblue', 'purple', 'orange', 'brown']
        for box, label, score in zip(boxes, labels, scores):
            print(box, label, score)
            if score > 0.7:  # only draw boxes above this confidence threshold
                xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
                # draw_rectangle(currentAxis, box, edgecolor=colors[label])
                draw.rectangle((xmin, ymin, xmax, ymax), fill=None, outline=colors[label], width=2)
                draw.text((xmin, ymin), label_dict[label], colors[label])
        img.save(save_name)

    def resize_img(self, img, target_size):
        """
        Resize the image to target_size (note: the aspect ratio is not preserved).
        :param img:
        :param target_size: [C, H, W]; only the spatial dimensions are used
        :return:
        """
        img = img.resize(target_size[1:], Image.BILINEAR)
        return img


    def read_image(self, img_path):
        origin = Image.open(img_path)
        img = self.resize_img(origin, target_size)
        resized_img = img.copy()
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = np.array(img).astype('float32').transpose((2, 0, 1))  # HWC to CHW
        img = img / 255.0
        # Placeholder mean/std normalization (identity here: mean 0, std 1);
        # adjust these values if your training config used different preprocessing.
        img[0, :, :] -= 0
        img[1, :, :] -= 0
        img[2, :, :] -= 0
        img[0, :, :] /= 1
        img[1, :, :] /= 1
        img[2, :, :] /= 1
        img = img[np.newaxis, :]
        return origin, img, resized_img

    def infer(self, image_path, idx):
        """
        Run prediction on a single image and save the result to a new image.
        :param image_path: path of the input frame
        :param idx: file-name suffix (e.g. '/0.jpg') appended to the output folder
        :return:
        """
        origin, tensor_img, resized_img = self.read_image(image_path)
        input_w, input_h = origin.size[0], origin.size[1]
        image_shape = np.array([input_h, input_w], dtype='int32')
        t1 = time.time()
        batch_outputs = exe.run(inference_program,
                                feed={feed_target_names[0]: tensor_img,
                                      feed_target_names[1]: image_shape[np.newaxis, :]},
                                fetch_list=fetch_targets,
                                return_numpy=False)

        period = time.time() - t1
        print("predict cost time:{0}".format("%2.2f sec" % period))
        bboxes = np.array(batch_outputs[0])

        if bboxes.shape[1] != 6:
            print("No object found in {}".format(image_path))
            return
        labels = bboxes[:, 0].astype('int32')
        scores = bboxes[:, 1].astype('float32')
        boxes = bboxes[:, 2:].astype('float32')

        last_dot_index = image_path.rfind('.')
        # idx = image_path[last_dot_index-4:last_dot_index]
        out_path = 'video2img/output/'  # folder for frames with predicted boxes drawn
        out_path = out_path + idx
        # out_path = 'video2img/output'
        self.draw_bbox_image(origin, boxes, labels, scores, out_path)

# if __name__ == '__main__':
#     image_path = "work/VOC2012/JPEGImages/4370.jpg"
#     a = inference()
#     a.infer(image_path, '/result.jpg')  # the second argument is the output file-name suffix
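One practical note before step 3: infer() saves its results under video2img/output/, the frame extraction below writes to video2img/input/, and neither PIL's save() nor cv2.imwrite() creates missing directories, so create both folders first. A minimal sketch:

import os

# Create the frame folders used by the pipeline; image saving does not create directories.
os.makedirs('video2img/input', exist_ok=True)
os.makedirs('video2img/output', exist_ok=True)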

3. Extract the video into frames, draw the predicted boxes on each frame, and combine the output images back into a video.

import cv2
import os

def CutVideo2Image(video_path, img_path):
    # Dump the video to images, one file per frame
    # video_path: path of the input video file
    # img_path: folder the frames are written to
    cap = cv2.VideoCapture(video_path)
    index = 0
    while(True):
        ret,frame = cap.read() 
        if ret:
            cv2.imwrite(img_path+'/%d.jpg'%index, frame)
            index += 1
        else:
            break
    cap.release()
    
def GetObj(in_path, out_path):
    # Run object detection on every frame
    # in_path: folder of input frames
    # out_path: folder of output frames (note: inference.infer writes to video2img/output/ directly)
    files = os.listdir(in_path)
    a = inference()

    for i in range(len(files)):
        # Frames were saved as 0.jpg, 1.jpg, ... so iterate by index to keep them in order
        img_path = os.path.join(in_path, '%d.jpg' % i)
        a.infer(img_path, '/%d.jpg' % i)
           
def CombVideo(in_path, out_path, size):
    # Combine the images back into a video
    # in_path: folder of input frames
    # out_path: path of the output video file
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, 30.0, size)
    files = os.listdir(in_path)

    for i in range(len(files)):
        img = cv2.imread(in_path + '/%d.jpg' % i)
        if img is None:  # skip frames that were never written (e.g. nothing detected)
            continue
        # img = cv2.resize(img, size)
        out.write(img)
    out.release()
video_initial = '4.mp4'               # input video path
video_finish = 'video4_finish.mp4'    # path of the assembled output video
images_initial = 'video2img/input'    # folder for frames extracted from the video
images_final = 'video2img/output'     # folder for frames with predicted boxes drawn
if __name__ == '__main__':
    CutVideo2Image(video_initial, images_initial)
    GetObj(images_initial, images_final)
    CombVideo(images_final, video_finish, (1920,1080))
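CombVideo hard-codes 30 fps and a 1920x1080 frame size. If your source video differs, a small sketch like the following reads the real values from the video with standard OpenCV properties, so the assembled video keeps the original timing:

# Read the frame rate and size from the source video instead of hard-coding them.
cap = cv2.VideoCapture(video_initial)
fps = cap.get(cv2.CAP_PROP_FPS)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
cap.release()
print(fps, size)  # pass these to cv2.VideoWriter / CombVideo instead of 30.0 and (1920, 1080)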

The code above is run from the /home/aistudio directory:

%cd ~/

The files are organized as follows:
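Based on the paths used in the code above, the working directory layout is roughly:

/home/aistudio
├── 4.mp4                          # input video
├── video4_finish.mp4              # assembled output video
├── video2img/
│   ├── input/                     # frames extracted from the video
│   └── output/                    # frames with predicted boxes drawn
├── output/
│   └── yolov4_cspdarknet_voc/     # training checkpoints and exported __model__ / __params__
└── PaddleDetection/               # PaddleDetection repo (export_model.py, configs, etc.)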
