目录
0 前言
1 数据增强的实现
1.1 贴背景
1.2 随机旋转
1.3 随机色调变换
1.4 随机透视变换
1.5 完整代码
2 总结
前一段时间在做目标检测任务,由于训练数据较少,需要对已有的数据进行离线增强。
那么什么是数据增强呢?数据增强(Data Augmentation)是指基于有限的数据生成更多等价(同样有效)的数据,丰富训练数据的分布,使通过训练集得到的模型泛化能力更强。
数据增强可以分为两类:离线增强和在线增强。离线增强:直接对数据集进行处理,数据的数目会变成增强因子乘以原数据集的数目,这种方法常用于数据集很小的时候。在线增强:在获得 batch 数据之后,对这个 batch 的数据进行增强,如旋转、平移、翻转等相应的变化;由于有些数据集不能接受线性级别的增长,这种方法常用于大的数据集。很多机器学习框架已经支持这种数据增强方式,并且可以使用 GPU 优化计算。
数据增强让有限的数据产生更多的数据,增加训练样本的数量以及多样性(噪声数据),提升模型鲁棒性。神经网络需要大量的参数,许多神经网络的参数都是数以百万计,而要使这些参数正确工作则需要大量的数据进行训练,但在很多实际的项目中,我们难以找到充足的数据来完成任务。此外,随机改变训练样本可以降低模型对某些属性的依赖,从而提高模型的泛化能力。
视频讲解地址:【深度学习】【数据增强】【目标检测】带或不带标注框的图片离线增强的实现(贴背景、随机旋转、随机色调变换、随机透视变换)(附源码)_哔哩哔哩 (゜-゜)つロ 干杯~-bilibili
数据增强主要有仿射变换、透视变换、色调变换等等,对于目标检测任务,一些数据增强方式是不可用的,如裁剪,因为很容易导致目标丢失。我在数据增强中常用贴背景、随机旋转、随机色调变换、随机透视变换四种方法。
以下代码中的标注框均是四点标注的四边形,而非水平矩形框。四个顶点按照左上、右上、右下、左下的顺序(即顺时针方向)排列,共八个坐标值:$x_0, y_0, x_1, y_1, x_2, y_2, x_3, y_3$。
可以先将标注框内的目标裁剪出来,然后贴到各种各样的背景图上,生成新的数据。
def add_background_randomly(image, background, box_list=None):
    """Paste a resized copy of ``image`` at a random position on ``background``.

    The image is resized so that its longer side is a random length between
    about two-thirds and four-fifths of the background's shorter side, then
    written into ``background`` at a random offset that keeps it fully inside.

    Args:
        image: Source image array; only ``image.shape[:2]`` (height, width)
            is read before it is passed to ``cv2.resize``.
        background: Background image array. It is modified IN PLACE (the
            pasted region is overwritten) and is also returned.
        box_list: Optional list of ``(cls_type, rect)`` labels where
            ``rect = [x0, y0, x1, y1, x2, y2, x3, y3]`` holds the corners in
            the order left-top, right-top, right-bottom, left-bottom.
            The caller's ``rect`` lists are NOT modified.

    Returns:
        ``[background, new_box_list]`` where ``new_box_list`` contains
        ``(cls_type, rect)`` tuples expressed in background coordinates.

    Raises:
        ValueError: Propagated from ``random.randint`` when the computed
            size or offset range is empty (e.g. a very small background).
    """
    labels = [] if box_list is None else box_list

    img_height, img_width = image.shape[:2]
    bg_height, bg_width = background.shape[:2]

    # Target length for the longer image side, relative to the shorter
    # background side: at least ~2/3 and at most ~4/5 of it.
    short_side = min(bg_height, bg_width)
    min_size = short_side // 3 * 2
    max_size = short_side // 5 * 4
    new_size = random.randint(min_size, max_size)
    resize_multiple = round(new_size / max(img_height, img_width), 4)
    image = cv2.resize(
        image,
        (int(img_width * resize_multiple), int(img_height * resize_multiple)),
    )
    img_height, img_width = image.shape[:2]

    # Random top-left corner such that the pasted image stays in bounds.
    height_pos = random.randint(0, bg_height - img_height)
    width_pos = random.randint(0, bg_width - img_width)
    background[height_pos:height_pos + img_height,
               width_pos:width_pos + img_width] = image

    # Map every label into background coordinates. Work on a copy of each
    # rect so that the caller's labels can be reused for further
    # augmentations without accumulating transforms.
    new_box_list = []
    for cls_type, rect in labels:
        new_rect = list(rect)
        for point_index in range(len(new_rect) // 2):
            x_index = point_index * 2
            y_index = x_index + 1
            # Scale, then shift by the paste offset.
            x_value = int(rect[x_index] * resize_multiple) + width_pos
            y_value = int(rect[y_index] * resize_multiple) + height_pos
            # Clamp to the background extent (inclusive upper bound).
            new_rect[x_index] = max(min(x_value, bg_width), 0)
            new_rect[y_index] = max(min(y_value, bg_height), 0)
        new_box_list.append((cls_type, new_rect))

    return [background, new_box_list]
对图片进行任意角度的旋转,标注框也随之旋转,旋转后需要外扩图片宽度和高度,避免被裁剪,旋转后的背景可以填充任意颜色。
def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate ``image`` by ``angle`` degrees and rotate its label boxes with it.

    The output canvas is enlarged to the bounding size of the rotated image
    so nothing is cropped; uncovered pixels are filled with ``color``.

    Args:
        image: Image array; ``image.shape[:2]`` gives (height, width).
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        angle: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D`` (positive values rotate
            counter-clockwise, negative values clockwise).
        color: Border fill value handed to ``cv2.warpAffine``.
        img_scale: Isotropic scale factor applied together with the rotation.

    Returns:
        ``[image_new, box_new_list]`` where the boxes are integer
        coordinates clamped to ``[0, width_new]`` x ``[0, height_new]``.
    """
    labels = [] if label_box_list is None else label_box_list

    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)

    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    # |scale * cos| and |scale * sin| taken from the matrix itself.
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # Bounding dimensions of the rotated (and scaled) image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift the transform so the original centre lands on the centre of the
    # enlarged canvas, i.e. exactly on (width_new / 2, height_new / 2).
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new), borderValue=color)

    # Apply the same transform to the labels: same centre as the matrix
    # (true division, not floor division) and the same scale factor, so the
    # boxes stay aligned with the warped pixels.
    angle_rad = angle / 180 * math.pi
    box_rot_list = cal_rotate_box(
        labels,
        angle_rad,
        (x_center_ori, y_center_ori),
        (width_new / 2, height_new / 2),
        img_scale,
    )

    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        clipped = []
        for x_value, y_value in zip(box_rot[0::2], box_rot[1::2]):
            clipped.append(max(min(int(x_value), width_new), 0))
            clipped.append(max(min(int(y_value), height_new), 0))
        box_new_list.append((cls_type, clipped))

    return [image_new, box_new_list]


def cal_rotate_box(box_list, angle, ori_center, new_center, scale=1.0):
    """Rotate every labelled box and return new ``(cls_type, box)`` tuples.

    Args:
        box_list: Iterable of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Scale factor applied around the centre (default ``1.0``).

    Returns:
        A new list of ``(cls_type, box_new)`` where ``box_new`` is a flat
        list of float coordinates.
    """
    box_list_new = []
    for cls_type, box in box_list:
        box_new = []
        for index in range(len(box) // 2):
            box_new.extend(
                cal_rotate_coordinate(
                    box[index * 2], box[index * 2 + 1],
                    angle, ori_center, new_center, scale,
                )
            )
        box_list_new.append((cls_type, box_new))
    return box_list_new


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center, scale=1.0):
    """Rotate one image point around ``ori_center`` and re-centre it.

    Image coordinates have the y axis pointing down, so the point is first
    converted to a y-up frame relative to ``ori_center``, rotated by
    ``angle`` radians (and scaled by ``scale``), then converted back
    relative to ``new_center``.

    Returns:
        ``(x_new, y_new)`` as floats.
    """
    x_0 = x_ori - ori_center[0]
    y_0 = ori_center[1] - y_ori  # flip to a y-up frame
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    x_new = scale * (x_0 * cos_a - y_0 * sin_a) + new_center[0]
    y_new = new_center[1] - scale * (y_0 * cos_a + x_0 * sin_a)
    return (x_new, y_new)
对图片进行色调变换,包括:亮度、对比度、饱和度、色调。可调节变换的概率。
def hue_change(image, brightness_prob=0.8, contrast_prob=0.2, saturation_prob=0.2, hue_prob=0.2):
    """Randomly jitter brightness, contrast, saturation and hue of ``image``.

    Each of the four jitters is applied independently: one uniform random
    number is drawn per jitter (always in the order brightness, contrast,
    saturation, hue) and the jitter runs when the draw is below its
    probability.

    Args:
        image: Input handed to ``transforms.ColorJitter``.
            NOTE(review): torchvision documents ColorJitter for PIL images
            and tensors -- confirm callers do not pass raw OpenCV arrays.
        brightness_prob: Probability of ``ColorJitter(brightness=0.5)``.
        contrast_prob: Probability of ``ColorJitter(contrast=0.2)``.
        saturation_prob: Probability of ``ColorJitter(saturation=0.2)``.
        hue_prob: Probability of ``ColorJitter(hue=0.2)``.

    Returns:
        The (possibly) transformed image; the input object itself when no
        jitter was selected.
    """
    jitter_steps = (
        (brightness_prob, {"brightness": 0.5}),
        (contrast_prob, {"contrast": 0.2}),
        (saturation_prob, {"saturation": 0.2}),
        (hue_prob, {"hue": 0.2}),
    )
    for probability, jitter_kwargs in jitter_steps:
        # The transform is only built when it is actually going to be used.
        if np.random.rand() < probability:
            image = transforms.ColorJitter(**jitter_kwargs)(image)
    return image
对图片进行任意的透视变换,标注框也随之变换,变换后的背景可以填充任意颜色。
def perspective_tranform(image, perspective_rate=0.5, label_box_list=None, color=(0, 0, 0)):
    """Apply a random perspective warp to ``image`` and its label boxes.

    The four image corners are mapped to random points: each destination
    corner is drawn from a band along the border of a canvas that is
    ``1 + perspective_rate`` times the image size. The warped image is then
    cropped to the extent spanned by the destination corners.

    Args:
        image: Image array; ``image.shape[:2]`` gives (height, width).
        perspective_rate: Strength of the distortion; the destination
            quadrilateral's sides range between ``1 - rate`` and
            ``1 + rate`` times the original size.
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        color: Border fill value handed to ``cv2.warpPerspective``
            (default black).

    Returns:
        ``[image_new, box_new_list]`` with integer box coordinates relative
        to the cropped image, clamped to its width/height.
    """
    labels = [] if label_box_list is None else label_box_list

    img_height, img_width = image.shape[:2]
    points_src = np.float32([
        [0, 0],
        [img_width - 1, 0],
        [img_width - 1, img_height - 1],
        [0, img_height - 1],
    ])

    max_width = int(img_width * (1.0 + perspective_rate))
    max_height = int(img_height * (1.0 + perspective_rate))
    min_width = int(img_width * (1.0 - perspective_rate))
    # Must mirror min_width (1.0 - rate); using 1.0 + rate here would make
    # delta_height zero and disable any vertical distortion.
    min_height = int(img_height * (1.0 - perspective_rate))
    delta_width = (max_width - min_width) // 2
    delta_height = (max_height - min_height) // 2

    # Destination corners: left-top, right-top, right-bottom, left-bottom.
    x0 = random.randint(0, delta_width)
    y0 = random.randint(0, delta_height)
    x1 = random.randint(delta_width + min_width, max_width)
    y1 = random.randint(0, delta_height)
    x2 = random.randint(delta_width + min_width, max_width)
    y2 = random.randint(delta_height + min_height, max_height)
    x3 = random.randint(0, delta_width)
    y3 = random.randint(delta_height + min_height, max_height)
    points_dst = np.float32([[x0, y0], [x1, y1], [x2, y2], [x3, y3]])

    M = cv2.getPerspectiveTransform(points_src, points_dst)
    image_res = cv2.warpPerspective(image, M, (max_width, max_height), borderValue=color)

    # Crop to the extent spanned by the destination corners.
    left = min(x0, x3)
    top = min(y0, y1)
    image_new = image_res[top:max(y2, y3), left:max(x1, x2)]
    crop_height, crop_width = image_new.shape[:2]

    box_new_list = []
    for cls_type, box in labels:
        # Work on a copy so the caller's labels stay usable.
        box_new = list(box)
        for index in range(len(box) // 2):
            x_src = box[index * 2]
            y_src = box[index * 2 + 1]
            # Homogeneous projection of the point through M.
            w = M[2][0] * x_src + M[2][1] * y_src + M[2][2]
            px = (M[0][0] * x_src + M[0][1] * y_src + M[0][2]) / w
            py = (M[1][0] * x_src + M[1][1] * y_src + M[1][2]) / w
            # Shift into the cropped frame and clamp to its extent.
            box_new[index * 2] = max(min(int(px) - left, crop_width), 0)
            box_new[index * 2 + 1] = max(min(int(py) - top, crop_height), 0)
        box_new_list.append((cls_type, box_new))

    return [image_new, box_new_list]
import math
import os
import random
import shutil

import cv2
import numpy as np
import torchvision.transforms as transforms
from PIL import Image, ImageOps
from tqdm import tqdm


def add_background_randomly(image, background, box_list=None):
    """Paste a resized copy of ``image`` at a random position on ``background``.

    The image is resized so that its longer side is a random length between
    about two-thirds and four-fifths of the background's shorter side, then
    written into ``background`` at a random offset that keeps it fully inside.

    Args:
        image: Source image array; only ``image.shape[:2]`` (height, width)
            is read before it is passed to ``cv2.resize``.
        background: Background image array. It is modified IN PLACE (the
            pasted region is overwritten) and is also returned.
        box_list: Optional list of ``(cls_type, rect)`` labels where
            ``rect = [x0, y0, x1, y1, x2, y2, x3, y3]`` holds the corners in
            the order left-top, right-top, right-bottom, left-bottom.
            The caller's ``rect`` lists are NOT modified.

    Returns:
        ``[background, new_box_list]`` where ``new_box_list`` contains
        ``(cls_type, rect)`` tuples expressed in background coordinates.

    Raises:
        ValueError: Propagated from ``random.randint`` when the computed
            size or offset range is empty (e.g. a very small background).
    """
    labels = [] if box_list is None else box_list

    img_height, img_width = image.shape[:2]
    bg_height, bg_width = background.shape[:2]

    # Target length for the longer image side, relative to the shorter
    # background side: at least ~2/3 and at most ~4/5 of it.
    short_side = min(bg_height, bg_width)
    min_size = short_side // 3 * 2
    max_size = short_side // 5 * 4
    new_size = random.randint(min_size, max_size)
    resize_multiple = round(new_size / max(img_height, img_width), 4)
    image = cv2.resize(
        image,
        (int(img_width * resize_multiple), int(img_height * resize_multiple)),
    )
    img_height, img_width = image.shape[:2]

    # Random top-left corner such that the pasted image stays in bounds.
    height_pos = random.randint(0, bg_height - img_height)
    width_pos = random.randint(0, bg_width - img_width)
    background[height_pos:height_pos + img_height,
               width_pos:width_pos + img_width] = image

    # Map every label into background coordinates, working on a copy of each
    # rect so the caller's labels can be reused for further augmentations.
    new_box_list = []
    for cls_type, rect in labels:
        new_rect = list(rect)
        for point_index in range(len(new_rect) // 2):
            x_index = point_index * 2
            y_index = x_index + 1
            # Scale, then shift by the paste offset.
            x_value = int(rect[x_index] * resize_multiple) + width_pos
            y_value = int(rect[y_index] * resize_multiple) + height_pos
            # Clamp to the background extent (inclusive upper bound).
            new_rect[x_index] = max(min(x_value, bg_width), 0)
            new_rect[y_index] = max(min(y_value, bg_height), 0)
        new_box_list.append((cls_type, new_rect))

    return [background, new_box_list]


def rotate_image(image, label_box_list=None, angle=90, color=(0, 0, 0), img_scale=1.0):
    """Rotate ``image`` by ``angle`` degrees and rotate its label boxes with it.

    The output canvas is enlarged to the bounding size of the rotated image
    so nothing is cropped; uncovered pixels are filled with ``color``.

    Args:
        image: Image array; ``image.shape[:2]`` gives (height, width).
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        angle: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D`` (positive values rotate
            counter-clockwise, negative values clockwise).
        color: Border fill value handed to ``cv2.warpAffine``.
        img_scale: Isotropic scale factor applied together with the rotation.

    Returns:
        ``[image_new, box_new_list]`` where the boxes are integer
        coordinates clamped to ``[0, width_new]`` x ``[0, height_new]``.
    """
    labels = [] if label_box_list is None else label_box_list

    height_ori, width_ori = image.shape[:2]
    x_center_ori, y_center_ori = (width_ori // 2, height_ori // 2)

    rotation_matrix = cv2.getRotationMatrix2D((x_center_ori, y_center_ori), angle, img_scale)
    # |scale * cos| and |scale * sin| taken from the matrix itself.
    cos = np.abs(rotation_matrix[0, 0])
    sin = np.abs(rotation_matrix[0, 1])

    # Bounding dimensions of the rotated (and scaled) image.
    width_new = int((height_ori * sin) + (width_ori * cos))
    height_new = int((height_ori * cos) + (width_ori * sin))

    # Shift the transform so the original centre lands on the centre of the
    # enlarged canvas, i.e. exactly on (width_new / 2, height_new / 2).
    rotation_matrix[0, 2] += (width_new / 2) - x_center_ori
    rotation_matrix[1, 2] += (height_new / 2) - y_center_ori

    image_new = cv2.warpAffine(image, rotation_matrix, (width_new, height_new), borderValue=color)

    # Apply the same transform to the labels: same centre as the matrix
    # (true division, not floor division) and the same scale factor, so the
    # boxes stay aligned with the warped pixels.
    angle_rad = angle / 180 * math.pi
    box_rot_list = cal_rotate_box(
        labels,
        angle_rad,
        (x_center_ori, y_center_ori),
        (width_new / 2, height_new / 2),
        img_scale,
    )

    box_new_list = []
    for cls_type, box_rot in box_rot_list:
        clipped = []
        for x_value, y_value in zip(box_rot[0::2], box_rot[1::2]):
            clipped.append(max(min(int(x_value), width_new), 0))
            clipped.append(max(min(int(y_value), height_new), 0))
        box_new_list.append((cls_type, clipped))

    return [image_new, box_new_list]


def cal_rotate_box(box_list, angle, ori_center, new_center, scale=1.0):
    """Rotate every labelled box and return new ``(cls_type, box)`` tuples.

    Args:
        box_list: Iterable of ``(cls_type, box)`` with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        angle: Rotation angle in radians.
        ori_center: ``(x, y)`` rotation centre in the source image.
        new_center: ``(x, y)`` position of that centre in the output image.
        scale: Scale factor applied around the centre (default ``1.0``).

    Returns:
        A new list of ``(cls_type, box_new)`` where ``box_new`` is a flat
        list of float coordinates.
    """
    box_list_new = []
    for cls_type, box in box_list:
        box_new = []
        for index in range(len(box) // 2):
            box_new.extend(
                cal_rotate_coordinate(
                    box[index * 2], box[index * 2 + 1],
                    angle, ori_center, new_center, scale,
                )
            )
        box_list_new.append((cls_type, box_new))
    return box_list_new


def cal_rotate_coordinate(x_ori, y_ori, angle, ori_center, new_center, scale=1.0):
    """Rotate one image point around ``ori_center`` and re-centre it.

    Image coordinates have the y axis pointing down, so the point is first
    converted to a y-up frame relative to ``ori_center``, rotated by
    ``angle`` radians (and scaled by ``scale``), then converted back
    relative to ``new_center``.

    Returns:
        ``(x_new, y_new)`` as floats.
    """
    x_0 = x_ori - ori_center[0]
    y_0 = ori_center[1] - y_ori  # flip to a y-up frame
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    x_new = scale * (x_0 * cos_a - y_0 * sin_a) + new_center[0]
    y_new = new_center[1] - scale * (y_0 * cos_a + x_0 * sin_a)
    return (x_new, y_new)


def hue_change(image, brightness_prob=0.8, contrast_prob=0.2, saturation_prob=0.2, hue_prob=0.2):
    """Randomly jitter brightness, contrast, saturation and hue of ``image``.

    Each of the four jitters is applied independently: one uniform random
    number is drawn per jitter (always in the order brightness, contrast,
    saturation, hue) and the jitter runs when the draw is below its
    probability.

    Args:
        image: Input handed to ``transforms.ColorJitter``.
            NOTE(review): torchvision documents ColorJitter for PIL images
            and tensors, while the other functions in this file work on
            OpenCV arrays -- confirm callers convert accordingly.
        brightness_prob: Probability of ``ColorJitter(brightness=0.5)``.
        contrast_prob: Probability of ``ColorJitter(contrast=0.2)``.
        saturation_prob: Probability of ``ColorJitter(saturation=0.2)``.
        hue_prob: Probability of ``ColorJitter(hue=0.2)``.

    Returns:
        The (possibly) transformed image; the input object itself when no
        jitter was selected.
    """
    jitter_steps = (
        (brightness_prob, {"brightness": 0.5}),
        (contrast_prob, {"contrast": 0.2}),
        (saturation_prob, {"saturation": 0.2}),
        (hue_prob, {"hue": 0.2}),
    )
    for probability, jitter_kwargs in jitter_steps:
        # The transform is only built when it is actually going to be used.
        if np.random.rand() < probability:
            image = transforms.ColorJitter(**jitter_kwargs)(image)
    return image


def perspective_tranform(image, perspective_rate=0.5, label_box_list=None, color=(0, 0, 0)):
    """Apply a random perspective warp to ``image`` and its label boxes.

    The four image corners are mapped to random points: each destination
    corner is drawn from a band along the border of a canvas that is
    ``1 + perspective_rate`` times the image size. The warped image is then
    cropped to the extent spanned by the destination corners.

    Args:
        image: Image array; ``image.shape[:2]`` gives (height, width).
        perspective_rate: Strength of the distortion; the destination
            quadrilateral's sides range between ``1 - rate`` and
            ``1 + rate`` times the original size.
        label_box_list: Optional list of ``(cls_type, box)`` labels with
            ``box = [x0, y0, x1, y1, x2, y2, x3, y3]``. Not modified.
        color: Border fill value handed to ``cv2.warpPerspective``
            (default black).

    Returns:
        ``[image_new, box_new_list]`` with integer box coordinates relative
        to the cropped image, clamped to its width/height.
    """
    labels = [] if label_box_list is None else label_box_list

    img_height, img_width = image.shape[:2]
    points_src = np.float32([
        [0, 0],
        [img_width - 1, 0],
        [img_width - 1, img_height - 1],
        [0, img_height - 1],
    ])

    max_width = int(img_width * (1.0 + perspective_rate))
    max_height = int(img_height * (1.0 + perspective_rate))
    min_width = int(img_width * (1.0 - perspective_rate))
    # Must mirror min_width (1.0 - rate); using 1.0 + rate here would make
    # delta_height zero and disable any vertical distortion.
    min_height = int(img_height * (1.0 - perspective_rate))
    delta_width = (max_width - min_width) // 2
    delta_height = (max_height - min_height) // 2

    # Destination corners: left-top, right-top, right-bottom, left-bottom.
    x0 = random.randint(0, delta_width)
    y0 = random.randint(0, delta_height)
    x1 = random.randint(delta_width + min_width, max_width)
    y1 = random.randint(0, delta_height)
    x2 = random.randint(delta_width + min_width, max_width)
    y2 = random.randint(delta_height + min_height, max_height)
    x3 = random.randint(0, delta_width)
    y3 = random.randint(delta_height + min_height, max_height)
    points_dst = np.float32([[x0, y0], [x1, y1], [x2, y2], [x3, y3]])

    M = cv2.getPerspectiveTransform(points_src, points_dst)
    image_res = cv2.warpPerspective(image, M, (max_width, max_height), borderValue=color)

    # Crop to the extent spanned by the destination corners.
    left = min(x0, x3)
    top = min(y0, y1)
    image_new = image_res[top:max(y2, y3), left:max(x1, x2)]
    crop_height, crop_width = image_new.shape[:2]

    box_new_list = []
    for cls_type, box in labels:
        # Work on a copy so the caller's labels stay usable.
        box_new = list(box)
        for index in range(len(box) // 2):
            x_src = box[index * 2]
            y_src = box[index * 2 + 1]
            # Homogeneous projection of the point through M.
            w = M[2][0] * x_src + M[2][1] * y_src + M[2][2]
            px = (M[0][0] * x_src + M[0][1] * y_src + M[0][2]) / w
            py = (M[1][0] * x_src + M[1][1] * y_src + M[1][2]) / w
            # Shift into the cropped frame and clamp to its extent.
            box_new[index * 2] = max(min(int(px) - left, crop_width), 0)
            box_new[index * 2 + 1] = max(min(int(py) - top, crop_height), 0)
        box_new_list.append((cls_type, box_new))

    return [image_new, box_new_list]


if __name__ == "__main__":
    # Visual check: draw one label quadrilateral on a result image and save it.
    # NOTE(review): `test_path`, `file_name`, `rect` and `image_res` are not
    # defined anywhere in the visible source; this excerpt appears to be cut
    # from a larger test loop -- define them (output directory, output file
    # name, an 8-value box, and the augmented image) before running.
    img_test_path = os.path.join(test_path, file_name)
    points = np.array(
        [[rect[0], rect[1]], [rect[2], rect[3]], [rect[4], rect[5]], [rect[6], rect[7]]],
        np.int32,
    )
    image_rect = cv2.polylines(image_res, pts=[points], isClosed=True, color=(0, 0, 255), thickness=3)
    cv2.imwrite(img_test_path, image_res)
如果能采集到足够丰富的数据,可以不用进行数据增强即可训练得到很好的模型。但如果数据量很少,那么数据增强是一个提高模型准确率和泛化能力的很好的方式。
转载自:https://blog.csdn.net/sinat_16020825/article/details/116521711