This project uses the GitHub project as an example to explain how to hook a model into TIPC and how to add Serving support; for more information on the original project, see README.md.
You can also refer to the official TIPC and Serving examples when adding TIPC and Serving support to your own model.
In essence, the goal is a single shell script that reads a txt config file and then performs training, evaluation, dynamic-to-static export, and inference. By running that one shell script, a user should be able to carry out any one or several of these steps.
Since we want to support train, eval, dynamic-to-static export, and inference, we first need the corresponding Python scripts, typically train.py, eval.py, export_model.py, and infer.py. The first two are usually written while developing the algorithm, so below I focus on the latter two.
This step converts the model from a dynamic graph to a static graph for later inference and related use. The detailed code can be adapted from ppdet/engine/trainer.py, around line 531:
import os

import paddle
from paddle.static import InputSpec


def export(model, model_name, output_dir):
    # switch the model to eval mode
    model.eval()
    # directory where the static-graph model will be saved
    save_dir = os.path.join(output_dir, model_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    # input shape; use [3, -1, -1] for dynamic shapes
    image_shape = [3, -1, -1]
    # other inputs the model needs
    input_spec = [{
        "image": InputSpec(shape=[None] + image_shape, name='image'),
        "im_shape": InputSpec(shape=[None, 2], name='im_shape'),
        "scale_factor": InputSpec(shape=[None, 2], name='scale_factor')
    }]
    static_model = paddle.jit.to_static(model, input_spec=input_spec)
    # NOTE: dy2st does not prune the program, but jit.save will prune the
    # input spec, so prune it here and save with the pruned input spec
    pruned_input_spec = _prune_input_spec(
        input_spec,
        static_model.forward.main_program,
        static_model.forward.outputs)
    # save the model
    paddle.jit.save(
        static_model,
        os.path.join(save_dir, 'model'),
        input_spec=pruned_input_spec)


def _prune_input_spec(input_spec, program, targets):
    # try to prune the static program to figure out the pruned input spec,
    # so we perform the following operations in static mode
    paddle.enable_static()
    pruned_input_spec = [{}]
    program = program.clone()
    program = program._prune(targets=targets)
    global_block = program.global_block()
    for name, spec in input_spec[0].items():
        try:
            v = global_block.var(name)
            pruned_input_spec[0][name] = spec
        except Exception:
            pass
    paddle.disable_static()
    return pruned_input_spec
Once the code is written, run tools/export_model.py to test it; the static-graph model will be saved to output_inference/retinanet_r50_fpn_1x_coco (the exact location depends on your own script).
# install the required dependencies
!pip install -r requirements.txt
!python tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=/home/aistudio/data/data104154/best_model.pdparams
Here argparse can be used to receive the arguments passed on the command line. For this part, as well as visualization, video inference, and how to write the preprocessing operators, see deploy/python/infer.py and deploy/python/preprocess.py.
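Before looking at the full inference code, here is a minimal sketch of the argparse part. This is a simplified stand-in for the argsparser helper in deploy/python/utils.py; the flag names --device, --model_dir, --image_dir and --batch_size mirror the ones used in the commands and txt config later, but the real parser has many more options.

import argparse


def argsparser():
    # simplified stand-in for deploy/python/utils.py::argsparser
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default=None,
                        help='directory containing model.pdmodel and model.pdiparams')
    parser.add_argument('--image_dir', type=str, default=None,
                        help='directory of images to run inference on')
    parser.add_argument('--device', type=str, default='CPU',
                        help='CPU, GPU or XPU')
    parser.add_argument('--batch_size', type=int, default=1)
    return parser


if __name__ == '__main__':
    args = argsparser().parse_args()
    print(args.device, args.model_dir)

The main inference code (the Detector class, load_predictor and create_inputs) then looks like this: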
import os
import math
from functools import reduce

import yaml
import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor

# RetinaNet-Based-on-PPdet-main/deploy/python/utils.py
# argument parsing, a timing helper and a memory-usage helper
from utils import argsparser, Timer, get_current_memory_mb
# preprocessing operators referenced by name via eval() below,
# see deploy/python/preprocess.py
from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride


class Detector(object):
    def __init__(self,
                 pred_config,
                 model_dir,
                 device='CPU',
                 run_mode='fluid',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False):
        self.pred_config = pred_config
        # initialize the predictor; this mainly configures CPU/GPU, MKLDNN/TensorRT, etc.
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            device=device,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn)
        # these are used to record time and memory usage
        self.det_times = Timer()
        self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0

    # preprocessing
    def preprocess(self, image_list):
        # build the preprocessing operators
        # if the preprocessing is fixed, you can also simply Compose the ops together, see:
        # https://github.com/littletomatodonkey/AlexNet-Prod/blob/tipc/pipeline/Step5/AlexNet_paddle/deploy/inference_python/infer.py#L48
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(eval(op_type)(**new_op_info))
        input_im_lst = []
        input_im_info_lst = []
        for im_path in image_list:
            im, im_info = preprocess(im_path, preprocess_ops)
            input_im_lst.append(im)
            input_im_info_lst.append(im_info)
        # convert the preprocessed inputs into the format the model expects
        # (model dependent; you may not need this function)
        inputs = create_inputs(input_im_lst, input_im_info_lst)
        return inputs

    # postprocessing
    def postprocess(self, np_boxes, np_masks, inputs, np_boxes_num, threshold=0.5):
        # postprocess output of predictor
        results = {}
        results['boxes'] = np_boxes
        results['boxes_num'] = np_boxes_num
        if np_masks is not None:
            results['masks'] = np_masks
        return results

    def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
        # run preprocessing to get the inputs the model needs
        self.det_times.preprocess_time_s.start()
        inputs = self.preprocess(image_list)
        self.det_times.preprocess_time_s.end()
        np_boxes, np_masks = None, None
        # get the input names and feed the inputs
        input_names = self.predictor.get_input_names()
        for i in range(len(input_names)):
            input_tensor = self.predictor.get_input_handle(input_names[i])
            input_tensor.copy_from_cpu(inputs[input_names[i]])
        for i in range(warmup):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        self.det_times.inference_time_s.start()
        for i in range(repeats):
            self.predictor.run()
            output_names = self.predictor.get_output_names()
            boxes_tensor = self.predictor.get_output_handle(output_names[0])
            np_boxes = boxes_tensor.copy_to_cpu()
            boxes_num = self.predictor.get_output_handle(output_names[1])
            np_boxes_num = boxes_num.copy_to_cpu()
            if self.pred_config.mask:
                masks_tensor = self.predictor.get_output_handle(output_names[2])
                np_masks = masks_tensor.copy_to_cpu()
        self.det_times.inference_time_s.end(repeats=repeats)
        self.det_times.postprocess_time_s.start()
        results = []
        # return the results after postprocessing
        if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
            print('[WARNING] No object detected.')
            results = {'boxes': np.array([[]]), 'boxes_num': [0]}
        else:
            results = self.postprocess(
                np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold)
        self.det_times.postprocess_time_s.end()
        self.det_times.img_num += len(image_list)
        return results

    def get_timer(self):
        return self.det_times


def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   device='CPU',
                   min_subgraph_size=3,
                   use_dynamic_shape=False,
                   trt_min_shape=1,
                   trt_max_shape=1280,
                   trt_opt_shape=640,
                   trt_calib_mode=False,
                   cpu_threads=1,
                   enable_mkldnn=False):
    if device != 'GPU' and run_mode != 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
            .format(run_mode, device))
    # load the model via the Paddle Inference API
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    if device == 'GPU':
        # initial GPU memory(M), device ID
        config.enable_use_gpu(200, 0)
        # optimize graph and fuse op
        config.switch_ir_optim(True)
    elif device == 'XPU':
        config.enable_xpu(10 * 1024 * 1024)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if enable_mkldnn:
            try:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            except Exception as e:
                print("The current environment does not support `mkldnn`, "
                      "so disable mkldnn.")
                pass
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=trt_calib_mode)
        if use_dynamic_shape:
            min_input_shape = {
                'image': [batch_size, 3, trt_min_shape, trt_min_shape]
            }
            max_input_shape = {
                'image': [batch_size, 3, trt_max_shape, trt_max_shape]
            }
            opt_input_shape = {
                'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
            }
            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
                                              opt_input_shape)
            print('trt set dynamic shape done!')
    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, config


def create_inputs(imgs, im_info):
    """Handle batch size 1 and larger batches separately (the latter may need
    padding), put the required fields into a dict and return it."""
    inputs = {}
    im_shape = []
    scale_factor = []
    if len(imgs) == 1:
        inputs['image'] = np.array((imgs[0], )).astype('float32')
        inputs['im_shape'] = np.array(
            (im_info[0]['im_shape'], )).astype('float32')
        inputs['scale_factor'] = np.array(
            (im_info[0]['scale_factor'], )).astype('float32')
        return inputs
    for e in im_info:
        im_shape.append(np.array((e['im_shape'], )).astype('float32'))
        scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
    inputs['im_shape'] = np.concatenate(im_shape, axis=0)
    inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)
    imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
    max_shape_h = max([e[0] for e in imgs_shape])
    max_shape_w = max([e[1] for e in imgs_shape])
    padding_imgs = []
    for img in imgs:
        im_c, im_h, im_w = img.shape[:]
        padding_im = np.zeros(
            (im_c, max_shape_h, max_shape_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = img
        padding_imgs.append(padding_im)
    inputs['image'] = np.stack(padding_imgs, axis=0)
    return inputs
With the inference code ready, you can load the static-graph model exported earlier and test it on images:
!python ./deploy/python/infer.py --device=gpu --model_dir=output_inference/retinanet_r50_fpn_1x_coco --batch_size=1 --image_dir=demo
The visualized results are saved to ./output:
The shell script reads the txt file by line number; lines are matched positionally. For example, if the shell script reads whether to use the GPU from line 4 of the txt file (counting from 0) and you instead put use_gpu:True on line 6, the shell script will pick up the wrong information. So do not add or delete lines casually; if a line must be added or deleted, the corresponding shell script has to be modified as well.
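To make the positional parsing concrete, here is a minimal sketch of the kind of helper functions TIPC shell scripts use to split a key:value line. It is modeled on the common_func.sh convention used by the official PaddlePaddle TIPC examples; treat the exact function and variable names here as assumptions of this sketch rather than this repository's code.

# read the txt config into an array of lines, then split "key:value" pairs
dataline=$(cat tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt)
IFS=$'\n'
lines=(${dataline})

function func_parser_key() {
    # the field before the first ':' is the key
    strs=$1
    IFS=":"
    array=(${strs})
    echo ${array[0]}
}

function func_parser_value() {
    # the field after the first ':' is the value
    strs=$1
    IFS=":"
    array=(${strs})
    echo ${array[1]}
}

# e.g. line 4 (counting from 0) is "use_gpu:True"
use_gpu_key=$(func_parser_key "${lines[4]}")
use_gpu_value=$(func_parser_value "${lines[4]}")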
The comments below can be deleted when the file is actually used (but do not delete the placeholder lines consisting only of double # signs).
===========================train_params===========================
model_name:retinanet_r50_fpn_1x_coco   # model name, usually used as the folder name for outputs
python:python3.7
gpu_list:0   # usually 0; multi-GPU is not supported yet
use_gpu:True   # whether to use the GPU; to also test on CPU, write True|False
auto_cast:null|amp   # whether to test mixed precision
epoch:lite_train_lite_infer=2|lite_train_whole_infer=1|whole_train_whole_infer=12
save_dir:tipc/train_infer_python/output/retinanet_r50   # where the outputs are saved
TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_train_whole_infer=1
pretrain_weights:/home/aistudio/data/data104154/best_model.pdparams   # location of the pretrained weights; the best weights can also be used
train_model_name:model_final.pdparams   # name of the weights saved by training
train_infer_img_dir:./dataset/coco/test2017/   # dataset location
filename:retinanet_r50_fpn_1x_coco
##
trainer:norm_train   # only the norm_train mode needs to be verified here
norm_train:tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o   # training command line; usually only the config path after -c needs changing
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o   # eval command line
null:null
##
===========================infer_params===========================
--output_dir:./output_infer/python/retinanet_r50   # output location for inference
weights:/home/aistudio/data/data104154/best_model.pdparams   # best weights loaded for inference
norm_export:tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o   # command line for exporting the static-graph model
pact_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
kl_quant_export:null
##
infer_mode:norm
infer_quant:False
inference:./deploy/python/infer.py   # location of the infer script
--device:gpu|cpu   # test both GPU and CPU
--enable_mkldnn:True|False   # whether to use MKLDNN when testing on CPU
--cpu_threads:1|4   # test different numbers of threads
--batch_size:1
--use_tensorrt:null   # whether to use TensorRT when testing on GPU
--run_mode:fluid   # TensorRT precision can be set here; not required for this project
--model_dir:tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco   # weights loaded for inference
--image_dir:./dataset/coco/test2017/   # images used for inference
--save_log_path:null
--run_benchmark:False
--trt_max_shape:1600
The other parameters configured for running infer.py, such as --device and --enable_mkldnn, are simply passed into the load_predictor function shown earlier to initialize the predictor.
If you add a parameter line of your own to the txt file, every line after it shifts down by one, so the index i in the affected $(func_parser_key "${lines[i]}") calls in the shell script must be increased by 1. Parse your new key and value in the same way and splice the value into the command string that is assembled from options such as "${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_autocast}", for example:

your_params_key=$(func_parser_key "${lines[4]}")
your_params_value=$(func_parser_value "${lines[4]}")
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_autocast}"
If the original model has already passed TIPC verification, TIPC verification for a lightweight variant is straightforward. Take MobileNetV1 as an example: first replace the backbone of the original model with MobileNetV1. You need to decide which feature levels to return, which can be worked out by reading the network code in RetinaNet-Based-on-PPdet-main/ppdet/modeling/backbones/mobilenet_v1.py. Here the features of layers 4, 6 and 13 are returned; their strides happen to be 8, 16 and 32. The pretrained weights must also be the MobileNetV1 ones, and the optimizer settings may need adjusting; everything else can be reused:
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/retinanet_r50_fpn.yml',
  '_base_/optimizer_1x.yml',
  '_base_/retinanet_reader.yml',
]

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams

RetinaNet:
  backbone: MobileNet

MobileNet:
  scale: 1
  feature_maps: [4, 6, 13]
  with_extra_blocks: false
  extra_block_filters: []
After training and obtaining the best weights, change the relevant parts of the original txt file to the MobileNetV1 settings (for example, point the best-weights path at the MobileNetV1 weights). The two files to compare are:
tipc/train_infer_python/configs/retinanet/retinanet_mobilenet_v1_fpn_1x_coco.txt
tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt
A more detailed description can be found in tipc/train_infer_python/README.md.
!pip install -r requirements.txt
!bash tipc/train_infer_python/prepare.sh tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt 'lite_train_lite_infer'
!bash tipc/train_infer_python/test_train_inference_python.sh tipc/train_infer_python/configs/retinanet/retinanet_r50_fpn_1x_coco.txt 'lite_train_lite_infer'
If everything runs successfully, output like the following is printed:
Run successfully with command - python3.7 tools/train.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o use_gpu=True save_dir=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null epoch=2 pretrain_weights=/home/aistudio/data/data104154/best_model.pdparams TrainReader.batch_size=2 filename=retinanet_r50_fpn_1x_coco !
Run successfully with command - python3.7 tools/eval.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco/model_final.pdparams use_gpu=True !
Run successfully with command - python3.7 tools/export_model.py -c configs/retinanet/retinanet_r50_fpn_1x_coco.yml -o weights=tipc/train_infer_python/output/retinanet_r50/norm_train_gpus_0_autocast_null/retinanet_r50_fpn_1x_coco/model_final.pdparams filename=retinanet_r50_fpn_1x_coco --output_dir=./output_infer/python/retinanet_r50 !
Run successfully with command - python3.7 ./deploy/python/infer.py --device=gpu --run_mode=fluid --model_dir=./output_infer/python/retinanet_r50/retinanet_r50_fpn_1x_coco --batch_size=1 --image_dir=./dataset/coco/test2017/ --run_benchmark=False --trt_max_shape=1600 --output_dir=./output_infer/python/retinanet_r50 > tipc/train_infer_python/output/retinanet_r50/python_infer_gpu_precision_fluid_batchsize_1.log 2>&1 !
!bash tipc/train_infer_python/test_train_inference_python.sh tipc/train_infer_python/configs/retinanet/retinanet_mobilenet_v1_fpn_1x_coco.txt 'lite_train_lite_infer'
If it runs successfully, the same kind of messages are printed; the outputs and log files are saved under tipc/train_infer_python/output. The visualized outputs look like this:
What Serving deployment does is load the model on the server side and then access it from a client to run inference. The model loaded here is converted from the static-graph model, so the static-graph model has to be exported first, just as before.
If TIPC has already been done, the static-graph model already exists. After converting it into the Serving model, the remaining work is the server side, the client side and some pre/postprocessing; finally, hook it into TIPC, which again means writing a shell script and a matching txt file as before.
First, install the packages Serving needs. Check your environment and install the matching versions:
!nvidia-smi
Tue Jan 25 09:56:06 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.67       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Tesla V100-SXM2...  On   | 00000000:05:00.0 Off |                    0 |
| N/A   34C    P0    40W / 300W |      0MiB / 32480MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
# download the wheels
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.7.0-cp37-none-any.whl
!wget https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-0.7.0-py3-none-any.whl
# install the wheels
!pip install paddle_serving_server_gpu-0.7.0.post101-py3-none-any.whl
!pip install paddle_serving_client-0.7.0-cp37-none-any.whl
!pip install paddle_serving_app-0.7.0-py3-none-any.whl
The following command converts the static-graph model into the model format Serving needs:
'''
--dirname           directory where the static-graph model is saved
--model_filename    file name of the static-graph model
--params_filename   model.pdiparams
--serving_server    deploy/serving/serving_server   output directory for the server-side files
--serving_client    deploy/serving/serving_client   output directory for the client-side files
'''
!python3 -m paddle_serving_client.convert --dirname output_inference/retinanet_r50_fpn_1x_coco --model_filename model.pdmodel --params_filename model.pdiparams --serving_server deploy/serving/serving_server --serving_client deploy/serving/serving_client
The converted models can be found under deploy/serving.
The server side mainly requires writing the input preprocessing and the result postprocessing; the following code can serve as a reference:
import base64

import cv2
import numpy as np
from paddle_serving_server.web_service import WebService, Op

# preprocessing operators, see deploy/serving/preprocess_ops.py
from preprocess_ops import Compose, Resize, NormalizeImage, Permute, PadStride


class RetinaNetOp(Op):
    def init_op(self):
        # compose the preprocessing operators needed here
        self.eval_transforms = Compose([
            Resize(target_size=[800, 1333]),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            Permute(),
            PadStride(32)
        ])

    def preprocess(self, input_dicts, data_id, log_id):
        # this mainly handles the batch-size-1 case
        (_, input_dict), = input_dicts.items()
        batch_size = len(input_dict.keys())
        imgs = []
        imgs_info = {'im_shape': [], 'scale_factor': []}
        for key in input_dict.keys():
            # decode the incoming data
            data = base64.b64decode(input_dict[key].encode('utf8'))
            img = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # add the fields the model needs
            im_info = {
                'scale_factor': np.array([1., 1.], dtype=np.float32),
                'im_shape': img.shape[:2],
            }
            # run the preprocessing / data augmentation
            img, im_info = self.eval_transforms(img, im_info)
            imgs.append(img[np.newaxis, :].copy())
            imgs_info["im_shape"].append(im_info["im_shape"][np.newaxis, :].copy())
            imgs_info["scale_factor"].append(im_info["scale_factor"][np.newaxis, :].copy())
        input_imgs = np.concatenate(imgs, axis=0)
        input_im_shape = np.concatenate(imgs_info["im_shape"], axis=0)
        input_scale_factor = np.concatenate(imgs_info["scale_factor"], axis=0)
        # only the first return value matters here: it is the input your model needs for inference
        return {"image": input_imgs, "im_shape": input_im_shape,
                "scale_factor": input_scale_factor}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
        # the fetch names can be looked up in deploy/serving/serving_server/serving_server_conf.prototxt,
        # which is generated when the static-graph model is converted
        np_boxes = list(fetch_dict.values())[0]
        # this output has already gone through NMS, so it can be postprocessed directly
        keep = (np_boxes[:, 1] > 0.5) & (np_boxes[:, 0] > -1)
        np_boxes = np_boxes[keep, :]
        result = {"class_id": [], "confidence": [], "left_top": [], "right_bottom": []}
        for dt in np_boxes:
            clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
            xmin, ymin, xmax, ymax = bbox
            result["class_id"].append(clsid)
            result["confidence"].append(score)
            result["left_top"].append([xmin, ymin])
            result["right_bottom"].append([xmax, ymax])
        result["class_id"] = str(result["class_id"])
        result["confidence"] = str(result["confidence"])
        result["left_top"] = str(result["left_top"])
        result["right_bottom"] = str(result["right_bottom"])
        return result, None, ""


# the rest is boilerplate
class RetinaNetService(WebService):
    def get_pipeline_response(self, read_op):
        retinanet_op = RetinaNetOp(name="retinanet", input_ops=[read_op])
        return retinanet_op


# define the service class
uci_service = RetinaNetService(name="retinanet")
# load config and prepare the service
uci_service.prepare_pipeline_config("config.yml")
# start the service
uci_service.run_service()
As for the server-side configuration file, only a small amount needs to be written:
op:
  # op name; it must match the name argument used when initializing the Service class in web_service
  retinanet:
    # concurrency; with is_thread_op=True this is thread concurrency, otherwise process concurrency
    concurrency: 1
    # when the op config has no server_endpoints, the local service config is read from local_service_conf
    local_service_conf:
      # location of the exported serving model
      model_config: "./serving_server"
For full details, see:
deploy/serving/web_service.py
deploy/serving/config.yml
For how to write the preprocessing operators, see deploy/serving/preprocess_ops.py and add whatever your model needs.
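To illustrate the operator style used by web_service.py above, here is a minimal sketch of two such operators. It is a hypothetical simplification, not the actual contents of preprocess_ops.py: each operator is a callable that takes (im, im_info) and returns the transformed pair, so operators can be chained with Compose.

import numpy as np


class Permute(object):
    # HWC -> CHW, matching the convention of the other operators
    def __call__(self, im, im_info):
        im = im.transpose((2, 0, 1)).copy()
        return im, im_info


class Compose(object):
    # chain operators; each takes and returns (im, im_info)
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, im, im_info):
        for t in self.transforms:
            im, im_info = t(im, im_info)
        return im, im_info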
Now go to a terminal and run python3 deploy/serving/web_service.py &. Note that if the service has been started before, you need to kill the old process first, as shown below. Also note that at this point all paths have to be absolute, whereas the TIPC integration later uses relative paths.
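A generic way to do this (a sketch; the actual PID will differ on your machine):

# find the previously started web_service.py process and kill it before restarting
ps -ef | grep web_service.py | grep -v grep
kill -9 <PID>   # replace <PID> with the process id printed by the ps command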
The client is even simpler: it mainly specifies which model to access and which image to test, as follows:
import base64
import json

import requests


def get_args(add_help=True):
    import argparse
    parser = argparse.ArgumentParser(
        description='Paddle Serving', add_help=add_help)
    # image to test
    parser.add_argument('--img_path', default="dataset/coco/test2017/000000575930.jpg")
    args = parser.parse_args()
    return args


# encode the input
def cv2_to_base64(image):
    return base64.b64encode(image).decode('utf8')


def main(args):
    # URL of the service; remember to change it to your own model name
    url = "http://127.0.0.1:18080/retinanet/prediction"
    logid = 10000
    img_path = args.img_path
    with open(img_path, 'rb') as file:
        image_data1 = file.read()
    # data should be transformed to the base64 format
    image = cv2_to_base64(image_data1)
    data = {"key": ["image"], "value": [image], "logid": logid}
    # send the request
    r = requests.post(url=url, data=json.dumps(data))
    print(r.json())


if __name__ == "__main__":
    args = get_args()
    main(args)
To test the client you just wrote, start the server as described above, press Ctrl+C to get the prompt back, and then run the client from the terminal: python3 deploy/serving/pipeline_http_client.py
Hooking Serving into TIPC is not as involved as before: very little needs to change, and the caveats are the same as earlier. The main task is writing the txt file:
===========================serving_params===========================
model_name:RetinaNet
python:python3.7
trans_model:-m paddle_serving_client.convert
--dirname:output_infer/python/retinanet_r50/retinanet_r50_fpn_1x_coco   # location of the static-graph model used to export the serving model
--model_filename:model.pdmodel
--params_filename:model.pdiparams
--serving_server:deploy/serving/serving_server   # where the server-side model is exported
--serving_client:deploy/serving/serving_client   # where the client-side model is exported
serving_dir:./deploy/serving
web_service:web_service.py
op.alexnet.local_service_conf.devices:0
null:null
null:null
null:null
null:null
pipline:pipeline_http_client.py
--img_path:../../dataset/coco/test2017/000000575930.jpg   # image to test
A more detailed description, including the environment setup, can be found in tipc/serving/README.md. First install the dependencies:
!pip install -r requirements.txt
Then run the test:
!bash tipc/serving/test_serving.sh tipc/serving/configs/retinanet_r50_fpn_1x_coco.txt
################### run test ###################
/home/aistudio/deploy/serving
2022/01/25 11:23:14 start proxy service
W0125 11:23:18.311997  6867 analysis_predictor.cc:795] The one-time configuration of analysis predictor failed, which may be due to native predictor called first and its configurations taken effect.
I0125 11:23:18.441354  6867 analysis_predictor.cc:665] ir_optim is turned off, no IR pass will be executed
--- Running analysis [ir_graph_build_pass]
--- Running analysis [ir_graph_clean_pass]
--- Running analysis [ir_analysis_pass]
--- Running analysis [ir_params_sync_among_devices_pass]
I0125 11:23:18.746263  6867 ir_params_sync_among_devices_pass.cc:45] Sync params from CPU to GPU
--- Running analysis [adjust_cudnn_workspace_size_pass]
--- Running analysis [inference_op_replace_pass]
--- Running analysis [memory_optimize_pass]
I0125 11:23:18.907073  6867 memory_optimize_pass.cc:216] Cluster name : reshape2_35.tmp_1  size: 0
I0125 11:23:18.907119  6867 memory_optimize_pass.cc:216] Cluster name : fill_constant_43.tmp_0  size: 4
I0125 11:23:18.907122  6867 memory_optimize_pass.cc:216] Cluster name : fill_constant_41.tmp_0  size: 4
I0125 11:23:18.907131  6867 memory_optimize_pass.cc:216] Cluster name : im_shape  size: 8
I0125 11:23:18.907136  6867 memory_optimize_pass.cc:216] Cluster name : scale_factor  size: 8
I0125 11:23:18.907138  6867 memory_optimize_pass.cc:216] Cluster name : image  size: 12
I0125 11:23:18.907143  6867 memory_optimize_pass.cc:216] Cluster name : conv2d_181.tmp_1  size: 144
I0125 11:23:18.907147  6867 memory_optimize_pass.cc:216] Cluster name : batch_norm_52.tmp_3  size: 8192
I0125 11:23:18.907155  6867 memory_optimize_pass.cc:216] Cluster name : relu_39.tmp_0  size: 4096
I0125 11:23:18.907160  6867 memory_optimize_pass.cc:216] Cluster name : conv2d_123.tmp_0  size: 8192
I0125 11:23:18.907163  6867 memory_optimize_pass.cc:216] Cluster name : batch_norm_49.tmp_1  size: 8192
I0125 11:23:18.907166  6867 memory_optimize_pass.cc:216] Cluster name : conv2d_161.tmp_1  size: 144
I0125 11:23:18.907169  6867 memory_optimize_pass.cc:216] Cluster name : conv2d_171.tmp_1  size: 144
I0125 11:23:18.907172  6867 memory_optimize_pass.cc:216] Cluster name : relu_45.tmp_0  size: 8192
I0125 11:23:18.907176  6867 memory_optimize_pass.cc:216] Cluster name : elementwise_add_15  size: 8192
I0125 11:23:18.907179  6867 memory_optimize_pass.cc:216] Cluster name : reshape2_28.tmp_0  size: 320
I0125 11:23:18.907183  6867 memory_optimize_pass.cc:216] Cluster name : conv2d_141.tmp_1  size: 144
I0125 11:23:18.907186  6867 memory_optimize_pass.cc:216] Cluster name : relu_21.tmp_0  size: 2048
I0125 11:23:18.907189  6867 memory_optimize_pass.cc:216] Cluster name : relu_88.tmp_0  size: 1024
I0125 11:23:18.907197  6867 memory_optimize_pass.cc:216] Cluster name : conv2d_151.tmp_1  size: 144
--- Running analysis [ir_graph_to_program_pass]
I0125 11:23:19.583788  6867 analysis_predictor.cc:714] ======= optimize end =======
I0125 11:23:19.620709  6867 naive_executor.cc:98] ---  skip [feed], feed -> scale_factor
I0125 11:23:19.620766  6867 naive_executor.cc:98] ---  skip [feed], feed -> image
I0125 11:23:19.620771  6867 naive_executor.cc:98] ---  skip [feed], feed -> im_shape
I0125 11:23:19.632345  6867 naive_executor.cc:98] ---  skip [_generated_var_22], fetch -> fetch
I0125 11:23:19.632387  6867 naive_executor.cc:98] ---  skip [_generated_var_23], fetch -> fetch
W0125 11:23:19.708725  6867 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 10.1, Runtime API Version: 10.1
W0125 11:23:19.712651  6867 device_context.cc:465] device: 0, cuDNN Version: 7.6.
{'err_no': 0, 'err_msg': '', 'key': ['class_id', 'confidence', 'left_top', 'right_bottom'], 'value': ['[0, 27, 39, 39, 39, 39, 48, 48]', '[0.9298271, 0.78884697, 0.609955, 0.56487834, 0.56370527, 0.5328276, 0.6830632, 0.67401433]', '[[288.6603, 9.321735], [412.90067, 172.55153], [539.206, 3.6034787], [557.5477, 4.5205536], [521.5307, 4.789155], [571.5571, 0.0], [15.654112, 242.51068], [202.25995, 197.21396]]', '[[638.92633, 390.8219], [477.95944, 296.9499], [559.34314, 64.882324], [572.8147, 40.891556], [538.74994, 67.22812], [583.3348, 42.266556], [213.04216, 322.14337], [368.59772, 320.33978]]'], 'tensors': []}
Run successfully with command - python3.7 pipeline_http_client.py --img_path=../../dataset/coco/test2017/000000575930.jpg > ../../tipc/serving/output/server_infer_gpu_pipeline_http_usetrt_null_precision_null_batchsize_1.log 2>&1 !
The full output is saved under tipc/serving/output. We can compare the Serving output:
{'err_no': 0, 'err_msg': '', 'key': ['class_id', 'confidence', 'left_top', 'right_bottom'], 'value': ['[0, 27, 39, 39, 39, 39, 48, 48]', '[0.9298271, 0.78884697, 0.609955, 0.56487834, 0.56370527, 0.5328276, 0.6830632, 0.67401433]', '[[288.6603, 9.321735], [412.90067, 172.55153], [539.206, 3.6034787], [557.5477, 4.5205536], [521.5307, 4.789155], [571.5571, 0.0], [15.654112, 242.51068], [202.25995, 197.21396]]', '[[638.92633, 390.8219], [477.95944, 296.9499], [559.34314, 64.882324], [572.8147, 40.891556], [538.74994, 67.22812], [583.3348, 42.266556], [213.04216, 322.14337], [368.59772, 320.33978]]'], 'tensors': []}
with the inference output:
class_id:0, confidence:0.9298, left_top:[288.66,9.32],right_bottom:[638.93,390.82]
class_id:27, confidence:0.7888, left_top:[412.90,172.55],right_bottom:[477.96,296.95]
class_id:39, confidence:0.6100, left_top:[539.21,3.60],right_bottom:[559.34,64.88]
class_id:39, confidence:0.5649, left_top:[557.55,4.52],right_bottom:[572.81,40.89]
class_id:39, confidence:0.5637, left_top:[521.53,4.79],right_bottom:[538.75,67.23]
class_id:39, confidence:0.5328, left_top:[571.56,0.00],right_bottom:[583.33,42.27]
class_id:48, confidence:0.6831, left_top:[15.65,242.51],right_bottom:[213.04,322.14]
class_id:48, confidence:0.6740, left_top:[202.26,197.21],right_bottom:[368.60,320.34]
The two outputs match exactly. That wraps up the project.