背景: 在项目中需要解析pb二进制文件,转为明文,写入txt文本中保存,同时转为由分隔符进行分隔的行列式结构,在这记录一下这个过程,以下列出了两种方法:方法二存在bytes解析失败的问题,采取方法一:
主要由以下四步组成:
1、二进制数据反序列化
2、反序列化数据写入临时文件 temp
3、读取临时文件,转换为标准行列式,写入明文文件
4、删除临时文件
""" file_src:pb二进制文件 file_des:txt明文文件 """ def pb_2_txt(self, binary_conf, file_src, file_des): """ 从pb中读取数据文件 binary_conf 配置信息 file_src 二进制文件 file_des 明文文件存储位置 """ # 指明引入的包:proto生成的py文件 import pb2 if len(binary_conf['message']) > 0: try: # pb_message:就是生成的py文件中的对象 pb_message = eval(binary_conf['message']) except NameError: print_utils.print_warning('[FATAL] pb name not found: %s, quit' % binary_conf['message']) exit(1) except AttributeError: print_utils.print_warning('[FATAL] pb attribute not found: %s, quit' % binary_conf['message']) exit(1) else: print_utils.print_warning('[FATAL] message not found: %s, quit') exit(1) if len(binary_conf['message_name']) > 0: try: message_name = binary_conf['message_name'] except NameError: print_utils.print_warning('[FATAL] pb message_name not found: %s, quit' % binary_conf['message_name']) exit(1) except AttributeError: print_utils.print_warning('[FATAL] pb attribute not found: %s, quit' % binary_conf['message_name']) exit(1) else: print_utils.print_warning('[FATAL] message_name not found: %s, quit') exit(1) mesasge_list = [] """ 方法一:读取pb数据,写入文件,不转为dict,解决bytes类型数据转换失败的问题 1、二进制数据反序列化 2、反序列化数据写入临时文件 temp 3、读取临时文件,转换为标准行列式,写入明文文件 4、删除临时文件 """ try: # 1、二进制反序列化 with open(file_src, 'rb') as bf: binary_data = bf.read() # 反序列化 pb_message.ParseFromString(binary_data) except Exception as e: traceback.print_exc() print_utils.print_warning('[FATAL] ParseFromString fail: %s, quit' % binary_conf['message']) exit(1) try: # 2、反序列化数据写入临时文件 with open(file_des + '.temp', 'w') as tf: tf.write(str(pb_message)) except Exception as e: traceback.print_exc() print_utils.print_warning('[FATAL] write temp file fail: %s, quit' % binary_conf['message']) exit(1) try: # 读取临时文件,写入明文文件,转换为标准行列式 with open(file_des + '.temp', 'r') as tf: mesasge_list = [] temp_list = [] temp_content = tf.readlines() for line in temp_content: line = line.strip('\n') # 根据message_name区分,过滤首尾行,例如:coach_lines { } if '{' in line: # 首行 continue if '}' in line: # 尾行 # 写入message_list mesasge_list.append(binary_conf['split'].join(temp_list)) temp_list = [] else: # 非首尾行,写入 temp_list.append(line.split(': ')[1]) # 写入明文文件 self.write_all(file_des, mesasge_list) except Exception as e: traceback.print_exc() print_utils.print_warning('[FATAL] write txt file fail: %s, quit' % binary_conf['message']) exit(1) try: # 删除temp临时文件 os.remove(file_des + '.temp') except Exception as e: print_utils.print_warning('[WARNING] remove temp file fail: %s, quit' % binary_conf['message']) exit(1) # 方法二:pb转dict,有问题:bytes类型的数据protobuf_to_dict转换有问题 # try: # with open(file_src, 'rb') as bf: # # 二进制文件数据 # binary_data = bf.read() # # 反序列化 # pb_message.ParseFromString(binary_data) # # pb转dict # dict_data = protobuf_to_dict(pb_message) # # 处理dict,写入明文文件中 # coach_graphs = dict_data[message_name] # for graphDic in coach_graphs: # # print(graphDic) # # exit(1) # message = [] # # 处理common字段 # if len(binary_conf['common']) > 0: # count = 0 # for common_field in binary_conf['common']: # if count > 10: # exit(1) # msg_type = common_field.split(' ')[0] # msg_content = graphDic[common_field.split(' ')[1]] # # # bytes字段转为字符串 # if msg_type == 'bytes': # print('------') # print(("b'" + msg_content).decode()) # exit(1) # message.append(str(msg_content.decode("utf-8"))) # # message.append(str(msg_content.decode("utf-8").decode('gbk').encode('utf-8'))) # else: # message.append(str(msg_content)) # # print(msg_type) # # print(msg_content) # count += 1 # # 处理repeated字段 # if len(binary_conf['repeated']) > 0: # for common_field in binary_conf['repeated']: # for line_sid in graphDic[common_field.split(' ')[1]]: # # message.append(str(line_sid.decode("gbk"))) # aaa.decode("gbk") # message.append(str(line_sid)) # mesasge_list.append(binary_conf['split'].join(message)) # self.write_all(file_des, mesasge_list) # except Exception as e: # traceback.print_exc() # print_utils.print_warning('[FATAL] ParseFromString fail: %s, quit' % binary_conf['message']) # exit(1)