本文代码建议把图片和标注文件分开放
图片存放在 images 文件夹下(本脚本不会读取图片)
标注文件存在 annotations 文件夹下
这里还用到 tqdm 库来显示进度条;如果不需要进度条,可以把相关代码注释掉
import xml.etree.ElementTree as ET  # XML parsing
import pickle
import os
import glob
from os import listdir, getcwd
from os.path import join

try:
    from tqdm import tqdm  # optional third-party progress bar
except ImportError:
    # tqdm only provides the progress display; counting works without it.
    tqdm = None


class _NoProgress:
    """Do-nothing stand-in for a tqdm bar, used when tqdm is not installed."""

    def update(self, n=1):
        pass

    def close(self):
        pass


def class_num(_dir, class_name, dataset, nums=None):
    """Count ``<object>`` elements per label across the ``*.xml`` files in a folder.

    Every ``<object>`` element found anywhere in a file is counted once. Its
    label is the text of its ``<name>`` child, compared exactly (no stripping
    or case folding) against the entries of ``class_name``.

    Args:
        _dir: Folder containing the XML annotation files. A trailing path
            separator is accepted but not required.
        class_name: Iterable of label strings to count individually.
        dataset: Name shown in the progress-bar description.
        nums: Total used for the progress bar. Defaults to the number of
            ``*.xml`` files found in ``_dir``.

    Returns:
        A dict with one key per entry of ``class_name`` plus two extra keys:
        ``"other"`` (objects whose label is not in ``class_name``, including
        objects with no ``<name>`` child) and ``"sum"`` (all objects). A class
        that is itself named ``"other"`` or ``"sum"`` shares that key.

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
    """
    known = set(class_name)
    result = {clss: 0 for clss in class_name}  # per-class object counts
    result["other"] = 0  # objects with an unlisted (or missing) label
    result["sum"] = 0  # all objects

    # os.path.join keeps the pattern correct whether or not _dir ends in a separator.
    xml_paths = glob.glob(os.path.join(_dir, "*.xml"))
    total = len(xml_paths) if nums is None else nums

    if tqdm is None:
        pbar = _NoProgress()
    else:
        pbar = tqdm(total=total, desc="%s-porcess" % dataset, unit="xml")

    try:
        for xml_path in xml_paths:
            pbar.update(1)
            # Pass the path so the parser reads bytes and honours the file's
            # own encoding declaration instead of forcing UTF-8.
            tree = ET.parse(xml_path)
            for obj in tree.iter("object"):
                result["sum"] += 1
                # findtext returns None when <name> is absent, so such an
                # object is counted as "other" instead of raising.
                label = obj.findtext("name")
                if label in known:
                    result[label] += 1
                else:
                    result["other"] += 1
    finally:
        # Release the progress bar even if a file fails to parse.
        pbar.close()
    return result


if __name__ == '__main__':
    train_dir = "E:/DL/detectron2/SwinT_detectron2/datasets/new/train/annotations/"
    test_dir = "E:/DL/detectron2/SwinT_detectron2/datasets/test/annotations/"
    class_name = ["0", "1", "2"]  # label names used during annotation
    # Count only the .xml files so the progress-bar total matches what
    # class_num actually reads.
    train_num = len(glob.glob(os.path.join(train_dir, "*.xml")))
    test_num = len(glob.glob(os.path.join(test_dir, "*.xml")))
    print(train_num, test_num)
    results1 = class_num(train_dir, class_name, "train", train_num)
    results2 = class_num(test_dir, class_name, "test", test_num)
    print("\n\n训练集: ", results1)
    print("\n测试集: ", results2)