Hardware Environment (Ascend/GPU/CPU): CPU
Software Environment:
– MindSpore version (source or binary): 1.6.0
– Python version (e.g., Python 3.7.5): 3.7.6
– OS platform and distribution (e.g., Linux Ubuntu 16.04): Ubuntu 4.15.0-74-generic
– GCC/Compiler version (if compiled from source):
此案例使用mindspore.dataset自定义数据集:
import os

import numpy as np
from PIL import Image

import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.vision.c_transforms as vc


class _dcp_Dataset:
    """Random-access source of (image, label) samples read from a folder of .jpg files.

    The label of a sample is parsed from its file name: the text after the
    last "_" and before the first following "." (e.g. "cat_3.jpg" -> "3").
    """

    def __init__(self, img_root_dir, device_target="CPU"):
        """Collect the names of all ".jpg" files found directly in *img_root_dir*.

        Args:
            img_root_dir: Directory that holds the image files.
            device_target: Stored on the instance as ``self.target``; it is
                not used anywhere else in this script.

        Raises:
            RuntimeError: If *img_root_dir* does not exist.
        """
        if not os.path.exists(img_root_dir):
            raise RuntimeError(f"the input image dir {img_root_dir} is invalid")
        self.img_root_dir = img_root_dir
        self.img_names = [i for i in os.listdir(img_root_dir) if i.endswith(".jpg")]
        self.target = device_target

    def __len__(self):
        """Return the number of .jpg files that were found."""
        return len(self.img_names)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the file at position *index*.

        ``image`` is the decoded picture as a NumPy array and ``label`` is a
        NumPy array wrapping the label string parsed from the file name.
        """
        img_name = self.img_names[index]
        # Use a context manager so the underlying file handle is released as
        # soon as the pixel data has been copied into the NumPy array.
        with Image.open(os.path.join(self.img_root_dir, img_name)) as im:
            image = np.array(im)
        label_str = img_name.split("_")[-1]
        label_str = label_str.split(".")[0]
        label = np.array(label_str)
        return image, label


def creat_dataset(dataset_path, batch_size=2, num_shards=1, shard_id=0, device_target="CPU"):
    """Build the batched MindSpore dataset pipeline: GeneratorDataset -> map -> map -> batch.

    Args:
        dataset_path: Directory with the .jpg images (see ``_dcp_Dataset``).
        batch_size: Number of samples per batch; incomplete batches are dropped.
        num_shards: Number of shards, forwarded to ``GeneratorDataset``.
        shard_id: Index of the shard to read, forwarded to ``GeneratorDataset``.
        device_target: Forwarded to ``_dcp_Dataset``.

    Returns:
        The dataset object produced by the final ``batch`` operation.
    """
    dataset = _dcp_Dataset(dataset_path, device_target)
    # Forward the sharding arguments instead of hard-coding 1 / 0, so that a
    # caller-supplied num_shards / shard_id is no longer silently ignored.
    data_set = ds.GeneratorDataset(
        dataset,
        ["image", "label"],
        shuffle=True,
        num_shards=num_shards,
        shard_id=shard_id,
    )
    image_trans = [
        vc.Resize((224, 224)),
        vc.RandomHorizontalFlip(),
        vc.Rescale(1 / 255, shift=0),
        vc.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)),
        # NOTE: the missing "()" is intentional. This script reproduces the
        # error discussed in this case; writing vc.HWC2CHW() (an instance
        # instead of the class itself) is the documented fix.
        vc.HWC2CHW
    ]
    label_trans = [C.TypeCast(mstype.int32)]
    data_set = data_set.map(operations=image_trans, input_columns=["image"])
    data_set = data_set.map(operations=label_trans, input_columns=["label"])
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)
    return data_set


if __name__ == '__main__':
    data = creat_dataset("./image_DCP")
    print(data)
    data_loader = data.create_dict_iterator()
    # Use a distinct loop variable so the dataset object bound to `data`
    # is not overwritten by the first batch.
    for i, batch in enumerate(data_loader):
        print(i)
        print(batch)
报错信息:
原因:image_trans 列表中的 vc.HWC2CHW 缺少 (),传入的是类本身而不是算子实例;将其改为 vc.HWC2CHW() 即可正常执行。
例如:有 xxDataset -> map -> map -> batch 这样的数据处理流程。
可以按如下方式调试脚本:
按照上述的方法,可定位到是哪个map/batch出错了。