@TOC
本文只是对于pytorch深度学习框架的使用方法的介绍,如果涉及算法中复杂的数学原理,本文将不予阐述,敬请读者自行阅读相关论文或者文献。
代码 | 含义 |
---|---|
float32 | 32位float |
float | float |
float64 | 64位float |
double | double |
float16 | 16位float |
bfloat16 | 16位brain float,数值范围与float32相同,但精度更低 |
int8 | 8位int |
int16 | 16位int |
short | short |
int32 | 32位int |
int | int |
int64 | 64位int |
long | long |
complex32 | 32位complex |
complex64 | 64位complex |
cfloat | complex float |
complex128 | 128位complex float |
cdouble | complex double |
创建tensor时,有很多参数可以选择,为节省篇幅,本文在列举API时只列举一次,不列举重载的API。
@overload def empty(size: Sequence[Union[_int, SymInt]], *, memory_format: Optional[memory_format]=None, out: Optional[Tensor]=None, dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
size:[行数,列数]
dtype(data type):数据类型
device:选择运算设备
requires_grad:是否进行自动求导,默认为False
gpu=torch.device("cuda") empty_tensor=torch.empty(size=[3,4],device=gpu,requires_grad=True) print(empty_tensor)
输出
tensor([[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]], device='cuda:0', requires_grad=True)
@overload def ones(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
size:[行数,列数]
dtype(data type):数据类型
device:选择运算设备
requires_grad:是否进行自动求导,默认为False
@overload def zeros(size: _size, *, names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload def rand(size: _size, *, generator: Optional[Generator], names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
@overload def randint(low: _int, high: _int, size: _size, *, generator: Optional[Generator]=None, dtype: Optional[_dtype]=None, device: Device=None, requires_grad: _bool=False) -> Tensor: ...
int_tensor=torch.randint(low=0,high=20,size=[5,6],device=gpu) print(int_tensor)
输出
tensor([[18, 0, 14, 7, 18, 14], [17, 0, 2, 0, 0, 3], [16, 17, 5, 15, 1, 14], [ 7, 12, 8, 6, 4, 11], [12, 4, 7, 5, 3, 3]], device='cuda:0')
@overload def randn(size: _size, *, generator: Optional[Generator], names: Optional[Sequence[Union[str, ellipsis, None]]], dtype: Optional[_dtype]=None, layout: Optional[_layout]=None, device: Optional[Union[_device, str, None]]=None, pin_memory: Optional[_bool]=False, requires_grad: Optional[_bool]=False) -> Tensor: ...
def tensor(data: Any, dtype: Optional[_dtype]=None, device: Device=None, requires_grad: _bool=False) -> Tensor: ...
torch.from_numpy()
,返回的tensor与ndarray共享内存。def numpy(self,*args, **kwargs): # real signature unknown; NOTE: unreliably restored from __doc__ pass
item
def item(self): # real signature unknown; restored from __doc__ ...
def dim(self): #real signature unknown; restored from __doc__ return 0
dtype = property(lambda self: object(), lambda self, v: None, lambda self: None) # default
def size(self,dim=None): # real signature unknown; restored from __doc__ pass
.shape
效果相同假设有模型A和模型B,我们需要将A的输出作为B的输入,但训练时我们只训练模型B. 那么可以这样做:
input_B = output_A.detach()
它可以使两个计算图的梯度传递断开,从而实现我们所需的功能。
返回一个新的tensor,新的tensor和原来的tensor共享数据内存,但不涉及梯度计算,即requires_grad=False。修改其中一个tensor的值,另一个也会改变,因为是共享同一块内存。
sequence_tensor=torch.tensor(np.array([[[1,2,3], [4,5,6]], [[9,8,7], [6,5,4]]]), dtype=torch.float,device=gpu,) sequence_tensor_shallowCp=sequence_tensor.detach() sequence_tensor_shallowCp+=1 print(sequence_tensor) print(sequence_tensor_shallowCp.requires_grad)
输出
tensor([[[ 2., 3., 4.], [ 5., 6., 7.]], [[10., 9., 8.], [ 7., 6., 5.]]], device='cuda:0') False
.clone().detach()
.new_tensor()
向量或矩阵转置
def t(self): # real signature unknown; restored from __doc__ """ t() -> Tensor See :func:`torch.t` """ return _te.Tensor(*(), **{})
指定两个维度进行转置:
def transpose(self, dim0: _int, dim1: _int) -> Tensor: r""" transpose(dim0, dim1) -> Tensor See :func:`torch.transpose` """ ...
.t()
等价于.transpose(0, 1)
多维度同时转置
def permute(self, *dims): # real signature unknown; restored from __doc__ """ permute(*dims) -> Tensor See :func:`torch.permute` """ return _te.Tensor(*(), **{})
cat
堆叠cat
可以把两个或多个tensor沿着指定的维度进行连接,连接后的tensor维度个数不变,指定维度上的大小改变,非指定维度上的大小不变。譬如,两个shape=(3,)
行向量按dim=0
连接,变成1个shape=(6,)
的行向量;2个3阶方阵按dim=0
连接,就变成1个(6, 3)
的矩阵。
cat
在使用时对输入的这些tensor有要求:除了指定维度,其他维度的大小必须相同。譬如,1个shape=(1, 6)
的矩阵可以和1个shape=(2, 6)
的矩阵在dim=0
连接。
例子可以参考下面的定义和注释。
def cat(tensors: Union[Tuple[Tensor, ...], List[Tensor]], dim: _int = 0, *, out: Optional[Tensor] = None) -> Tensor: r""" cat(tensors, dim=0, *, out=None) -> Tensor Concatenates the given sequence of :attr:`seq` tensors in the given dimension. All tensors must either have the same shape (except in the concatenating dimension) or be a 1-D empty tensor with size ``(0,)``. :func:`torch.cat` can be seen as an inverse operation for :func:`torch.split` and :func:`torch.chunk`. :func:`torch.cat` can be best understood via examples. .. seealso:: :func:`torch.stack` concatenates the given sequence along a new dimension. Args: tensors (sequence of Tensors): any python sequence of tensors of the same type. Non-empty tensors provided must have the same shape, except in the cat dimension. dim (int, optional): the dimension over which the tensors are concatenated Keyword args: out (Tensor, optional): the output tensor. Example:: >>> x = torch.randn(2, 3) >>> x tensor([[ 0.6580, -1.0969, -0.4614], [-0.1034, -0.5790, 0.1497]]) >>> torch.cat((x, x, x), 0) tensor([[ 0.6580, -1.0969, -0.4614], [-0.1034, -0.5790, 0.1497], [ 0.6580, -1.0969, -0.4614], [-0.1034, -0.5790, 0.1497], [ 0.6580, -1.0969, -0.4614], [-0.1034, -0.5790, 0.1497]]) >>> torch.cat((x, x, x), 1) tensor([[ 0.6580, -1.0969, -0.4614, 0.6580, -1.0969, -0.4614, 0.6580, -1.0969, -0.4614], [-0.1034, -0.5790, 0.1497, -0.1034, -0.5790, 0.1497, -0.1034, -0.5790, 0.1497]]) """ ...
stack
堆叠stack
与cat
有很大的区别,stack
把两个或多个tensor在dim
上创建一个全新的维度进行连接,非指定维度个数不变,创建的维度的大小取决于这次连接使用了多少个tensor。譬如,3个shape=(3,)
行向量按dim=0
连接,会变成一个shape=(3, 3)
的矩阵;两个3阶方阵按dim=-1
连接,就变成一个(3, 3, 2)
的tensor。
def stack(tensors: Union[Tuple[Tensor, ...], List[Tensor]], dim: _int = 0, *, out: Optional[Tensor] = None) -> Tensor: r""" stack(tensors, dim=0, *, out=None) -> Tensor Concatenates a sequence of tensors along a new dimension. All tensors need to be of the same size. .. seealso:: :func:`torch.cat` concatenates the given sequence along an existing dimension. Arguments: tensors (sequence of Tensors): sequence of tensors to concatenate dim (int, optional): dimension to insert. Has to be between 0 and the number of dimensions of concatenated tensors (inclusive). Default: 0 Keyword args: out (Tensor, optional): the output tensor. Example:: >>> x = torch.randn(2, 3) >>> x tensor([[ 0.3367, 0.1288, 0.2345], [ 0.2303, -1.1229, -0.1863]]) >>> x = torch.stack((x, x)) # same as torch.stack((x, x), dim=0) >>> x tensor([[[ 0.3367, 0.1288, 0.2345], [ 0.2303, -1.1229, -0.1863]], [[ 0.3367, 0.1288, 0.2345], [ 0.2303, -1.1229, -0.1863]]]) >>> x.size() torch.Size([2, 2, 3]) >>> x = torch.stack((x, x), dim=1) tensor([[[ 0.3367, 0.1288, 0.2345], [ 0.3367, 0.1288, 0.2345]], [[ 0.2303, -1.1229, -0.1863], [ 0.2303, -1.1229, -0.1863]]]) >>> x = torch.stack((x, x), dim=2) tensor([[[ 0.3367, 0.3367], [ 0.1288, 0.1288], [ 0.2345, 0.2345]], [[ 0.2303, 0.2303], [-1.1229, -1.1229], [-0.1863, -0.1863]]]) >>> x = torch.stack((x, x), dim=-1) tensor([[[ 0.3367, 0.3367], [ 0.1288, 0.1288], [ 0.2345, 0.2345]], [[ 0.2303, 0.2303], [-1.1229, -1.1229], [-0.1863, -0.1863]]]) """ ...
view
改变形状view
先把数据变成一维数组,然后再转换成指定形状。变换前后的元素个数并不会改变,所以变换前后的shape的乘积必须相等。详细例子如下:
def view(self, *shape): # real signature unknown; restored from __doc__ """ Example:: >>> x = torch.randn(4, 4) >>> x.size() torch.Size([4, 4]) >>> y = x.view(16) >>> y.size() torch.Size([16]) >>> z = x.view(-1, 8) # the size -1 is inferred from other dimensions >>> z.size() torch.Size([2, 8]) >>> a = torch.randn(1, 2, 3, 4) >>> a.size() torch.Size([1, 2, 3, 4]) >>> b = a.transpose(1, 2) # Swaps 2nd and 3rd dimension >>> b.size() torch.Size([1, 3, 2, 4]) >>> c = a.view(1, 3, 2, 4) # Does not change tensor layout in memory >>> c.size() torch.Size([1, 3, 2, 4]) >>> torch.equal(b, c) False """ return _te.Tensor(*(), **{})
reshape
改变形状reshape
与view
的区别如下:
view
只能改变连续(.contiguous())的tensor,如果已经对tensor进行了permute、transpose等操作,tensor在内存中会变得不连续,此时调用view
会报错。且view
方法与原来的tensor共享内存。reshape
在调用时自动检测原tensor是否连续,如果是,则等价于view
;如果不是,先调用.contiguous()
,再调用view
,此时返回值与原来tensor不共享内存。def reshape(self, shape: Sequence[Union[_int, SymInt]]) -> Tensor: ...
def mean(self, dim=None, keepdim=False, *args, **kwargs): # real signature unknown; NOTE: unreliably restored from __doc__ ... def sum(self, dim=None, keepdim=False, dtype=None): # real signature unknown; restored from __doc__ ... def median(self, dim=None, keepdim=False): # real signature unknown; restored from __doc__ ... def mode(self, dim=None, keepdim=False): # real signature unknown; restored from __doc__ ... def dist(self, other, p=2): # real signature unknown; restored from __doc__ ... def std(self, dim, unbiased=True, keepdim=False): # real signature unknown; restored from __doc__ ... def var(self, dim, unbiased=True, keepdim=False): # real signature unknown; restored from __doc__ ... def cumsum(self, dim, dtype=None): # real signature unknown; restored from __doc__ ... def cumprod(self, dim, dtype=None): # real signature unknown; restored from __doc__ ...
to
可以把tensor转移到指定设备上。
def to(self, *args, **kwargs): # real signature unknown; restored from __doc__ """ Example:: >>> tensor = torch.randn(2, 2) # Initially dtype=float32, device=cpu >>> tensor.to(torch.float64) tensor([[-0.5044, 0.0005], [ 0.3310, -0.0584]], dtype=torch.float64) >>> cuda0 = torch.device('cuda:0') >>> tensor.to(cuda0) tensor([[-0.5044, 0.0005], [ 0.3310, -0.0584]], device='cuda:0') >>> tensor.to(cuda0, dtype=torch.float64) tensor([[-0.5044, 0.0005], [ 0.3310, -0.0584]], dtype=torch.float64, device='cuda:0') >>> other = torch.randn((), dtype=torch.float64, device=cuda0) >>> tensor.to(other, non_blocking=True) tensor([[-0.5044, 0.0005], [ 0.3310, -0.0584]], dtype=torch.float64, device='cuda:0') """ return _te.Tensor(*(), **{})
示例
sequence_tensor=torch.tensor(np.array([[[1,2,3], [4,5,6]], [[9,8,7], [6,5,4]]]), dtype=torch.float,device=gpu,requires_grad=True) multi_tensor=sequence_tensor*3+1 multi_tensor_mean=multi_tensor.mean() multi_tensor_mean.backward() print(sequence_tensor.grad)
输出
tensor([[[0.2500, 0.2500, 0.2500], [0.2500, 0.2500, 0.2500]], [[0.2500, 0.2500, 0.2500], [0.2500, 0.2500, 0.2500]]], device='cuda:0')
nn.Module是torch.nn提供的一个类,是pytorch中定义网络的必要的一个父类,在这个类中定义了很多有用的方法,使我们非常方便地计算。在我们进行网络的定义时,有两个地方需要特别注意:
class lr(nn.Module):
    """Single-layer linear regression model computing ``y = x @ W.T + b``.

    Args:
        in_features: Number of input features per sample (default 1).
        out_features: Number of predicted values per sample (default 1).
    """

    def __init__(self, in_features=1, out_features=1):
        # nn.Module.__init__ has to run before any sub-module is assigned,
        # otherwise the layer cannot be registered on this module.
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's prediction for the input batch ``x``."""
        return self.linear(x)
其中,nn.Linear的参数依次为:输入的特征数(in_features),输出的特征数(out_features)。
优化器(optimizer),用来操纵参数的梯度以更新参数,常见的方法有随机梯度下降(stochastic gradient descent)(SGD)等。
lr_scheduler允许模型在训练的过程中动态更新学习率,且提供了许多种策略可供选择,以下列举一些常用的:
指数衰减:在训练的过程中,学习率以设定的gamma参数进行指数的衰减。
class ExponentialLR(LRScheduler): """Decays the learning rate of each parameter group by gamma every epoch. When last_epoch=-1, sets initial lr as lr. Args: optimizer (Optimizer): Wrapped optimizer. gamma (float): Multiplicative factor of learning rate decay. last_epoch (int): The index of last epoch. Default: -1. verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False``. """ def __init__(self, optimizer, gamma, last_epoch=-1, verbose=False): self.gamma = gamma super().__init__(optimizer, last_epoch, verbose)
固定步长衰减:在固定的训练周期后,以指定的频率进行衰减。
class StepLR(LRScheduler): """Decays the learning rate of each parameter group by gamma every step_size epochs. Notice that such decay can happen simultaneously with other changes to the learning rate from outside this scheduler. When last_epoch=-1, sets initial lr as lr. Args: optimizer (Optimizer): Wrapped optimizer. step_size (int): Period of learning rate decay. gamma (float): Multiplicative factor of learning rate decay. Default: 0.1. last_epoch (int): The index of last epoch. Default: -1. verbose (bool): If ``True``, prints a message to stdout for each update. Default: ``False``. Example: >>> # xdoctest: +SKIP >>> # Assuming optimizer uses lr = 0.05 for all groups >>> # lr = 0.05 if epoch < 30 >>> # lr = 0.005 if 30 <= epoch < 60 >>> # lr = 0.0005 if 60 <= epoch < 90 >>> # ... >>> scheduler = StepLR(optimizer, step_size=30, gamma=0.1) >>> for epoch in range(100): >>> train(...) >>> validate(...) >>> scheduler.step() """ def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1, verbose=False): self.step_size = step_size self.gamma = gamma super().__init__(optimizer, last_epoch, verbose)
optimizer.step()
后面跟着scheduler.step()
即可。在torch.nn中已经定义好了很多代价函数,只需要调用它们并且传入真实值、预测值,就可以返回结果,例如:
当然,也可以自己定义loss的计算过程。
import numpy as np
import torch
from matplotlib import pyplot
from torch import nn
from torch import optim


# Network definition
class lr(nn.Module):
    """Single-feature linear regression model: ``y = w * x + b``."""

    def __init__(self):
        # Initialise nn.Module first so the layer below gets registered.
        super().__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        """Forward pass: return the linear layer's prediction for ``x``."""
        return self.linear(x)


def main():
    """Fit ``y = 3x + 8`` with SGD, print the progress and plot the result."""
    # Fall back to the CPU so the script also runs on machines without CUDA.
    gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cpu = "cpu"

    # Prepare the data. The ground truth is a constant, so it must NOT track
    # gradients: otherwise y_train carries its own autograd graph, and every
    # backward() call would need retain_graph=True to keep that graph alive.
    x_train = torch.rand([200, 1], device=gpu)
    true_weight = torch.tensor([[3]], dtype=torch.float32, device=gpu)
    y_train = torch.matmul(x_train, true_weight) + 8

    # Instantiate model, optimizer and loss
    model_lr = lr().to(gpu)
    optimizer = optim.SGD(model_lr.parameters(), 0.02)
    cost_fn = nn.MSELoss()

    # Training loop
    for i in range(1000):
        # Call the module itself rather than .forward() so hooks still run.
        y_predict = model_lr(x_train)
        cost = cost_fn(y_predict, y_train)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        if i % 20 == 0:
            print(cost.item())
    print(list(model_lr.parameters()))

    # Prediction and evaluation: no gradients are needed here.
    model_lr.eval()
    with torch.no_grad():
        y_predict_numpy = model_lr(x_train).to(cpu).numpy()
    x_train_numpy = x_train.to(cpu).numpy()
    y_train_numpy = y_train.to(cpu).numpy()
    pyplot.scatter(x_train_numpy, y_predict_numpy, c="r")
    pyplot.plot(x_train_numpy, y_train_numpy)
    pyplot.show()


if __name__ == "__main__":
    main()
输出
4.7310328227467835e-05 [Parameter containing: tensor([[3.0237]], device='cuda:0', requires_grad=True), Parameter containing: tensor([7.9876], device='cuda:0', requires_grad=True)]
绘制图
在pytorch中提供了数据集的父类torch.utils.data.Dataset,继承这个父类,我们可以非常快速地实现对数据的加载,与继承nn.Module类一样,我们同样必须定义一些必要的成员函数
# Raw string: the backslashes of a Windows path must not be read as escapes.
SMSData_path = r"D:\Desktop\PycharmProjects\exercise\SMSSpamCollection"
# Data source: http://archive.ics.uci.edu/ml/machine-learning-databases/00228/


class SMSData(Dataset):
    """Dataset over the SMS spam collection file, one message per line.

    Each line starts with a label followed by whitespace and the message
    text; indexing returns ``[label, content]``.
    """

    def __init__(self):
        # Read everything up front and close the file handle right away.
        with open(SMSData_path, "r", encoding="utf-8") as file:
            self.data = file.readlines()

    def __getitem__(self, index):
        """Return ``[label, content]`` for the line at ``index``."""
        # Split on the first run of whitespace instead of assuming that the
        # label is exactly four characters wide.
        parts = self.data[index].strip().split(None, 1)
        label = parts[0] if parts else ""
        content = parts[1] if len(parts) > 1 else ""
        return [label, content]

    def __len__(self):
        """Return the number of lines (samples) in the file."""
        return len(self.data)


SMSex = SMSData()
print(SMSex[5])
print(len(SMSex))
输出
['spam', "FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv"] 5574
class DataLoader(Generic[T_co]): def __init__(self, dataset: Dataset[T_co], batch_size: Optional[int] = 1, shuffle: Optional[bool] = None, sampler: Union[Sampler, Iterable, None] = None, batch_sampler: Union[Sampler[Sequence], Iterable[Sequence], None] = None, num_workers: int = 0, collate_fn: Optional[_collate_fn_t] = None, pin_memory: bool = False, drop_last: bool = False, timeout: float = 0, worker_init_fn: Optional[_worker_init_fn_t] = None, multiprocessing_context=None, generator=None, *, prefetch_factor: int = 2, persistent_workers: bool = False, pin_memory_device: str = ""): #只列出参数表,以下详细内容不再列出
dataset:以Dataset类为父类的自定义类的实例化对象
batch_size:批处理的个数
shuffle:bool类型,若为True则表示提前打乱数据
num_workers:加载数据时用到的子进程数,默认为0,表示在主进程中加载数据
drop_last :bool类型,若为True:这个是对最后的未完成的batch来说的,比如你的batch_size设置为64,而一个训练集只有100个样本,那么训练的时候后面的36个就被扔掉了。如果为False(默认),那么会继续正常执行,只是最后的batch_size会小一点。
timeout:如果是正数,表示从worker进程中收集一个batch的最长等待时间(单位:秒),若超出设定的时间还没有收集到,就会抛出超时错误。这个数值应总是大于等于0,默认为0
import torch
from torch.utils.data import Dataset, DataLoader
import chardet

gpu = torch.device("cuda")
cpu = "cpu"
SMSData_path = "SMSSpamCollection"


def detect_encoding(path):
    """Guess the text encoding of the file at ``path`` with chardet."""
    with open(path, "rb") as file:
        return chardet.detect(file.read())["encoding"]


class SMSData(Dataset):
    """Dataset over the SMS spam collection file, one message per line.

    Defined at module level (not inside a ``try`` block) so that DataLoader
    worker processes can import it.

    Args:
        path: File to read; defaults to ``SMSData_path``.
        encoding: Text encoding; detected with chardet when omitted.
    """

    def __init__(self, path=SMSData_path, encoding=None):
        if encoding is None:
            encoding = detect_encoding(path)
        # Read everything up front and close the file handle right away.
        with open(path, "r", encoding=encoding) as file:
            self.data = file.readlines()

    def __getitem__(self, index):
        """Return ``[label, content]`` for the line at ``index``."""
        # Split on the first run of whitespace instead of assuming that the
        # label is exactly four characters wide.
        parts = self.data[index].strip().split(None, 1)
        origin = parts[0] if parts else ""
        content = parts[1] if len(parts) > 1 else ""
        return [origin, content]

    def __len__(self):
        """Return the number of lines (samples) in the file."""
        return len(self.data)


# This guard is mandatory when num_workers > 0: on platforms that start the
# workers as fresh processes the module is imported again in each worker, and
# unguarded code would try to spawn workers recursively.
if __name__ == '__main__':
    try:
        # Build the dataset here so worker processes do not rebuild it on import.
        SMSex = SMSData()
        SMSData_loader = DataLoader(dataset=SMSex, batch_size=2, shuffle=False, num_workers=2)
        for i in SMSData_loader:
            print("遍历一:", i)
            break
        for i in enumerate(SMSData_loader):
            print("遍历二:", i)
            break
        for batch_index, (label, content) in enumerate(SMSData_loader):
            print("遍历三:", batch_index, label, content)
            break
    # Exception (not BaseException) so Ctrl-C and SystemExit still propagate.
    except Exception as error:
        print(error)
输出
遍历一: [('ham', 'ham'), ('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...', 'Ok lar... Joking wif u oni...')] 遍历二: (0, [('ham', 'ham'), ('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...', 'Ok lar... Joking wif u oni...')]) 遍历三: 0 ('ham', 'ham') ('Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...', 'Ok lar... Joking wif u oni...')
class ToTensor: def __init__(self) -> None: _log_api_usage_once(self)
class Normalize(torch.nn.Module): def __init__(self, mean, std, inplace=False): super().__init__() _log_api_usage_once(self) self.mean = mean self.std = std self.inplace = inplace
mean:数据类型为元组,元组的长度取决于通道数
std:数据类型为元组,元组的长度取决于通道数
class Compose: def __init__(self, transforms): if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(self) self.transforms = transforms
transforms:数据类型为列表,列表中每个元素都是transforms模块中某个类的实例,如ToTensor()和Normalize(mean, std);调用Compose对象时,这些实例会按顺序依次作用于输入。
import torchvision

if __name__ == '__main__':
    MNIST = torchvision.datasets.MNIST(root="./data", train=True, download=False, transform=None)
    # mean/std are given as one-element tuples (one entry per channel);
    # a bare (0) is just the int 0, not a tuple.
    to_normalized_tensor = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0,), (1,)),
    ])
    # MNIST[0] is an (image, label) pair; transform the image only.
    MNIST_normalize = to_normalized_tensor(MNIST[0][0])
    print(MNIST_normalize)
import torchvision
import torch
from torch.utils.data import DataLoader
from torch import nn
from torch import optim
from torch.nn import functional as Activate
from matplotlib import pyplot


# Network definition
class ExNet(nn.Module):
    """Small CNN for MNIST digit classification.

    Shape flow for a batch of 1*28*28 images::

        batch*1*28*28  -> conv1 (5x5)      -> batch*15*24*24
                       -> max-pool (2x2)   -> batch*15*12*12
                       -> conv2 (3x3)      -> batch*30*10*10
                       -> flatten          -> batch*3000
                       -> fully connected  -> batch*40 -> batch*10
    """

    def __init__(self):
        super().__init__()
        # Conv layer 1: 1 input channel, 15 output channels, kernel size 5.
        self.conv1 = nn.Conv2d(1, 15, 5)
        # Conv layer 2: input channels must equal conv1's output channels.
        self.conv2 = nn.Conv2d(15, 30, 3)
        # 30 * 10 * 10 is the flattened size of conv2's output (see above).
        self.fully_connected_1 = nn.Linear(30 * 10 * 10, 40)
        # Ten digits to tell apart, hence ten output neurons.
        self.fully_connected_2 = nn.Linear(40, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the image batch ``x``."""
        in_size = x.size(0)  # batch size; kept fixed while flattening
        out = Activate.relu(self.conv1(x))
        out = Activate.max_pool2d(out, 2, 2)  # a 2x2 pool halves height/width
        out = Activate.relu(self.conv2(out))
        out = out.view(in_size, -1)  # flatten everything but the batch axis
        out = Activate.relu(self.fully_connected_1(out))
        out = self.fully_connected_2(out)
        # log-softmax pairs with nll_loss (together: cross-entropy).
        return Activate.log_softmax(out, dim=1)


def train(the_model, the_device, train_loader, the_optimizer, the_epoch):
    """Train ``the_model`` for one epoch and plot the sampled loss values.

    Every 10th batch the loss is printed and recorded; after the epoch the
    recorded values are drawn as a scatter plot and saved to
    ``costImage.jpg``.
    """
    the_model = the_model.to(device=the_device)
    the_model.train(mode=True)
    # Values used for the plot
    list_times = []
    list_cost = []
    for batch_idx, (data, target) in enumerate(train_loader):
        # Move the batch to the device the model lives on
        data = data.to(the_device)
        target = target.to(the_device)
        the_optimizer.zero_grad()
        # Call the module itself rather than .forward() so hooks still run.
        output = the_model(data)
        cost = Activate.nll_loss(output, target)
        cost.backward()
        the_optimizer.step()
        if batch_idx % 10 == 0:
            # Read the scalar once; each .item() call copies it to the host.
            cost_value = cost.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                the_epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), cost_value))
            print(batch_idx, cost_value)
            list_times.append(batch_idx)
            list_cost.append(cost_value)
    # Draw the loss curve
    pyplot.scatter(list_times, list_cost)
    pyplot.savefig("costImage.jpg")
    pyplot.show()


def test(the_model, the_device, the_test_loader):
    """Evaluate ``the_model`` and print the mean accuracy and mean loss.

    Both metrics are weighted by batch size, so a smaller final batch does
    not skew the averages.
    """
    the_model = the_model.to(device=the_device)
    the_model.eval()  # evaluation mode
    correct = 0.0
    cost_total = 0.0
    sample_count = 0
    with torch.no_grad():
        for index, (data, target) in enumerate(the_test_loader):
            data = data.to(the_device)
            target = target.to(the_device)
            output = the_model(data)
            batch_size = len(data)
            # nll_loss returns the batch mean; scale back to a per-sample sum.
            cost_total += Activate.nll_loss(output, target).item() * batch_size
            # output is [batch_size, 10]; the arg-max of each row is the
            # predicted digit.
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            sample_count += batch_size
    print("平均查准率:{}".format(correct / sample_count))
    print("average cost:{}".format(cost_total / sample_count))


if __name__ == '__main__':
    # Fall back to the CPU so the script also runs on machines without CUDA.
    gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cpu = "cpu"
    # Prepare the data. mean/std are one-element tuples (one per channel).
    transAndNorm = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0,), (1,)),
    ])
    MNISTData = torchvision.datasets.MNIST(root="./data", train=True, download=False, transform=transAndNorm)
    MNISTtest = torchvision.datasets.MNIST(root="./data", train=False, download=False, transform=transAndNorm)
    MNISTData_loader = DataLoader(dataset=MNISTData, batch_size=10, shuffle=True)
    # Evaluation does not depend on sample order, so no shuffling is needed.
    MNISTtest_loader = DataLoader(dataset=MNISTtest, batch_size=10, shuffle=False)
    # Instantiate the network and the optimizer
    MNISTnet_Ex = ExNet()
    MNIST_optimizer = optim.Adam(MNISTnet_Ex.parameters(), lr=0.001)  # lr = learning rate
    for i in range(1, 2):
        train(the_model=MNISTnet_Ex, the_device=gpu, train_loader=MNISTData_loader,
              the_optimizer=MNIST_optimizer, the_epoch=i)
        test(the_model=MNISTnet_Ex, the_device=gpu, the_test_loader=MNISTtest_loader)
输出
平均查准率:0.9804015159606934 average cost:0.061943911015987396
散点图
在刚刚的MNIST手写数字识别分类任务中,我们使用的数据集是pytorch官方内置的图片数据集。现在,我们要从零开始,尝试制作我们自己的数据集。
Oxford 102 Flower 是一个图像分类数据集,由 102 个花卉类别组成,这些类别均选自英国常见的花卉。每个类别由 40 到 258 张图像组成。图像在尺度、姿态和光照上都有较大的变化。此外,有些类别的类内差异很大,还有几个类别彼此非常相似。这里是flower102数据集的下载地址。解压后的文件目录如下:
如第三章一样建立即可,如下:
import os

import torch
from torch.utils.data import Dataset

gpu = torch.device("cuda")
cpu = "cpu"


class flower102(Dataset):
    """Skeleton of the flower102 dataset; the methods are filled in later."""

    def __init__(self, root, resize, mode):
        """Placeholder constructor: only initialises the Dataset base class."""
        super().__init__()

    def __len__(self):
        """Placeholder: not implemented yet, returns None."""

    def __getitem__(self, item):
        """Placeholder: not implemented yet, returns None."""
在训练集中,这102种花的类别名称如上图所示(我这里是经过重命名的),我们定义名称flower1
为数字标签0(标签从0开始,按文件夹名称的字典序依次编号)
,这样我们就建立了一个映射。接下来,稍微修改一下构造函数,就可以实现全部的映射。如下:
import csv
import glob
import os
import random

import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader

gpu = torch.device("cuda")
cpu = "cpu"


class flower102(Dataset):
    """flower102 dataset, step 1: map class-folder names to integer labels."""

    def __init__(self, root, resize, mode):
        """Store the settings and build ``cat2label`` from ``<root>/train``."""
        super().__init__()
        self.root = root
        self.train_root = os.path.join(root, "train")
        self.val_root = os.path.join(root, "valid")
        self.test_root = os.path.join(root, "test")
        self.resize = resize
        self.mode = mode
        # Per-channel constants kept for normalisation later on.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        # Each sub-directory of the training folder is one class; labels are
        # assigned in sorted-name order, starting from 0. Plain files are
        # skipped.
        self.cat2label = {}
        for entry in sorted(os.listdir(self.train_root)):
            if os.path.isdir(os.path.join(self.train_root, entry)):
                self.cat2label.setdefault(entry, len(self.cat2label))
        print(self.cat2label)  # show the resulting mapping

    def __len__(self):
        """Placeholder: not implemented yet, returns None."""

    def __getitem__(self, idx):
        """Placeholder: not implemented yet, returns None."""


# Create the dataset instance
db = flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=224, mode="train")
结果如下:
{'flower1': 0, 'flower10': 1, 'flower100': 2, 'flower101': 3, 'flower102': 4, 'flower11': 5, 'flower12': 6, 'flower13': 7, 'flower14': 8, 'flower15': 9, 'flower16': 10, 'flower17': 11, 'flower18': 12, 'flower19': 13, 'flower2': 14, 'flower20': 15, 'flower21': 16, 'flower22': 17, 'flower23': 18, 'flower24': 19, 'flower25': 20, 'flower26': 21, 'flower27': 22, 'flower28': 23, 'flower29': 24, 'flower3': 25, 'flower30': 26, 'flower31': 27, 'flower32': 28, 'flower33': 29, 'flower34': 30, 'flower35': 31, 'flower36': 32, 'flower37': 33, 'flower38': 34, 'flower39': 35, 'flower4': 36, 'flower40': 37, 'flower41': 38, 'flower42': 39, 'flower43': 40, 'flower44': 41, 'flower45': 42, 'flower46': 43, 'flower47': 44, 'flower48': 45, 'flower49': 46, 'flower5': 47, 'flower50': 48, 'flower51': 49, 'flower52': 50, 'flower53': 51, 'flower54': 52, 'flower55': 53, 'flower56': 54, 'flower57': 55, 'flower58': 56, 'flower59': 57, 'flower6': 58, 'flower60': 59, 'flower61': 60, 'flower62': 61, 'flower63': 62, 'flower64': 63, 'flower65': 64, 'flower66': 65, 'flower67': 66, 'flower68': 67, 'flower69': 68, 'flower7': 69, 'flower70': 70, 'flower71': 71, 'flower72': 72, 'flower73': 73, 'flower74': 74, 'flower75': 75, 'flower76': 76, 'flower77': 77, 'flower78': 78, 'flower79': 79, 'flower8': 80, 'flower80': 81, 'flower81': 82, 'flower82': 83, 'flower83': 84, 'flower84': 85, 'flower85': 86, 'flower86': 87, 'flower87': 88, 'flower88': 89, 'flower89': 90, 'flower9': 91, 'flower90': 92, 'flower91': 93, 'flower92': 94, 'flower93': 95, 'flower94': 96, 'flower95': 97, 'flower96': 98, 'flower97': 99, 'flower98': 100, 'flower99': 101}
在建立了从名称到数字标签的映射后,我们希望有一个csv文件,里面存储了所有的图片路径及其数字标签,接下来,我们将定义一个load_csv函数去完成这件事,如下:
import csv
import glob
import os
import random

import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader

gpu = torch.device("cuda")
cpu = "cpu"


class flower102(Dataset):
    """flower102 dataset, step 2: label mapping plus a CSV index of images."""

    def __init__(self, root, resize, mode):
        """Build ``cat2label`` and load the image/label lists for ``mode``.

        Raises:
            Exception: if ``mode`` is neither ``"train"`` nor ``"valid"``.
        """
        super().__init__()
        self.root = root
        self.train_root = os.path.join(root, "train")
        self.val_root = os.path.join(root, "valid")
        self.test_root = os.path.join(root, "test")
        self.resize = resize
        self.mode = mode
        # Per-channel constants kept for normalisation later on.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        # Each sub-directory of the training folder is one class; labels are
        # assigned in sorted-name order, starting from 0. Plain files are
        # skipped.
        self.cat2label = {}
        for entry in sorted(os.listdir(self.train_root)):
            if os.path.isdir(os.path.join(self.train_root, entry)):
                self.cat2label.setdefault(entry, len(self.cat2label))
        print(self.cat2label)  # show the resulting mapping

        if mode != "train" and mode != "valid":
            raise Exception("invalid mode!", self.mode)
        csv_name = "images_train.csv" if mode == "train" else "images_valid.csv"
        self.images, self.labels = self.load_csv(csv_name)

    def load_csv(self, filename):
        """Return ``(images, labels)`` read from ``<root>/<filename>``.

        When the CSV file does not exist yet it is generated first: every
        png/jpg/jpeg under ``<root>/<mode>/<class>/`` is collected, the list
        is shuffled, and one ``path,label`` row is written per image.
        """
        csv_path = os.path.join(self.root, filename)
        if not os.path.exists(csv_path):
            found = []
            for name in self.cat2label.keys():
                for pattern in ("*.png", "*.jpg", "*.jpeg"):
                    found += glob.glob(os.path.join(self.root, self.mode, name, pattern))
            random.shuffle(found)
            with open(csv_path, mode="w", newline="") as f:
                writer = csv.writer(f)
                for img in found:
                    # The parent folder name of an image is its class name.
                    label = self.cat2label[img.split(os.sep)[-2]]
                    writer.writerow([img, label])
                print("written into csv file:", filename)

        # Read the image paths and labels back from the CSV file.
        images, labels = [], []
        with open(csv_path) as f:
            for img, label in csv.reader(f):
                images.append(img)
                labels.append(int(label))
        assert len(images) == len(labels)
        return images, labels

    def denormalize(self, x_hat):
        """Placeholder for the inverse of normalisation; returns None."""

    def __len__(self):
        """Placeholder: not implemented yet, returns None."""

    def __getitem__(self, idx):
        """Placeholder: not implemented yet, returns None."""


# Create the dataset instance
db = flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=224, mode="train")
然后,我们获得了一个如下的csv文件:
在完成了load_csv函数后,这个数据集基本制作完成,接下来只需要完善__len__函数和__getitem__函数,并定义transform过程即可。
import csv
import glob
import random
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

gpu = torch.device("cuda")
cpu = "cpu"


class flower102(Dataset):
    """flower102 image-classification dataset backed by a CSV index.

    Args:
        root: Dataset root containing the ``train`` / ``valid`` folders, each
            with one sub-folder per class.
        resize: Side length (pixels) of the square images that are returned.
        mode: ``"train"`` or ``"valid"``; selects the folder and CSV file.

    Raises:
        ValueError: if ``mode`` is neither ``"train"`` nor ``"valid"``.
    """

    def __init__(self, root, resize, mode):
        super().__init__()
        self.root = root
        self.train_root = os.path.join(self.root, "train")
        self.val_root = os.path.join(self.root, "valid")
        self.test_root = os.path.join(self.root, "test")
        self.resize = resize
        self.mode = mode
        # Per-channel constants used by Normalize and denormalize.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        # Each sub-directory of the training folder is one class; labels are
        # assigned in sorted-name order, starting from 0. Plain files are
        # skipped.
        self.cat2label = {}
        for name in sorted(os.listdir(self.train_root)):
            if os.path.isdir(os.path.join(self.train_root, name)):
                self.cat2label.setdefault(name, len(self.cat2label))
        print(self.cat2label)  # show the resulting mapping

        if mode == "train":
            csv_name = "images_train.csv"
        elif mode == "valid":
            csv_name = "images_valid.csv"
        else:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("invalid mode!", self.mode)
        self.images, self.labels = self.load_csv(csv_name)
        # Built once here instead of on every __getitem__ call.
        self.transform = self._build_transform()

    def _build_transform(self):
        """Return the preprocessing pipeline applied to every opened image."""
        enlarged = int(self.resize * 1.25)
        steps = [transforms.Resize((enlarged, enlarged))]
        if self.mode == "train":
            # Random augmentation belongs to training only; validation
            # samples must be deterministic to be comparable between runs.
            steps.append(transforms.RandomRotation(15))
        steps += [
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ]
        return transforms.Compose(steps)

    def load_csv(self, filename):
        """Return ``(images, labels)`` read from ``<root>/<filename>``.

        When the CSV file does not exist yet it is generated first: every
        png/jpg/jpeg under ``<root>/<mode>/<class>/`` is collected, the list
        is shuffled, and one ``path,label`` row is written per image.
        """
        csv_path = os.path.join(self.root, filename)
        if not os.path.exists(csv_path):
            images = []
            for name in self.cat2label:
                for pattern in ("*.png", "*.jpg", "*.jpeg"):
                    images += glob.glob(os.path.join(self.root, self.mode, name, pattern))
            random.shuffle(images)
            with open(csv_path, mode="w", newline="") as f:
                writer = csv.writer(f)
                for img in images:
                    # The parent folder name is the class name; os.path copes
                    # with mixed "/" and "\\" separators, str.split does not.
                    class_name = os.path.basename(os.path.dirname(img))
                    writer.writerow([img, self.cat2label[class_name]])
            print("written into csv file:", filename)

        # Read the image paths and labels back from the CSV file.
        images, labels = [], []
        with open(csv_path) as f:
            for img, label in csv.reader(f):
                images.append(img)
                labels.append(int(label))
        assert len(images) == len(labels)
        return images, labels

    def denormalize(self, x_hat):
        """Undo Normalize: ``x = x_hat * std + mean``.

        ``x_hat`` is laid out as ``[c, h, w]``; mean and std are reshaped
        from ``[3]`` to ``[3, 1, 1]`` so they broadcast over height and width.
        """
        # Create the constants on x_hat's device so GPU tensors work as well.
        mean = torch.tensor(self.mean, device=x_hat.device).view(-1, 1, 1)
        std = torch.tensor(self.std, device=x_hat.device).view(-1, 1, 1)
        return x_hat * std + mean

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for sample ``idx``."""
        path, label = self.images[idx], self.labels[idx]
        # convert() yields an independent RGB copy, so the file handle can be
        # closed before the transforms run.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
        return self.transform(img), torch.tensor(label)


# Create the dataset instance
db = flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=224, mode="train")
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    loader = DataLoader(dataset=db, shuffle=True, num_workers=1, batch_size=8)
    # Pull one batch through the loader (not straight from db) so that
    # batching and the worker process are exercised as well.
    data, target = next(iter(loader))
    print(data.shape)  # [batch, channel, height, width]
    # Show the first image of the batch after undoing the normalisation.
    plt.imshow(transforms.ToPILImage()(db.denormalize(data[0])))
    plt.show()
成功显示:
我们要保存的是:
# Signature of torch.save as quoted in this article; the body is elided ("...").
def save(
    obj: object,  # the object to serialize, e.g. a state_dict
    f: FILE_LIKE,  # destination; the example that follows passes a file name string
    pickle_module: Any = pickle,  # module used for pickling, defaults to pickle
    pickle_protocol: int = DEFAULT_PROTOCOL,  # pickle protocol version
    _use_new_zipfile_serialization: bool = True  # leading underscore: internal switch
) -> None:...
# Files that hold the serialized state dictionaries.
model_path = "MNIST.pt"
optimizer_path = "optimizer.pt"

# Save only the state dicts (parameters / optimizer state), not the objects.
model_state = MNISTnet_Ex.state_dict()
torch.save(model_state, model_path)
optimizer_state = optimzer.state_dict()
torch.save(optimizer_state, optimizer_path)

# Restore both into already constructed model and optimizer objects.
restored_model_state = torch.load(model_path)
MNISTnet_Ex.load_state_dict(restored_model_state)
restored_optimizer_state = torch.load(optimizer_path)
optimzer.load_state_dict(restored_optimizer_state)
PyTorch官方提供了不少预训练的模型可供使用,如下:
model |
---|
AlexNet |
ConvNeXt |
DenseNet |
EfficientNet |
EfficientNetV2 |
GoogLeNet |
Inception V3 |
MaxVit |
MNASNet |
MobileNet V2 |
MobileNet V3 |
RegNet |
ResNet |
ResNeXt |
ShuffleNet V2 |
SqueezeNet |
SwinTransformer |
VGG |
VisionTransformer |
Wide ResNet |
关于这些模型的详细用途,可以自行前往pytorch官网查阅相关资料,具体原理本文不再涉及。
在使用预训练模型的过程中,最重要的一步是,确定这个预训练模型中哪些参数是需要训练的,哪些参数是不需要训练的,哪些参数是要修改的。
首先,查看一下resnet50的网络结构:
import torchvision.models as models

# Build ResNet-50 with pretrained weights and print its module tree.
# The text after the print call is the (abridged) printed structure.
print(models.resnet50(pretrained=True))
Resnet( ... (avgpool): AdaptiveAvgPool2d(output_size=(1, 1)) (fc): Linear(in_features=2048, out_features=1000, bias=True) )
可以看到,最后一层是一个1000分类的全连接层,而我们第五章制作的数据集只需要102分类。因此,我们冻结其余各层的参数,只把最后一层替换为102分类的全连接层并训练它。如下所示:
import torchvision.models as models
import torch.nn as nn


def set_parameter_requires_grad(model, need_train):
    """Freeze every parameter of ``model`` unless ``need_train`` is truthy."""
    if need_train:
        return
    for weight in model.parameters():
        weight.requires_grad = False


def initalize_resnet50(num_classes, need_train=False, pretrained=True):
    """Build a ResNet-50 whose final layer outputs ``num_classes`` log-probabilities.

    Args:
        num_classes: Number of output classes of the new ``fc`` head.
        need_train: When false, all existing parameters are frozen before the
            head is replaced, so only the new head remains trainable.
        pretrained: Forwarded to ``models.resnet50``.

    Returns:
        ``(model, input_size)`` where ``input_size`` is 224.
    """
    input_size = 224
    trained_model = models.resnet50(pretrained=pretrained)
    set_parameter_requires_grad(trained_model, need_train)

    # The replacement head is created after freezing, so its parameters keep
    # the default requires_grad=True.
    in_features = trained_model.fc.in_features
    head = nn.Sequential(
        nn.Linear(in_features, num_classes),
        nn.LogSoftmax(dim=1),
    )
    trained_model.fc = head
    # Alternative head kept as a note from the original snippet:
    # nn.Linear(in_features, num_classes) followed by nn.Flatten(), without LogSoftmax.
    return trained_model, input_size


# Module-level model shared with the training script: 102 classes, frozen backbone.
resnet50, input_size = initalize_resnet50(num_classes=102, need_train=False, pretrained=True)
训练的流程和记录如第四章所示即可,如下:
import copy  # deep copy of the best weights
import os.path  # checkpoint existence check
import time  # wall-clock timing

import torch  # train() itself needs torch, not only the __main__ script


def _format_elapsed(seconds):
    """Format a duration in seconds as ``"<m>m <s>s"``; the seconds part never reads 60."""
    minutes, secs = divmod(int(round(seconds)), 60)
    return "{}m {}s".format(minutes, secs)


def train(model, dataLoader, criterion, optimzer, num_epoch, device, filename, scheduler=None):
    """Train ``model`` and keep the weights of the most accurate epoch.

    Args:
        model: Model to train; it is moved to ``device``.
        dataLoader: Loader yielding ``(data, target)`` batches.
        criterion: Loss function called as ``criterion(output, target)``.
        optimzer: Optimizer updating the model parameters.
        num_epoch: Number of epochs to run.
        device: Device on which batches and the model are placed.
        filename: Checkpoint path, written whenever the training accuracy improves.
        scheduler: Optional learning-rate scheduler, stepped once per epoch
            after the optimizer updates. ``None`` leaves the learning rate
            untouched.

    Returns:
        model: The model loaded with the best weights seen.
        train_acc_history: Training accuracy per epoch.
        train_losses: Mean training loss per epoch.
        l_rs: Learning rate of the first parameter group, initial value
            followed by one entry per epoch.
    """
    since = time.time()
    best_epoch = {"epoch": -1, "acc": 0}
    model.to(device)

    train_acc_history = []
    train_losses = []
    l_rs = [optimzer.param_groups[0]['lr']]
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epoch):
        print("Epoch {}/{}".format(epoch, num_epoch - 1))
        print('*' * 10)
        running_loss = 0.0
        running_correct = 0.0

        for data, target in dataLoader:
            data = data.to(device)
            target = target.to(device)

            optimzer.zero_grad()
            # Call the module rather than .forward() so registered hooks run.
            output = model(data)
            loss = criterion(output, target)
            pred = output.argmax(dim=1)
            loss.backward()
            optimzer.step()

            # loss is a batch mean; weight it by the batch size so the epoch
            # average stays correct when the last batch is smaller.
            running_loss += loss.item() * data.size(0)
            running_correct += (pred == target).sum().item()

        if scheduler is not None:
            scheduler.step()

        epoch_loss = running_loss / len(dataLoader.dataset)
        epoch_acc = running_correct / len(dataLoader.dataset)

        print("Time elapsed " + _format_elapsed(time.time() - since))
        print("Loss: {:4f} Acc:{:.4f}".format(epoch_loss, epoch_acc))
        train_acc_history.append(epoch_acc)
        train_losses.append(epoch_loss)

        if epoch_acc > best_epoch["acc"]:
            best_epoch = {"epoch": epoch, "acc": epoch_acc}
            best_model_wts = copy.deepcopy(model.state_dict())
            state = {
                "state_dict": model.state_dict(),
                "best_acc": best_epoch["acc"],
                "optimzer": optimzer.state_dict(),
            }
            torch.save(state, filename)

        print("Optimzer learning rate : {:.7f}".format(optimzer.param_groups[0]['lr']))
        l_rs.append(optimzer.param_groups[0]['lr'])
        print()

    print("Training complete in " + _format_elapsed(time.time() - since))
    print("Best epoch:", best_epoch)
    model.load_state_dict(best_model_wts)
    return model, train_acc_history, train_losses, l_rs


if __name__ == "__main__":
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader

    import Net
    import flower102

    optimzer = optim.Adam(params=Net.resnet50.parameters(), lr=1e-2)
    # Halve the learning rate every 10 epochs; train() steps this scheduler.
    sche = optim.lr_scheduler.StepLR(optimizer=optimzer, step_size=10, gamma=0.5)
    # NLLLoss pairs with the LogSoftmax layer at the end of Net.resnet50.
    criterion = nn.NLLLoss()
    # criterion = nn.CrossEntropyLoss()

    db = flower102.flower102(r"D:\Desktop\Datasets\flower102\dataset", resize=Net.input_size, mode="train")
    loader = DataLoader(dataset=db, shuffle=True, num_workers=1, batch_size=5)

    model = Net.resnet50
    filename = "checkpoint.pth"
    if os.path.exists(filename):
        # Load onto the CPU first so a checkpoint written on a GPU machine can
        # be restored anywhere; load_state_dict copies into the model's tensors.
        checkpoint = torch.load(filename, map_location="cpu")
        model.load_state_dict(checkpoint["state_dict"])

    # Fall back to the CPU when no CUDA device is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, train_acc_history, train_loss, LRS = train(model=model, dataLoader=loader, criterion=criterion,
                                                      optimzer=optimzer, num_epoch=5, device=device,
                                                      filename=filename, scheduler=sche)
下面是我训练5轮的结果:
Epoch0/4 ********** Time elapsed 0m 37s Loss: 11.229704 Acc:0.3515 Optimzer learning rate : 0.0100000 Epoch1/4 ********** Time elapsed 1m 12s Loss: 8.165128 Acc:0.5697 Optimzer learning rate : 0.0100000 Epoch2/4 ********** Time elapsed 2m 4s Loss: 7.410833 Acc:0.6363 Optimzer learning rate : 0.0100000 Epoch3/4 ********** Time elapsed 2m 60s Loss: 6.991850 Acc:0.6822 Optimzer learning rate : 0.0100000 Epoch4/4 ********** Time elapsed 3m 44s Loss: 6.482804 Acc:0.7128 Optimzer learning rate : 0.0100000 Training complete in 3m 44s Best epoch: {'epoch': 4, 'acc': 0.7127594627594628}
本文由博客一文多发平台 OpenWrite 发布!