其中features是训练数据特征,
labels是标签
from mxnet import autograd, nd

# Synthetic linear-regression data:
#   labels = true_w[0] * x0 + true_w[1] * x1 + true_b + noise
num_inputs = 2  # number of features per example (2-D data)
num_examples = 1000  # total number of examples
true_w = [2, -3.4]  # ground-truth weights
true_b = 4.2  # ground-truth bias

# Randomly generate 1000 examples with standard deviation 1.
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
# The ground-truth function is the linear combination defined above.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# Add noise (standard deviation 0.01) with the same shape as labels.
labels += nd.random.normal(scale=0.01, shape=labels.shape)
经过操作以后features就是符合输入格式的输入值
labels 就是实验结果用来调整w用
from mxnet.gluon import data as gdata

batch_size = 10
# Combine the training features with their labels.
dataset = gdata.ArrayDataset(features, labels)
# Read random mini-batches; shuffle=True randomizes the order.
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
nn就是神经网络模块,Sequential是一个串联各个层的容器。在构建模型的时候就是往里面加层。当数据输入时,容器中的每一层都将依次计算,并将输出作为下一层的输入。
from mxnet.gluon import nn

net = nn.Sequential()
# The output layer of linear regression is a fully connected layer, so it
# is a Dense instance; its number of outputs is set to 1.
net.add(nn.Dense(1))
将权重参数初始化为均值为0、标准差为0.01的正态分布随机值。偏差参数默认初始化为0。
from mxnet import init

# Initialize the parameters of net with a Normal initializer (sigma=0.01).
net.initialize(init.Normal(sigma=0.01))
from mxnet.gluon import loss as gloss

# Squared loss, also known as L2 loss.
loss = gloss.L2Loss()
选用平方损失又称为L2范数损失
利用gluon创造Trainer实例,用collect_params获取全部参数,
选用sgd(小批量随机梯度下降)优化算法
from mxnet import gluon

# Train every parameter collected from net with the 'sgd' optimizer at a
# learning rate of 0.03.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        # Record the forward pass so gradients can be computed afterwards.
        with autograd.record():
            l = loss(net(X), y)
        # backward() computes the gradient of the loss with respect to each
        # parameter; the trainer uses these gradients to learn.
        l.backward()
        trainer.step(batch_size)
    # After each epoch, evaluate the loss on the whole data set.
    l = loss(net(features), labels)
    print("epoch %d, loss:%f" % (epoch, l.mean().asnumpy()))
把l.backward()删除后,参数的梯度不会被更新,trainer.step会给出如下警告:
UserWarning Traceback (most recent call last) <ipython-input-14-dd30099604ae> in <module> 5 l = loss(net(X),y) 6 ----> 7 trainer.step(batch_size) 8 l= loss(net(features),labels) 9 print("epoch %d, loos:%f" % (epoch,l.mean().asnumpy())) c:\programdata\miniconda3\envs\gluon\lib\site-packages\mxnet\gluon\trainer.py in step(self, batch_size, ignore_stale_grad) 289 290 self._allreduce_grads() --> 291 self._update(ignore_stale_grad) 292 293 def allreduce_grads(self): c:\programdata\miniconda3\envs\gluon\lib\site-packages\mxnet\gluon\trainer.py in _update(self, ignore_stale_grad) 371 "call step with ignore_stale_grad=True to suppress this " 372 "warning and skip updating of Parameters with stale gradient" \ --> 373 %(param.name, str(data.context))) 374 375 if self._kvstore and self._update_on_kvstore: UserWarning: Gradient of Parameter `dense0_weight` on context cpu(0) has not been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient 复制代码