Train G first, then D
Training G:
—— S: pred1, pred2 = model(images); loss_seg1 = loss_calc(pred1, labels); loss.backward()
—— T: pred_target1, pred_target2 = model(images); D_out1 = model_D1(F.softmax(pred_target1)); loss_adv_target1 = bce_loss(D_out1, source_label); loss.backward()
Training D:
—— S: pred1 = pred1.detach(); D_out1 = model_D1(F.softmax(pred1)); loss_D1 = bce_loss(D_out1, source_label); loss_D1.backward()   (source_label = 0)
—— T: pred_target1 = pred_target1.detach(); D_out1 = model_D1(F.softmax(pred_target1)); loss_D1 = bce_loss(D_out1, target_label); loss_D1.backward()   (target_label = 1)
Note: bce_loss behaves like nn.BCELoss(F.sigmoid(input), target). The input here is D_out1, so the discriminator's raw score map is squashed by a sigmoid and then compared against 0 or 1, i.e. a per-location binary classification between source and target (a small sketch follows after the overview below).
optimizer.step()
optimizer_D1.step()
optimizer_D2.step()

print('exp = {}'.format(args.snapshot_dir))
print('iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f}'.format(
    i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
    loss_adv_target_value1, loss_adv_target_value2, loss_D_value1, loss_D_value2))

exp = ./snapshots/
iter = 0/ 1, loss_seg1 = 5.472 loss_seg2 = 5.143 loss_adv1 = 0.695, loss_adv2 = 0.687 loss_D1 = 0.693 loss_D2 = 0.693
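To make the label trick concrete, here is a minimal sketch, assuming bce_loss is torch.nn.BCEWithLogitsLoss() (sigmoid + BCE fused, matching the note above); D_out1 is a random tensor used purely for illustration.

import torch
import torch.nn as nn

bce_loss = nn.BCEWithLogitsLoss()   # assumption: sigmoid + BCE in one module

source_label = 0   # "this prediction came from the source domain"
target_label = 1   # "this prediction came from the target domain"

D_out1 = torch.randn(1, 1, 16, 32)  # stand-in for raw discriminator scores (logits)

# Training G on target images: the target tensor is filled with source_label,
# so G is rewarded when target predictions look like source predictions.
fool_target = torch.full_like(D_out1, float(source_label))
loss_adv = bce_loss(D_out1, fool_target)

# Training D: each domain keeps its true label (0 for source, 1 for target).
true_target = torch.full_like(D_out1, float(target_label))
loss_d = bce_loss(D_out1, true_target)
print(loss_adv.item(), loss_d.item())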
Discriminator D
class FCDiscriminator(nn.Module):
    def __init__(self, num_classes, ndf=64):
        super(FCDiscriminator, self).__init__()
        self.conv1 = nn.Conv2d(num_classes, ndf, kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(ndf, ndf*2, kernel_size=4, stride=2, padding=1)
        self.conv3 = nn.Conv2d(ndf*2, ndf*4, kernel_size=4, stride=2, padding=1)
        self.conv4 = nn.Conv2d(ndf*4, ndf*8, kernel_size=4, stride=2, padding=1)
        self.classifier = nn.Conv2d(ndf*8, 1, kernel_size=4, stride=2, padding=1)
        self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        #self.up_sample = nn.Upsample(scale_factor=32, mode='bilinear')
        #self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.leaky_relu(x)
        x = self.conv2(x)
        x = self.leaky_relu(x)
        x = self.conv3(x)
        x = self.leaky_relu(x)
        x = self.conv4(x)
        x = self.leaky_relu(x)
        x = self.classifier(x)
        # x = self.up_sample(x)
        # x = self.sigmoid(x)
        return x

model_D1 = FCDiscriminator(num_classes=args.num_classes)
model_D2 = FCDiscriminator(num_classes=args.num_classes)

source_label = 0
target_label = 1
model = DeeplabMulti(num_classes=args.num_classes)
model_D1:
FCDiscriminator(
  (conv1): Conv2d(19, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (conv3): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (conv4): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (classifier): Conv2d(512, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (leaky_relu): LeakyReLU(negative_slope=0.2, inplace=True)
)
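As a quick shape check (a hypothetical snippet, not from the original script), feeding a softmaxed 19-channel prediction map through model_D1 shows the five stride-2 convolutions shrinking the spatial size by a factor of 2^5 = 32:

import torch
import torch.nn.functional as F

# Hypothetical input: a fake prediction map with 19 classes at 512 x 1024.
fake_pred = torch.randn(1, 19, 512, 1024)
with torch.no_grad():
    score_map = model_D1(F.softmax(fake_pred, dim=1))
print(score_map.shape)  # torch.Size([1, 1, 16, 32]) -- 512/32 x 1024/32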
Generator G
model = DeeplabMulti(num_classes=args.num_classes)

def DeeplabMulti(num_classes=21):
    model = ResNetMulti(Bottleneck, [3, 4, 23, 3], num_classes)
    return model

class ResNetMulti(nn.Module):
    def __init__(self, block, layers, num_classes):
        self.inplanes = 64
        super(ResNetMulti, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64, affine=affine_par)
        for i in self.bn1.parameters():
            i.requires_grad = False
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)  # change
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
        self.layer5 = self._make_pred_layer(Classifier_Module, 1024, [6, 12, 18, 24], [6, 12, 18, 24], num_classes)
        self.layer6 = self._make_pred_layer(Classifier_Module, 2048, [6, 12, 18, 24], [6, 12, 18, 24], num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)

        x = self.layer3(x)
        x1 = self.layer5(x)

        x2 = self.layer4(x)
        x2 = self.layer6(x2)
        return x1, x2
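A small usage sketch (assuming the full ResNetMulti definition from the repo, including _make_layer, Bottleneck and Classifier_Module, which are omitted above): the generator returns two coarse score maps, one from layer5 and one from layer6, which the training code then upsamples back to the input resolution.

import torch
import torch.nn as nn

model = DeeplabMulti(num_classes=19)
images = torch.randn(1, 3, 720, 1280)   # one source-domain image

pred1, pred2 = model(images)             # coarse score maps from layer5 / layer6
# assumption: interp is a bilinear nn.Upsample back to the image size
interp = nn.Upsample(size=(720, 1280), mode='bilinear', align_corners=True)
pred1, pred2 = interp(pred1), interp(pred2)
print(pred1.shape, pred2.shape)          # both torch.Size([1, 19, 720, 1280])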
Training G
for sub_i in range(args.iter_size):  # iter_size iterations (1 here)

    ############# train G

    # While training G, do not accumulate gradients for D
    for param in model_D1.parameters():
        param.requires_grad = False
    for param in model_D2.parameters():
        param.requires_grad = False

    # train with source
    _, batch = trainloader_iter.__next__()

    images, labels, _, _ = batch
    images = Variable(images).cuda(args.gpu)

    pred1, pred2 = model(images)  # score maps from the last two prediction layers
    pred1 = interp(pred1)         # upsample the score maps
    pred2 = interp(pred2)

    loss_seg1 = loss_calc(pred1, labels, args.gpu)
    loss_seg2 = loss_calc(pred2, labels, args.gpu)
    loss = loss_seg2 + args.lambda_seg * loss_seg1  # loss_seg2 dominates: the last layer gives loss_seg2, the second-to-last layer gives loss_seg1

    # proper normalization
    loss = loss / args.iter_size
    loss.backward()  # backpropagate the segmentation loss
    loss_seg_value1 += loss_seg1.data.item() / args.iter_size
    loss_seg_value2 += loss_seg2.data.item() / args.iter_size

    # train with target
    _, batch = targetloader_iter.__next__()
    images, _, _ = batch
    images = Variable(images).cuda(args.gpu)

    pred_target1, pred_target2 = model(images)
    pred_target1 = interp_target(pred_target1)
    pred_target2 = interp_target(pred_target2)

    D_out1 = model_D1(F.softmax(pred_target1))  # discriminator scores for the target-domain predictions from the last two layers (why is softmax still applied here?)
    # print('D_out1.shape:', D_out1.shape)
    D_out2 = model_D2(F.softmax(pred_target2))

    loss_adv_target1 = bce_loss(D_out1,
                                Variable(torch.FloatTensor(D_out1.data.size()).fill_(source_label)).cuda(
                                    args.gpu))
    loss_adv_target2 = bce_loss(D_out2,
                                Variable(torch.FloatTensor(D_out2.data.size()).fill_(source_label)).cuda(
                                    args.gpu))
    loss = args.lambda_adv_target1 * loss_adv_target1 + args.lambda_adv_target2 * loss_adv_target2
    loss = loss / args.iter_size
    loss.backward()  # backpropagate the adversarial loss
    loss_adv_target_value1 += loss_adv_target1.item() / args.iter_size
    loss_adv_target_value2 += loss_adv_target2.item() / args.iter_size
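loss_calc is not shown in this post; the following is a minimal sketch of what it might look like, assuming a standard per-pixel cross-entropy with ignore label 255 (a hypothetical reconstruction, not the repo's exact code):

import torch.nn as nn

def loss_calc(pred, label, gpu):
    # pred: (N, C, H, W) raw class scores; label: (N, H, W) ground-truth class indices
    label = label.long().cuda(gpu)
    criterion = nn.CrossEntropyLoss(ignore_index=255).cuda(gpu)  # 255 = ignore label (assumption)
    return criterion(pred, label)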
Training D
    ############# train D

    # bring back requires_grad: now compute gradients for the discriminators
    for param in model_D1.parameters():
        param.requires_grad = True
    for param in model_D2.parameters():
        param.requires_grad = True

    # train with source
    pred1 = pred1.detach()  # detach(): cut the gradient flow at this node, turning it into a Variable that needs no gradient
    pred2 = pred2.detach()

    D_out1 = model_D1(F.softmax(pred1))
    D_out2 = model_D2(F.softmax(pred2))

    loss_D1 = bce_loss(D_out1,
                       Variable(torch.FloatTensor(D_out1.data.size()).fill_(source_label)).cuda(args.gpu))
    loss_D2 = bce_loss(D_out2,
                       Variable(torch.FloatTensor(D_out2.data.size()).fill_(source_label)).cuda(args.gpu))

    loss_D1 = loss_D1 / args.iter_size / 2
    loss_D2 = loss_D2 / args.iter_size / 2

    loss_D1.backward()
    loss_D2.backward()

    loss_D_value1 += loss_D1.item()
    loss_D_value2 += loss_D2.item()

    # train with target
    pred_target1 = pred_target1.detach()
    pred_target2 = pred_target2.detach()

    D_out1 = model_D1(F.softmax(pred_target1))
    D_out2 = model_D2(F.softmax(pred_target2))

    loss_D1 = bce_loss(D_out1,
                       Variable(torch.FloatTensor(D_out1.data.size()).fill_(target_label)).cuda(args.gpu))
    loss_D2 = bce_loss(D_out2,
                       Variable(torch.FloatTensor(D_out2.data.size()).fill_(target_label)).cuda(args.gpu))

    loss_D1 = loss_D1 / args.iter_size / 2
    loss_D2 = loss_D2 / args.iter_size / 2

    loss_D1.backward()
    loss_D2.backward()

    loss_D_value1 += loss_D1.item()
    loss_D_value2 += loss_D2.item()

optimizer.step()
optimizer_D1.step()
optimizer_D2.step()
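A tiny illustration (not from the repo) of why pred1.detach() matters when training D: gradients from the discriminator loss must not flow back into the generator's parameters.

import torch

# stand-ins for one generator parameter and one discriminator parameter
w_g = torch.tensor(2.0, requires_grad=True)
w_d = torch.tensor(0.5, requires_grad=True)

pred = w_g * 3.0                              # "generator" output
loss_d = (w_d * pred.detach() - 1.0) ** 2     # D's loss only sees a detached copy of pred
loss_d.backward()

print(w_g.grad)   # None: no gradient reaches the generator
print(w_d.grad)   # a real gradient: only the discriminator is updated here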
class Conv2d(_ConvNd):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros'):
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(Conv2d, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)

    def _conv_forward(self, input, weight):
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, input):
        return self._conv_forward(input, self.weight)
pred1:
pred1.shape: torch.Size([1, 19, 720, 1280]) (batch size, channel, height, width)
pred2:
tensor([[[[-1.3769, -1.3769, -1.3769, ..., 1.5919, 1.5919, 1.5919], [-1.3769, -1.3769, -1.3769, ..., 1.5919, 1.5919, 1.5919], [-1.3769, -1.3769, -1.3769, ..., 1.5919, 1.5919, 1.5919], ..., [[ 5.6749, 5.6749, 5.6749, ..., -1.6352, -1.6352, -1.6352], [ 5.6749, 5.6749, 5.6749, ..., -1.6352, -1.6352, -1.6352], [ 5.6749, 5.6749, 5.6749, ..., -1.6352, -1.6352, -1.6352], ..., [ 0.5306, 0.5306, 0.5306, ..., -0.0221, -0.0221, -0.0221], [ 0.5306, 0.5306, 0.5306, ..., -0.0221, -0.0221, -0.0221], [ 0.5306, 0.5306, 0.5306, ..., -0.0221, -0.0221, -0.0221]]]], device='cuda:0', grad_fn=<UpsampleBilinear2DBackward>)
Images:
images.shape: torch.Size([1, 3, 720, 1280])
tensor([[[[-43.0070, -43.0070, -43.0070, ..., -14.0070, -14.0070, -14.0070], [-43.0070, -43.0070, -43.0070, ..., -14.0070, -15.0070, -15.0070], [-43.0070, -43.0070, -43.0070, ..., -14.0070, -15.0070, -15.0070], ..., [[-87.6789, -87.6789, -87.6789, ..., -64.6789, -65.6789, -65.6789], [-87.6789, -87.6789, -87.6789, ..., -64.6789, -65.6789, -65.6789], [-87.6789, -87.6789, -87.6789, ..., -64.6789, -65.6789, -65.6789], ..., [ -8.6789, -13.6789, -8.6789, ..., -6.6789, -1.6789, 13.3211], [ -6.6789, -1.6789, 13.3211, ..., 2.3211, 2.3211, 3.3211], [ 13.3211, 14.3211, 20.3211, ..., 24.3211, 11.3211, -0.6789]]]], device='cuda:0')
labels:
labels.shape: torch.Size([1, 720, 1280])
tensor([[[2., 2., 2., ..., 2., 2., 2.], [2., 2., 2., ..., 2., 2., 2.], [2., 2., 2., ..., 2., 2., 2.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]]])
batch:
<class 'list'>: [tensor([[[[-43.0070, -43.0070, -43.0070, ..., -14.0070, -14.0070, -14.0070], [-43.0070, -43.0070, -43.0070, ..., -14.0070, -15.0070, -15.0070], [-43.0070, -43.0070, -43.0070, ..., -14.0070, -15.0070, -15.0070], ..., [[-87.6789, -87.6789, -87.6789, ..., -64.6789, -65.6789, -65.6789], [-87.6789, -87.6789, -87.6789, ..., -64.6789, -65.6789, -65.6789], [-87.6789, -87.6789, -87.6789, ..., -64.6789, -65.6789, -65.6789], ..., [ -8.6789, -13.6789, -8.6789, ..., -6.6789, -1.6789, 13.3211], [ -6.6789, -1.6789, 13.3211, ..., 2.3211, 2.3211, 3.3211], [ 13.3211, 14.3211, 20.3211, ..., 24.3211, 11.3211, -0.6789]]]]), tensor([[[2., 2., 2., ..., 2., 2., 2.], [2., 2., 2., ..., 2., 2., 2.], [2., 2., 2., ..., 2., 2., 2.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]]]), tensor([[ 720, 1280, 3]]), ['19636.png']]
loss_seg1: loss_seg1 = loss_calc(pred1, labels, args.gpu)
tensor(4.7282, device='cuda:0', grad_fn=<NllLossBackward>)
loss_seg2:
tensor(5.1414, device='cuda:0', grad_fn=<NllLossBackward>)
loss_seg1 = 6.467, loss_seg2 = 5.127, loss_adv1 = 0.688, loss_adv2 = 0.693, loss_D1 = 0.693, loss_D2 = 0.693
pred_target1: pred_target1, pred_target2 = model(images)
pred_target1.shape: torch.Size([1, 19, 65, 129])
pred_target1 after upsampling: interp_pred_target1.shape: torch.Size([1, 19, 512, 1024])
tensor([[[[-4.8436, -4.8436, -4.8436, ..., -0.3238, -0.3238, -0.3238], [-4.8436, -4.8436, -4.8436, ..., -0.3238, -0.3238, -0.3238], [-4.8436, -4.8436, -4.8436, ..., -0.3238, -0.3238, -0.3238], ..., [ 1.0468, 1.0468, 1.0468, ..., -1.2073, -1.2073, -1.2073], [ 1.0468, 1.0468, 1.0468, ..., -1.2073, -1.2073, -1.2073], [ 1.0468, 1.0468, 1.0468, ..., -1.2073, -1.2073, -1.2073]], [[ 1.5940, 1.5940, 1.5940, ..., 0.3063, 0.3063, 0.3063], [ 1.5940, 1.5940, 1.5940, ..., 0.3063, 0.3063, 0.3063], [ 1.5940, 1.5940, 1.5940, ..., 0.3063, 0.3063, 0.3063], ..., [-3.3119, -3.3119, -3.3119, ..., -0.8856, -0.8856, -0.8856], [-3.3119, -3.3119, -3.3119, ..., -0.8856, -0.8856, -0.8856], [-3.3119, -3.3119, -3.3119, ..., -0.8856, -0.8856, -0.8856]]]], device='cuda:0', grad_fn=<UpsampleBilinear2DBackward>)
D_out1 : D_out1 = model_D1(F.softmax(pred_target1))
D_out1.shape: torch.Size([1, 1, 16, 32]) -- a tensor with 16 rows and 32 columns
tensor([[[[0.0063, 0.0083, 0.0089, 0.0097, 0.0071, 0.0091, 0.0084, 0.0092, 0.0082, 0.0081, 0.0091, 0.0074, 0.0087, 0.0089, 0.0089, 0.0069, 0.0109, 0.0111, 0.0072, 0.0070, 0.0083, 0.0082, 0.0084, 0.0088, 0.0075, 0.0076, 0.0086, 0.0098, 0.0104, 0.0088, 0.0080, 0.0112], [0.0069, 0.0097, 0.0101, 0.0111, 0.0104, 0.0115, 0.0102, 0.0095, 0.0114, 0.0099, 0.0110, 0.0100, 0.0103, 0.0115, 0.0097, 0.0105, 0.0121, 0.0096, 0.0108, 0.0092, 0.0126, 0.0090, 0.0095, 0.0091, 0.0127, 0.0104, 0.0102, 0.0115, 0.0116, 0.0091, 0.0112, 0.0130], [0.0068, 0.0098, 0.0096, 0.0115, 0.0097, 0.0104, 0.0105, 0.0121, 0.0120, 0.0098, 0.0124, 0.0124, 0.0102, 0.0121, 0.0119, 0.0116, 0.0090, 0.0111, 0.0104, 0.0109, 0.0129, 0.0109, 0.0097, 0.0092, 0.0079, 0.0103, 0.0096, 0.0106, 0.0102, 0.0099, 0.0083, 0.0137], [0.0061, 0.0081, 0.0090, 0.0115, 0.0121, 0.0102, 0.0111, 0.0107, 0.0121, 0.0111, 0.0119, 0.0108, 0.0119, 0.0110, 0.0099, 0.0123, 0.0101, 0.0075, 0.0113, 0.0130, 0.0105, 0.0100, 0.0067, 0.0091, 0.0091, 0.0074, 0.0091, 0.0068, 0.0096, 0.0089, 0.0100, 0.0151], [0.0064, 0.0096, 0.0092, 0.0090, 0.0086, 0.0106, 0.0099, 0.0119, 0.0110, 0.0104, 0.0119, 0.0074, 0.0088, 0.0110, 0.0088, 0.0111, 0.0087, 0.0091, 0.0097, 0.0110, 0.0113, 0.0083, 0.0107, 0.0098, 0.0103, 0.0095, 0.0087, 0.0093, 0.0103, 0.0120, 0.0128, 0.0138], [0.0058, 0.0093, 0.0124, 0.0090, 0.0103, 0.0105, 0.0074, 0.0091, 0.0103, 0.0103, 0.0095, 0.0090, 0.0090, 0.0072, 0.0057, 0.0099, 0.0067, 0.0077, 0.0067, 0.0100, 0.0091, 0.0103, 0.0077, 0.0106, 0.0097, 0.0100, 0.0097, 0.0066, 0.0104, 0.0093, 0.0060, 0.0125], [0.0079, 0.0122, 0.0117, 0.0107, 0.0100, 0.0086, 0.0086, 0.0084, 0.0093, 0.0093, 0.0102, 0.0091, 0.0119, 0.0075, 0.0083, 0.0089, 0.0051, 0.0101, 0.0101, 0.0087, 0.0082, 0.0090, 0.0088, 0.0116, 0.0118, 0.0118, 0.0079, 0.0104, 0.0102, 0.0101, 0.0123, 0.0121], [0.0064, 0.0099, 0.0107, 0.0110, 0.0087, 0.0070, 0.0079, 0.0095, 0.0088, 0.0085, 0.0102, 0.0102, 0.0091, 0.0099, 0.0098, 0.0099, 0.0100, 0.0098, 0.0085, 0.0080, 0.0090, 0.0079, 0.0074, 0.0087, 0.0091, 0.0103, 0.0095, 0.0110, 0.0093, 0.0096, 0.0096, 0.0117], [0.0073, 0.0096, 0.0093, 0.0121, 0.0095, 0.0099, 0.0080, 0.0088, 0.0087, 0.0098, 0.0106, 0.0096, 0.0114, 0.0106, 0.0117, 0.0111, 0.0097, 0.0097, 0.0094, 0.0097, 0.0080, 0.0094, 0.0102, 0.0091, 0.0116, 0.0091, 0.0081, 0.0105, 0.0106, 0.0091, 0.0122, 0.0131], [0.0089, 0.0098, 0.0093, 0.0111, 0.0083, 0.0094, 0.0111, 0.0119, 0.0104, 0.0093, 0.0088, 0.0101, 0.0095, 0.0116, 0.0101, 0.0091, 0.0103, 0.0117, 0.0129, 0.0103, 0.0094, 0.0087, 0.0086, 0.0099, 0.0126, 0.0108, 0.0113, 0.0094, 0.0089, 0.0099, 0.0083, 0.0128], [0.0056, 0.0104, 0.0097, 0.0091, 0.0107, 0.0094, 0.0101, 0.0116, 0.0094, 0.0113, 0.0110, 0.0102, 0.0111, 0.0081, 0.0093, 0.0090, 0.0102, 0.0109, 0.0117, 0.0103, 0.0086, 0.0098, 0.0094, 0.0091, 0.0094, 0.0125, 0.0101, 0.0107, 0.0101, 0.0085, 0.0097, 0.0132], [0.0073, 0.0100, 0.0104, 0.0104, 0.0103, 0.0108, 0.0089, 0.0087, 0.0077, 0.0077, 0.0118, 0.0111, 0.0096, 0.0100, 0.0107, 0.0110, 0.0108, 0.0095, 0.0094, 0.0085, 0.0075, 0.0104, 0.0105, 0.0119, 0.0115, 0.0111, 0.0119, 0.0104, 0.0105, 0.0107, 0.0098, 0.0141], [0.0070, 0.0100, 0.0109, 0.0085, 0.0061, 0.0083, 0.0117, 0.0093, 0.0098, 0.0106, 0.0107, 0.0098, 0.0101, 0.0118, 0.0094, 0.0097, 0.0077, 0.0095, 0.0087, 0.0096, 0.0106, 0.0102, 0.0109, 0.0100, 0.0102, 0.0100, 0.0093, 0.0083, 0.0095, 0.0080, 0.0109, 0.0132], [0.0069, 0.0093, 0.0086, 0.0089, 0.0081, 0.0101, 0.0085, 0.0095, 0.0099, 0.0082, 0.0093, 0.0083, 0.0076, 0.0083, 0.0091, 0.0092, 0.0106, 0.0093, 0.0096, 0.0099, 0.0094, 0.0099, 0.0076, 
0.0085, 0.0087, 0.0093, 0.0093, 0.0071, 0.0088, 0.0088, 0.0096, 0.0125], [0.0066, 0.0094, 0.0123, 0.0115, 0.0097, 0.0094, 0.0083, 0.0096, 0.0096, 0.0110, 0.0080, 0.0092, 0.0102, 0.0093, 0.0101, 0.0097, 0.0099, 0.0109, 0.0107, 0.0098, 0.0094, 0.0101, 0.0106, 0.0102, 0.0092, 0.0098, 0.0106, 0.0106, 0.0098, 0.0092, 0.0099, 0.0142], [0.0075, 0.0117, 0.0124, 0.0126, 0.0120, 0.0120, 0.0104, 0.0118, 0.0123, 0.0125, 0.0130, 0.0132, 0.0129, 0.0141, 0.0131, 0.0136, 0.0118, 0.0124, 0.0135, 0.0107, 0.0114, 0.0115, 0.0117, 0.0112, 0.0107, 0.0115, 0.0121, 0.0122, 0.0129, 0.0124, 0.0098, 0.0138]]]], device='cuda:0', grad_fn=<AddBackward0>)
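Where the 16 × 32 score map above comes from: the target prediction is upsampled to 512 × 1024, and each of the discriminator's five kernel_size=4, stride=2, padding=1 convolutions roughly halves the spatial size. A small check (conv_out is a hypothetical helper, just the standard convolution output-size formula):

def conv_out(n, k=4, s=2, p=1):
    # standard convolution output-size formula
    return (n + 2 * p - k) // s + 1

h, w = 512, 1024
for _ in range(5):          # conv1..conv4 plus classifier
    h, w = conv_out(h), conv_out(w)
print(h, w)                 # 16 32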