174 lines
6.6 KiB
Python
174 lines
6.6 KiB
Python
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
from utils.utils import process_zero_values
|
|
import ipdb
|
|
|
|
|
|
def _assert_no_grad(variable):
    """Raise ``AssertionError`` if ``variable`` requires gradients.

    Args:
        variable: Object exposing a ``requires_grad`` attribute.

    Raises:
        AssertionError: If ``variable.requires_grad`` is truthy.
    """
    # Raised explicitly instead of through an ``assert`` statement so the
    # check is not stripped when Python runs with ``-O``. The exception type
    # and message are the same as before.
    if variable.requires_grad:
        raise AssertionError(
            "nn criterions don't compute the gradient w.r.t. targets - please "
            "mark these variables as volatile or not requiring gradients"
        )
|
|
|
|
|
|
class _Loss(nn.Module):
    """Base module for the losses in this file.

    Args:
        size_average: Flag kept on the instance as ``self.size_average``.
    """

    def __init__(self, size_average=True):
        super(_Loss, self).__init__()
        # Only recorded here; nothing in this class reads it back.
        self.size_average = size_average
|
|
|
|
|
|
class _WeightedLoss(_Loss):
    """Loss base class that additionally holds an optional ``weight`` buffer.

    Args:
        weight: Tensor (or ``None``) registered as the ``weight`` buffer.
        size_average: Forwarded unchanged to ``_Loss``.
    """

    def __init__(self, weight=None, size_average=True):
        super(_WeightedLoss, self).__init__(size_average)
        # Registered as a module buffer rather than a parameter.
        self.register_buffer('weight', weight)
|
|
|
|
|
|
class CrossEntropyClassWeighted(_Loss):
    """Cross-entropy criterion whose class weights are supplied per call.

    Args:
        size_average: Forwarded to ``_Loss``; not used by ``forward``.
        ignore_index: Passed to ``F.cross_entropy`` as ``ignore_index``.
        reduce: Accepted in the signature but not used by this class.
        reduction: Passed to ``F.cross_entropy`` as ``reduction``.
    """

    def __init__(self, size_average=True, ignore_index=-100, reduce=None, reduction='elementwise_mean'):
        super(CrossEntropyClassWeighted, self).__init__(size_average)
        self.reduction = reduction
        self.ignore_index = ignore_index

    def forward(self, input, target, weight=None):
        """Return ``F.cross_entropy(input, target)`` using the given ``weight``."""
        return F.cross_entropy(
            input,
            target,
            weight=weight,
            ignore_index=self.ignore_index,
            reduction=self.reduction,
        )
|
|
|
|
|
|
### Cloned from https://github.com/krumo/swd_pytorch/blob/master/swd_pytorch.py (unofficial implementation).
|
|
def discrepancy_slice_wasserstein(p1, p2, num_projections=128):
    """Mean squared difference between column-wise sorted (projected) inputs.

    When ``p1`` has more than one column, both inputs are first multiplied by
    the same random matrix whose columns are scaled to unit L2 norm. Each
    resulting column is then sorted in descending order (via a full-length
    ``topk`` along dim 0) and the mean of the squared element-wise difference
    is returned.

    Args:
        p1: 2-D tensor; its shape determines the projection size and the
            number of rows kept by ``topk``.
        p2: Tensor combined with ``p1``; assumed to have the same shape and
            device as ``p1`` (not checked here).
        num_projections: Number of random projection directions. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        Scalar tensor with the mean squared difference.
    """
    s = p1.shape
    if s[1] > 1:
        # Draw from the default (CPU) generator as before, then move to the
        # device of the inputs instead of unconditionally calling ``.cuda()``,
        # so the function also works on CPU-only hosts.
        proj = torch.randn(s[1], num_projections).to(p1.device)
        # Scale every projection direction (column) to unit L2 norm.
        proj *= torch.rsqrt(torch.sum(torch.mul(proj, proj), 0, keepdim=True))
        # Match the input dtype so ``matmul`` accepts non-float32 inputs.
        proj = proj.to(p1.dtype)
        p1 = torch.matmul(p1, proj)
        p2 = torch.matmul(p2, proj)
    # topk with k == number of rows sorts each column in descending order.
    p1 = torch.topk(p1, s[0], dim=0)[0]
    p2 = torch.topk(p2, s[0], dim=0)[0]
    dist = p1 - p2
    wdist = torch.mean(torch.mul(dist, dist))

    return wdist
|
|
|
|
|
|
class McDalNetLoss(_WeightedLoss):
    """Discrepancy between two sets of logits under a selectable distance.

    Args:
        weight: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        size_average: Forwarded to ``_WeightedLoss``; not used by ``forward``.
    """

    def __init__(self, weight=None, size_average=True):
        super(McDalNetLoss, self).__init__(weight, size_average)

    def forward(self, input1, input2, dis_type='L1'):
        """Return the discrepancy between ``input1`` and ``input2``.

        Softmax / log-softmax are always taken over dim 1.

        Args:
            input1: Logits of the first branch.
            input2: Logits of the second branch.
            dis_type: One of ``'L1'``, ``'CE'``, ``'KL'``, ``'L2'`` or
                ``'Wasse'``.

        Returns:
            Scalar tensor holding the selected discrepancy.

        Raises:
            ValueError: If ``dis_type`` is not one of the supported names.
        """
        if dis_type == 'L1':
            prob_s = F.softmax(input1, dim=1)
            prob_t = F.softmax(input2, dim=1)
            loss = torch.mean(torch.abs(prob_s - prob_t))  # element-wise mean
        elif dis_type == 'CE':  # symmetric cross entropy
            loss = - ((F.log_softmax(input2, dim=1)).mul(F.softmax(input1, dim=1))).mean() - (
                (F.log_softmax(input1, dim=1)).mul(F.softmax(input2, dim=1))).mean()
            loss = loss * 0.5
        elif dis_type == 'KL':
            # Averaged over elements, not the real KL divergence (which is
            # summed over the elements of an instance and averaged over
            # instances).
            # ``dim=1`` is now explicit: the implicit-dim form is deprecated
            # and picked dim 1 for 2-D logits anyway; this also matches the
            # other branches.
            loss = (F.kl_div(F.log_softmax(input1, dim=1), F.softmax(input2, dim=1))) + (
                F.kl_div(F.log_softmax(input2, dim=1), F.softmax(input1, dim=1)))
            loss = loss * 0.5
        # The following two distances are not evaluated in our paper and need
        # further investigation.
        elif dis_type == 'L2':
            nClass = input1.size()[1]
            prob_s = F.softmax(input1, dim=1)
            prob_t = F.softmax(input2, dim=1)
            # Per-row L2 norm, averaged over rows and divided by the class count.
            loss = torch.norm(prob_s - prob_t, p=2, dim=1).mean() / nClass
        elif dis_type == 'Wasse':
            # Distance proposed in "Sliced Wasserstein discrepancy for
            # unsupervised domain adaptation".
            prob_s = F.softmax(input1, dim=1)
            prob_t = F.softmax(input2, dim=1)
            loss = discrepancy_slice_wasserstein(prob_s, prob_t)
        else:
            # Previously an unknown name fell through to ``return loss`` and
            # failed with an UnboundLocalError.
            raise ValueError(
                "Unsupported dis_type %r; expected one of 'L1', 'CE', 'KL', 'L2', 'Wasse'" % (dis_type,)
            )

        return loss
|
|
|
|
|
|
class TargetDiscrimLoss(_WeightedLoss):
    """Negative mean log of the softmax mass in columns ``num_classes:``.

    Args:
        weight: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        size_average: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        num_classes: Column index at which the summed slice starts.
    """

    def __init__(self, weight=None, size_average=True, num_classes=31):
        super(TargetDiscrimLoss, self).__init__(weight, size_average)
        self.num_classes = num_classes

    def forward(self, input):
        """Return ``-mean(log(sum(softmax(input)[:, num_classes:], dim=1)))``.

        Rows whose summed probability is exactly zero get ``1e-6`` added
        before the log so the result stays finite.
        """
        prob = F.softmax(input, dim=1)
        mass = prob[:, self.num_classes:].sum(1)

        # In case of log(0): add 1e-6 only where the mass is exactly zero.
        # The mask is built on the same device/dtype as ``mass`` and detached,
        # replacing the former CPU FloatTensor + ``.cuda()`` round-trip, which
        # failed on CPU-only hosts. Rows with non-zero mass get +0.0, so the
        # value matches the old no-zero branch.
        eps = (mass.detach() == 0).to(mass.dtype) * 1e-6
        loss = -((mass + eps).log().mean())
        return loss
|
|
|
|
class SourceDiscrimLoss(_WeightedLoss):
    """Negative mean log of the softmax mass in columns ``:num_classes``.

    Args:
        weight: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        size_average: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        num_classes: Number of leading columns whose probabilities are summed.
    """

    def __init__(self, weight=None, size_average=True, num_classes=31):
        super(SourceDiscrimLoss, self).__init__(weight, size_average)
        self.num_classes = num_classes

    def forward(self, input):
        """Return ``-mean(log(sum(softmax(input)[:, :num_classes], dim=1)))``.

        Rows whose summed probability is exactly zero get ``1e-6`` added
        before the log so the result stays finite.
        """
        prob = F.softmax(input, dim=1)
        mass = prob[:, :self.num_classes].sum(1)

        # In case of log(0): add 1e-6 only where the mass is exactly zero.
        # The mask is built on the same device/dtype as ``mass`` and detached,
        # replacing the former CPU FloatTensor + ``.cuda()`` round-trip, which
        # failed on CPU-only hosts. Rows with non-zero mass get +0.0, so the
        # value matches the old no-zero branch.
        eps = (mass.detach() == 0).to(mass.dtype) * 1e-6
        loss = -((mass + eps).log().mean())
        return loss
|
|
|
|
|
|
class ConcatenatedCELoss(_WeightedLoss):
    """Symmetric cross entropy between the two column halves of one softmax.

    Args:
        weight: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        size_average: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        num_classes: Column index that splits the softmax output in two.
    """

    def __init__(self, weight=None, size_average=True, num_classes=31):
        super(ConcatenatedCELoss, self).__init__(weight, size_average)
        self.num_classes = num_classes

    def forward(self, input):
        """Return half the sum of the two cross-entropy terms between halves.

        The softmax is taken over all columns of ``input`` before splitting,
        and each half goes through ``process_zero_values`` before its log is
        taken.
        """
        split = self.num_classes
        probs = F.softmax(input, dim=1)
        first_half = probs[:, :split]
        second_half = probs[:, split:]

        # NOTE(review): process_zero_values is defined in utils.utils;
        # presumably it guards against log(0) - confirm there.
        first_half = process_zero_values(first_half)
        second_half = process_zero_values(second_half)

        first_term = (first_half.log().mul(second_half)).sum(1).mean()
        second_term = (second_half.log().mul(first_half)).sum(1).mean()
        return (- first_term - second_term) * 0.5
|
|
|
|
|
|
|
|
class ConcatenatedEMLoss(_WeightedLoss):
    """Entropy of the element-wise sum of the two softmax column halves.

    Args:
        weight: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        size_average: Forwarded to ``_WeightedLoss``; not used by ``forward``.
        num_classes: Column index that splits the softmax output in two.
    """

    def __init__(self, weight=None, size_average=True, num_classes=31):
        super(ConcatenatedEMLoss, self).__init__(weight, size_average)
        self.num_classes = num_classes

    def forward(self, input):
        """Return ``-mean(sum(p * log(p), dim=1))`` for the merged halves ``p``."""
        split = self.num_classes
        probs = F.softmax(input, dim=1)
        # Add the two halves column by column.
        merged = probs[:, :split] + probs[:, split:]
        # NOTE(review): process_zero_values is defined in utils.utils;
        # presumably it guards against log(0) - confirm there.
        merged = process_zero_values(merged)
        neg_entropy = merged.log().mul(merged).sum(1).mean()
        return - neg_entropy
|
|
|
|
class MinEntropyConsensusLoss(nn.Module):
    """Minimum over classes of the summed negative log-probabilities of two inputs.

    Args:
        num_classes: Size of the identity mask; inputs are expected to have
            this many columns.
    """

    def __init__(self, num_classes):
        super(MinEntropyConsensusLoss, self).__init__()
        self.num_classes = num_classes

    def forward(self, x, y):
        """Return ``0.5 * mean(min_k(-log_softmax(x)[:, k] - log_softmax(y)[:, k]))``.

        Args:
            x: Logits of the first input, with classes along dim 1.
            y: Logits of the second input, same shape as ``x``.

        Returns:
            Scalar tensor with the loss.
        """
        # Identity mask placed on the device of ``x`` instead of an
        # unconditional ``.cuda()``, so the loss also runs on CPU-only hosts.
        i = torch.eye(self.num_classes).unsqueeze(0).to(x.device)
        x = F.log_softmax(x, dim=1)
        y = F.log_softmax(y, dim=1)
        x = x.unsqueeze(-1)
        y = y.unsqueeze(-1)

        # Masking with the identity and summing over dim 1 leaves, for every
        # class, the negated log-probability of that class.
        ce_x = (- 1.0 * i * x).sum(1)
        ce_y = (- 1.0 * i * y).sum(1)

        # Per row, keep the class with the smallest combined value.
        ce = 0.5 * (ce_x + ce_y).min(1)[0].mean()

        return ce