Source code for extorch.nn.modules.loss

import torch
import torch.nn as nn
from torch import Tensor

from extorch.utils import expect
from extorch.nn.functional import dec_soft_assignment, mix_data


__all__ = [
        "HintonKDLoss",
        "CrossEntropyLabelSmooth",
        "CrossEntropyMixupLoss",
        "DECLoss"
]


class HintonKDLoss(nn.KLDivLoss):
    r"""
    Knowledge distillation loss proposed by Hinton (`Link`_).

    $L = (1 - \alpha) * L_{CE}(P_s, Y_{gt}) + \alpha * T^2 * L_{CE}(P_s, P_t)$

    Args:
        T (float): Temperature parameter (>= 1.) used to smooth the softmax output.
        alpha (float): Trade-off coefficient between the distillation loss and the original cross-entropy loss.
        reduction (str): Specifies the reduction to apply to the output. Default: "mean".
        kwargs: Other configurations for nn.CrossEntropyLoss.

    Examples::
        >>> criterion = HintonKDLoss(T = 4., alpha = 0.9)
        >>> s_output = torch.randn((5, 10))
        >>> t_output = torch.randn((5, 10))
        >>> target = torch.ones(5, dtype = torch.long)
        >>> loss = criterion(s_output, t_output, target)

    .. _Link: https://arxiv.org/abs/1503.02531
    """

    def __init__(self, T: float, alpha: float, reduction: str = "mean", **kwargs) -> None:
        kl_reduction = "batchmean" if reduction == "mean" else reduction
        super(HintonKDLoss, self).__init__(reduction = kl_reduction)

        assert T >= 1., "Parameter T should not be smaller than 1."
        self.T = T

        assert 0. <= alpha <= 1., "Parameter alpha should be in [0, 1]."
        self.alpha = alpha

        self.ce_loss = nn.CrossEntropyLoss(reduction = reduction, **kwargs)

    def forward(self, s_output: Tensor, t_output: Tensor, target: Tensor) -> Tensor:
        r"""
        Args:
            s_output (Tensor): Student network output.
            t_output (Tensor): Teacher network output.
            target (Tensor): Hard label of the input.

        Returns:
            Tensor: The calculated loss.
        """
        if self.alpha == 0.:
            return self.ce_loss(s_output, target)

        soft_loss = super(HintonKDLoss, self).forward(
                torch.log_softmax(s_output / self.T, dim = 1),
                torch.softmax(t_output / self.T, dim = 1)
        )
        if self.alpha == 1.:
            return self.T ** 2 * soft_loss

        hard_loss = self.ce_loss(s_output, target)
        return (1 - self.alpha) * hard_loss + self.alpha * self.T ** 2 * soft_loss
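

# Illustrative usage sketch (not part of the original extorch source): a single
# distillation step with HintonKDLoss. The tiny linear "student" and "teacher"
# networks and all shapes below are hypothetical stand-ins for real models.
def _hinton_kd_example() -> Tensor:
    student = nn.Linear(16, 10)
    teacher = nn.Linear(16, 10).eval()
    x = torch.randn(8, 16)
    target = torch.randint(0, 10, (8,))
    criterion = HintonKDLoss(T = 4., alpha = 0.9)
    with torch.no_grad():
        t_output = teacher(x)
    # The returned loss backpropagates through the student output only.
    return criterion(student(x), t_output, target)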


class CrossEntropyLabelSmooth(nn.Module):
    r"""
    Cross-entropy loss with label smoothing.

    Args:
        epsilon (float): Smoothing coefficient. The one-hot target is mixed with a
            uniform distribution over the classes as (1 - epsilon) * y + epsilon / num_classes.
    """

    def __init__(self, epsilon: float) -> None:
        super(CrossEntropyLabelSmooth, self).__init__()
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim = 1)

    def forward(self, input: Tensor, target: Tensor) -> Tensor:
        r"""
        Args:
            input (Tensor): Classification logits of shape (batch_size, num_classes).
            target (Tensor): Hard label of the input.

        Returns:
            Tensor: The calculated loss.
        """
        num_classes = int(input.shape[-1])
        log_probs = self.logsoftmax(input)
        target = torch.zeros_like(log_probs).scatter_(1, target.unsqueeze(1), 1)
        target = (1 - self.epsilon) * target + self.epsilon / num_classes
        loss = - (target * log_probs).mean(0).sum()
        return loss
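

# Illustrative usage sketch (not part of the original extorch source): applying
# label smoothing to random logits. The batch size, class count, and epsilon
# value are arbitrary choices for demonstration.
def _label_smooth_example() -> Tensor:
    criterion = CrossEntropyLabelSmooth(epsilon = 0.1)
    logits = torch.randn(4, 10)          # (batch_size, num_classes)
    target = torch.randint(0, 10, (4,))  # hard labels
    return criterion(logits, target)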


class CrossEntropyMixupLoss(nn.Module):
    r"""
    CrossEntropyLoss with the mixup technique.

    Args:
        alpha (float): Parameter of the beta distribution. Default: 1.0.
        kwargs: Other arguments of torch.nn.CrossEntropyLoss (`Link`_).

    .. _Link: https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html#CrossEntropyLoss
    """

    def __init__(self, alpha: float = 1., **kwargs) -> None:
        super(CrossEntropyMixupLoss, self).__init__()
        self.alpha = alpha
        self._criterion = nn.CrossEntropyLoss(**kwargs)

    def forward(self, input: Tensor, target: Tensor, net: nn.Module) -> Tensor:
        r"""
        Args:
            input (Tensor): Input examples.
            target (Tensor): Label of the input examples.
            net (nn.Module): Network to calculate the loss.

        Returns:
            loss (Tensor): The calculated loss.
        """
        mixed_input, mixed_target, _lambda = mix_data(input, target, self.alpha)
        mixed_output = net(mixed_input)
        loss = _lambda * self._criterion(mixed_output, target) + \
               (1 - _lambda) * self._criterion(mixed_output, mixed_target)
        return loss
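

# Illustrative usage sketch (not part of the original extorch source): computing
# the mixup cross-entropy loss. The linear "net" is a hypothetical stand-in for
# a real classifier; the criterion mixes the inputs internally, runs the network
# on the mixed batch, and interpolates the two cross-entropy terms with lambda.
def _mixup_example() -> Tensor:
    net = nn.Linear(32, 10)
    criterion = CrossEntropyMixupLoss(alpha = 1.)
    x = torch.randn(8, 32)
    target = torch.randint(0, 10, (8,))
    return criterion(x, target, net)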


class DECLoss(nn.Module):
    r"""
    Loss used by Deep Embedded Clustering (DEC, `Link`_).

    Args:
        alpha (float): The degrees of freedom of the Student's t-distribution. Default: 1.0.
        kwargs: Other arguments of torch.nn.KLDivLoss.

    Examples::
        >>> criterion = DECLoss(alpha = 1.)
        >>> embeddings = torch.randn((2, 10))
        >>> centers = torch.randn((3, 10))
        >>> loss = criterion(embeddings, centers)

    .. _Link: https://arxiv.org/abs/1511.06335
    """

    def __init__(self, alpha: float = 1.0, **kwargs) -> None:
        super(DECLoss, self).__init__()
        self.alpha = alpha
        self._criterion = nn.KLDivLoss(**kwargs)

    def forward(self, input: Tensor, centers: Tensor) -> Tensor:
        r"""
        Args:
            input (Tensor): Embeddings of the input examples.
            centers (Tensor): Cluster centers.

        Returns:
            Tensor: The calculated loss.
        """
        q = dec_soft_assignment(input, centers, self.alpha)
        p = self.target_distribution(q).detach()
        return self._criterion(q.log(), p)

    @staticmethod
    def target_distribution(input: Tensor) -> Tensor:
        r"""
        Compute the auxiliary target distribution from the soft assignments by
        squaring them, dividing by the per-cluster frequencies, and re-normalizing
        each row, following the DEC paper.
        """
        weight = (input ** 2) / torch.sum(input, 0)
        return (weight.t() / torch.sum(weight, 1)).t()
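

# Illustrative usage sketch (not part of the original extorch source): one DEC
# loss evaluation with learnable cluster centers. The embedding dimension, the
# number of clusters, and the "batchmean" reduction are assumptions made for
# this example, not defaults of the class.
def _dec_example() -> Tensor:
    embeddings = torch.randn(16, 10)
    centers = nn.Parameter(torch.randn(3, 10))
    criterion = DECLoss(alpha = 1., reduction = "batchmean")
    # Gradients flow to the cluster centers (and, in practice, to the encoder
    # producing the embeddings); the target distribution p is detached in forward.
    return criterion(embeddings, centers)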