# Source code for shok.utils.transforms.convert_to_tv_tensor_bboxes

from typing import Any, Dict, Optional, Tuple

import torch
from torchvision.tv_tensors import BoundingBoxes



# [docs]
class ConvertToTVTensorBBoxes(torch.nn.Module):
    """
    Module to convert bounding boxes to torchvision tensors.

    This is a simplified version that does not include transformations.

    This is useful due to some torchvision transforms requiring bounding boxes
    to be of type `torchvision.tv_tensors.BoundingBoxes`.
    """

    def forward(
        self, x: torch.Tensor, y: Optional[Dict[str, Any]] = None
    ) -> Tuple[torch.Tensor, Optional[Dict[str, Any]]]:
        """
        Return `x` untouched and wrap `y["boxes"]` as `BoundingBoxes` when present.

        Args:
            x: Input tensor, typically an image or a batch of images. Its last
                two dimensions are used as the canvas size, i.e. the layout is
                assumed to be ``(..., H, W)``.
            y: Optional target dictionary. If it is provided and contains a
                "boxes" key, that entry is replaced **in place** by a
                `BoundingBoxes` object in "xyxy" format with dtype
                `torch.float32`. Other entries are left as they are.

        Returns:
            The tuple ``(x, y)``: `x` is the same object that was passed in and
            `y` is the same (possibly updated) dictionary, or `None`.
        """
        if y is not None and "boxes" in y:
            # Take only the trailing two dims: `x.shape[1:]` would also include
            # the channel dim for batched (N, C, H, W) input, which is not a
            # valid (height, width) canvas size.
            canvas_size = tuple(x.shape[-2:])
            y["boxes"] = BoundingBoxes(
                y["boxes"],
                format="xyxy",
                canvas_size=canvas_size,
                dtype=torch.float32,
            )
        return x, y