# Source code for jactorch.transforms.bbox.functional

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : functional.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 03/03/2018
#
# This file is part of Jacinle.
# Distributed under terms of the MIT license.

from PIL import Image
import numpy as np

from torchvision.transforms import functional as TF
from jacinle.utils.argument import get_2dshape



[docs]
def normalize_bbox(img, bbox):
    """Scale pixel-space box coordinates into fractions of the image size.

    Columns 0 and 2 are divided by ``img.width`` and columns 1 and 3 by
    ``img.height`` (presumably an ``(x1, y1, x2, y2)`` layout -- confirm with
    the callers). The input array is never modified.

    Args:
        img: object exposing ``width`` and ``height`` (e.g. a PIL image).
        bbox: 2-D numpy array with at least four columns.

    Returns:
        The pair ``(img, normalized_bbox)``; ``img`` is returned untouched.
    """
    if np.issubdtype(bbox.dtype, np.integer):
        # Pixel boxes are frequently stored as integers, and in-place true
        # division on an integer array raises a casting error in numpy.
        # ``astype`` also gives us the private copy we need.
        bbox = bbox.astype(np.float64)
    else:
        bbox = bbox.copy()
    bbox[:, 0] /= img.width
    bbox[:, 1] /= img.height
    bbox[:, 2] /= img.width
    bbox[:, 3] /= img.height
    return img, bbox




[docs]
def denormalize_bbox(img, bbox):
    """Scale fractional box coordinates back up by the image size.

    Columns 0 and 2 are multiplied by ``img.width`` and columns 1 and 3 by
    ``img.height``. The work is done on a copy, so the caller's array is
    left as it was.

    Args:
        img: object exposing ``width`` and ``height`` (e.g. a PIL image).
        bbox: 2-D array with at least four columns and a ``copy()`` method.

    Returns:
        The pair ``(img, scaled_bbox)``; ``img`` is returned untouched.
    """
    scaled = bbox.copy()
    column_factors = (
        (0, img.width),
        (1, img.height),
        (2, img.width),
        (3, img.height),
    )
    for column, factor in column_factors:
        scaled[:, column] *= factor
    return img, scaled




[docs]
def crop(img, bbox, i, j, h, w):
    """Crop the image and re-express the boxes relative to the crop window.

    The arithmetic treats the first four ``bbox`` columns as fractions of
    the image size: each x column (0, 2) is shifted by ``j / img.width`` and
    rescaled by ``img.width / w``; each y column (1, 3) is shifted by
    ``i / img.height`` and rescaled by ``img.height / h``. Values are not
    clipped, so boxes outside the window end up outside ``[0, 1]``.

    Args:
        img: image handed to ``TF.crop``; must expose ``width``/``height``.
        bbox: 2-D array with at least four columns; it is copied first.
        i: row offset of the crop window (used for the y columns).
        j: column offset of the crop window (used for the x columns).
        h: height of the crop window.
        w: width of the crop window.

    Returns:
        The pair ``(cropped_img, adjusted_bbox)``.
    """
    adjusted = bbox.copy()

    x_offset, x_scale = j / img.width, img.width / w
    y_offset, y_scale = i / img.height, img.height / h

    per_column = (
        (0, x_offset, x_scale),
        (1, y_offset, y_scale),
        (2, x_offset, x_scale),
        (3, y_offset, y_scale),
    )
    for column, offset, scale in per_column:
        adjusted[:, column] = (adjusted[:, column] - offset) * scale

    return TF.crop(img, i, j, h, w), adjusted




[docs]
def center_crop(img, bbox, output_size):
    """Crop a window of ``output_size`` centred in the image.

    ``output_size`` is passed through ``get_2dshape`` and unpacked as
    ``(height, width)``. The window's top-left corner is the rounded half of
    the size difference along each axis; the actual cropping and box
    adjustment are delegated to :func:`crop`.

    Args:
        img: image exposing ``size`` as ``(width, height)``.
        bbox: box array forwarded to :func:`crop`.
        output_size: target size, in any form ``get_2dshape`` accepts.

    Returns:
        The pair ``(cropped_img, adjusted_bbox)`` produced by :func:`crop`.
    """
    target_shape = get_2dshape(output_size)
    img_w, img_h = img.size
    target_h, target_w = target_shape
    top = int(round((img_h - target_h) / 2.))
    left = int(round((img_w - target_w) / 2.))
    return crop(img, bbox, top, left, target_h, target_w)




[docs]
def pad(img, bbox, padding, fill=0):
    """Pad the image and re-express the boxes relative to the padded canvas.

    The image is padded by ``TF.pad``. The first four ``bbox`` columns are
    treated as fractions of the image size: x columns (0, 2) are shifted by
    ``padding[0]`` pixels and y columns (1, 3) by ``padding[1]`` pixels, then
    rescaled from the old image size to the padded one.

    Args:
        img: image handed to ``TF.pad``; must expose ``width``/``height``.
        bbox: 2-D array with at least four columns; it is copied first.
        padding: an int (same amount for both offsets), or a sequence of
            length 2 or 4 whose first two entries are the x and y offsets.
        fill: fill value forwarded to ``TF.pad``.

    Returns:
        The pair ``(padded_img, adjusted_bbox)``.
    """
    # Pad first so that the new canvas size is known for the rescaling below.
    padded = TF.pad(img, padding, fill=fill)
    adjusted = bbox.copy()

    # Only the leading (x, y) offsets influence the coordinates.
    if isinstance(padding, int):
        pad_x = pad_y = padding
    elif len(padding) == 2:
        pad_x, pad_y = padding[0], padding[1]
    else:
        assert len(padding) == 4
        pad_x, pad_y = padding[0], padding[1]

    x_shift, x_scale = pad_x / img.width, img.width / padded.width
    y_shift, y_scale = pad_y / img.height, img.height / padded.height

    per_column = (
        (0, x_shift, x_scale),
        (1, y_shift, y_scale),
        (2, x_shift, x_scale),
        (3, y_shift, y_scale),
    )
    for column, shift, scale in per_column:
        adjusted[:, column] = (adjusted[:, column] + shift) * scale
    return padded, adjusted




[docs]
def hflip(img, bbox):
    """Flip the image horizontally and mirror the boxes accordingly.

    The x columns (0 and 2) are treated as fractions of the image width
    (presumably ``x1`` and ``x2`` of an ``(x1, y1, x2, y2)`` box -- confirm
    with the callers). Mirroring maps ``x`` to ``1 - x``, which reverses the
    order of the two edges, so the columns are swapped as well: the new
    column 0 is ``1 - old column 2`` and vice versa. A box with
    ``x1 <= x2`` therefore still satisfies ``x1 <= x2`` after the flip.

    Args:
        img: image handed to ``TF.hflip``.
        bbox: 2-D numpy array with at least four columns; it is copied first.

    Returns:
        The pair ``(flipped_img, mirrored_bbox)``.
    """
    bbox = bbox.copy()
    # Indexing with a list yields a fresh array on the right-hand side, so
    # the swap cannot read values that were already overwritten.
    bbox[:, [0, 2]] = 1 - bbox[:, [2, 0]]
    return TF.hflip(img), bbox




[docs]
def vflip(img, bbox):
    """Flip the image vertically and mirror the boxes accordingly.

    The y columns (1 and 3) are treated as fractions of the image height
    (presumably ``y1`` and ``y2`` of an ``(x1, y1, x2, y2)`` box -- confirm
    with the callers). Mirroring maps ``y`` to ``1 - y``, which reverses the
    order of the two edges, so the columns are swapped as well: the new
    column 1 is ``1 - old column 3`` and vice versa. A box with
    ``y1 <= y2`` therefore still satisfies ``y1 <= y2`` after the flip.

    Args:
        img: image handed to ``TF.vflip``.
        bbox: 2-D numpy array with at least four columns; it is copied first.

    Returns:
        The pair ``(flipped_img, mirrored_bbox)``.
    """
    bbox = bbox.copy()
    # Indexing with a list yields a fresh array on the right-hand side, so
    # the swap cannot read values that were already overwritten.
    bbox[:, [1, 3]] = 1 - bbox[:, [3, 1]]
    return TF.vflip(img), bbox




[docs]
def resize(img, bbox, size, interpolation=Image.BILINEAR):
    """Resize the image; the boxes are passed through unchanged.

    Args:
        img: image handed to ``TF.resize``.
        bbox: box array; returned as-is (the same object, not a copy).
        size: target size forwarded to ``TF.resize``.
        interpolation: interpolation mode forwarded to ``TF.resize``.

    Returns:
        The pair ``(resized_img, bbox)``.
    """
    # Box coordinates are assumed to be 0/1-normalized, so changing the
    # image resolution does not affect them.
    resized = TF.resize(img, size, interpolation=interpolation)
    return resized, bbox




[docs]
def resized_crop(img, bbox, i, j, h, w, size, interpolation=Image.BILINEAR):
    """Crop a window out of the image, then resize the result.

    This is :func:`crop` followed by :func:`resize`; the boxes are adjusted
    by the crop step and passed through the resize step.

    Args:
        img: source image.
        bbox: box array forwarded to :func:`crop`.
        i: row offset of the crop window.
        j: column offset of the crop window.
        h: height of the crop window.
        w: width of the crop window.
        size: target size forwarded to :func:`resize`.
        interpolation: interpolation mode forwarded to :func:`resize`.

    Returns:
        The pair ``(resized_cropped_img, adjusted_bbox)``.
    """
    cropped_img, cropped_bbox = crop(img, bbox, i, j, h, w)
    out_img, out_bbox = resize(cropped_img, cropped_bbox, size, interpolation)
    return out_img, out_bbox




[docs]
def rotate(img, bbox, angle, resample, expand, center):
    """Placeholder rotation that only accepts the identity rotation.

    No rotation of boxes is implemented here: the only accepted ``angle`` is
    0, in which case both inputs are returned unchanged. ``resample``,
    ``expand`` and ``center`` are accepted for signature compatibility and
    are not used.

    Args:
        img: image; returned as-is.
        bbox: box array; returned as-is (the same object, not a copy).
        angle: rotation angle; must compare equal to 0.
        resample: unused.
        expand: unused.
        center: unused.

    Returns:
        The pair ``(img, bbox)``.

    Raises:
        AssertionError: if ``angle`` is not 0.
    """
    # Raise explicitly instead of using ``assert``: assert statements are
    # removed under ``python -O``, which would silently return the unrotated
    # image for a non-zero angle. AssertionError is kept so that existing
    # callers observe the same exception type.
    if angle != 0:
        raise AssertionError(
            'rotate() only supports angle == 0 for bounding boxes, got {!r}.'.format(angle)
        )
    return img, bbox




[docs]
def pad_multiple_of(img, coor, multiple, fill=0):
    """Pad the image so that both sides become multiples of ``multiple``.

    Each side is rounded *up* to the nearest multiple. When padding is
    needed it is requested as ``(0, 0, extra_w, extra_h)`` from :func:`pad`,
    i.e. with zero leading x/y offsets, so :func:`pad` only rescales the
    coordinates. When both sides are already aligned, the inputs are
    returned unchanged.

    Args:
        img: image exposing ``height`` and ``width``.
        coor: box/coordinate array forwarded to :func:`pad`.
        multiple: positive integer the output sides must be divisible by.
        fill: fill value forwarded to :func:`pad`.

    Returns:
        The pair ``(img, coor)`` when no padding is required (same objects),
        otherwise the result of :func:`pad`.
    """
    h, w = img.height, img.width
    # ``-x % m`` is the distance from x up to the next multiple of m
    # (0 when x is already aligned).
    extra_h = -h % multiple
    extra_w = -w % multiple
    if extra_h or extra_w:
        return pad(img, coor, (0, 0, extra_w, extra_h), fill=fill)
    return img, coor