Source code for pytb.detection.bboxes.bboxes_2d_detector.faster_rcnn.faster_rcnn

"""
Copyright (c) 2021-2022 UCLouvain, ICTEAM
Licensed under GPL-3.0 [see LICENSE for details]
Written by Jonathan Samelson and Benoît Gérin (2022)
"""

from pytb.detection.bboxes.bboxes_2d_detector.bboxes_2d_detector import BBoxes2DDetector
from pytb.output.bboxes_2d import BBoxes2D

from timeit import default_timer

import torch
import torchvision
import numpy as np
import logging

# Module-level logger, registered under the fixed name "aptitude-toolbox"
# (not ``__name__``).
log = logging.getLogger("aptitude-toolbox")


class FasterRCNN(BBoxes2DDetector):
    """Faster R-CNN 2D bounding-box detector backed by torchvision.

    The only implementation handled here is ``"torch-resnet50"``, which wraps
    ``torchvision.models.detection.fasterrcnn_resnet50_fpn``.
    """

    def __init__(self, proc_parameters: dict):
        """Initializes the detector with the given parameters.

        Args:
            proc_parameters (dict): A dictionary containing the FasterRCNN
                parameters. Its ``"params"`` entry is read for:

                * ``"use_coco_weights"`` (default ``True``): use the default
                  weights available through PyTorch; otherwise the weights are
                  loaded from ``self.model_path``.
                * ``"GPU"`` (default ``False``): run on the GPU if one is
                  available.

        Raises:
            AssertionError: If the selected implementation is not
                ``"torch-resnet50"``.
        """
        # NOTE(review): self.pref_implem and self.model_path are not set in
        # this class; presumably the parent constructor derives them from
        # proc_parameters -- confirm against BBoxes2DDetector.
        super().__init__(proc_parameters)
        params = proc_parameters["params"]

        # Whether to use the default weights available on PyTorch.
        self.use_coco = params.get("use_coco_weights", True)

        # Whether to use the GPU if available. Fall back to the CPU instead of
        # crashing in .cuda() when CUDA was requested but cannot be used.
        self.gpu = params.get("GPU", False)
        if self.gpu and not torch.cuda.is_available():
            log.warning("GPU requested but CUDA is not available, falling back to CPU.")
            self.gpu = False
        log.debug("GPU set to {}.".format(self.gpu))

        log.debug("Faster-RCNN {} implementation selected.".format(self.pref_implem))
        if self.pref_implem != "torch-resnet50":
            # Raised explicitly (rather than ``assert False``) so the check
            # still fires when Python runs with -O, which strips asserts.
            raise AssertionError(
                "[ERROR] Unknown implementation of Faster-RCNN: {}".format(self.pref_implem)
            )

        use_pretrained = bool(self.use_coco)
        self.net = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=use_pretrained
        )
        if not use_pretrained:
            # Use weights from the provided path. They are mapped to the CPU
            # first so that a checkpoint saved from a CUDA device also loads
            # on a CPU-only host; the model is moved to the GPU below.
            # NOTE: torch.load unpickles the file -- only load trusted
            # checkpoints.
            state_dict = torch.load(self.model_path, map_location="cpu")
            self.net.load_state_dict(state_dict)

        if self.gpu:
            self.net.cuda()
        else:
            self.net.cpu()

        # Change the mode of the network to eval, for inference.
        self.net.eval()

    def detect(self, frame: np.ndarray) -> BBoxes2D:
        """Performs a Faster-RCNN inference on the given frame.

        Args:
            frame (np.ndarray): The frame to infer Faster-RCNN detections.
                It must have three axes; the last one is moved to the front
                before inference (presumably HxWxC to CxHxW -- confirm with
                the caller).

        Returns:
            BBoxes2D: A set of 2D bounding boxes identifying the detected objects.

        Raises:
            AssertionError: If the selected implementation is not
                ``"torch-resnet50"``.
        """
        if self.pref_implem != "torch-resnet50":
            # Explicit raise: an ``assert False`` would vanish under -O.
            raise AssertionError(
                "[ERROR] Unknown implementation of Faster-RCNN: {}".format(self.pref_implem)
            )

        # Obtain values between 0 and 1 instead of 0 and 255.
        scaled = frame.astype("float32") / 255.0
        # Permute the axes so that the last one (the channels) comes first.
        tensor = torch.from_numpy(scaled).permute(2, 0, 1)
        if self.gpu:
            tensor = tensor.cuda()

        return self._detect_torch_resnet50_pretrained(tensor)

    def _detect_torch_resnet50_pretrained(self, org_frame) -> BBoxes2D:
        """Performs the inference using the implementation PyTorch Resnet50.

        Args:
            org_frame (torch.Tensor): The preprocessed frame, as built by
                :meth:`detect`, on the same device as the network.

        Returns:
            BBoxes2D: The detections. The object is built with the
            ``"x1_y1_x2_y2"`` format and ``to_xt_yt_w_h()`` is called on it
            before it is returned. Its first argument is the elapsed time, in
            seconds, of the forward pass plus the transfer of the results to
            the CPU.
        """
        start = default_timer()
        with torch.no_grad():
            # The network takes a list of images; only one is given, so only
            # the first prediction is of interest.
            prediction = self.net([org_frame])[0]
            boxes = prediction["boxes"].to("cpu").numpy()
            labels = prediction["labels"].to("cpu").numpy()
            scores = prediction["scores"].to("cpu").numpy()
        end = default_timer()

        # NOTE(review): self.input_width / self.input_height are not set in
        # this class; presumably provided by the parent class -- confirm.
        bboxes = BBoxes2D(
            end - start,
            boxes,
            labels.astype(int),
            scores,
            self.input_width,
            self.input_height,
            "x1_y1_x2_y2",
        )
        bboxes.to_xt_yt_w_h()
        return bboxes