Running a Faster R-CNN object detector on CPU, I get segfaults or bus errors whenever the batch size is above a certain threshold.
import os
from pathlib import Path
from PIL import Image
import torch
import torchvision
from torchvision.ops import MultiScaleRoIAlign
from torchvision.datasets import FakeData
from torchvision.transforms.functional import pil_to_tensor
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.backbone_utils import _resnet_fpn_extractor
from torchvision.models.detection.faster_rcnn import (
FasterRCNN_ResNet50_FPN_Weights, FastRCNNPredictor, FasterRCNN as _FasterRCNN
)
from torchvision.models.detection import FasterRCNN
from torchvision.models.resnet import resnet50, ResNet50_Weights
from torch import nn
class TestingDataset(torch.utils.data.Dataset):
    """Image-only dataset read from a directory, used to exercise inference.

    Every entry found in ``img_root`` is treated as an image file. If the
    directory is empty when the dataset is constructed, it is populated with
    1000 random PNG images generated by ``FakeData``.
    """

    def __init__(self, img_root):
        """
        Create a FakeData dataset for testing

        Args:
            img_root: path to the image directory (``str`` or ``Path``).
                The directory, including any missing parents, is created
                if it does not exist yet.
        """
        # Coerce once so that both str and Path callers work below.
        self.img_root = Path(img_root)
        self.img_root.mkdir(parents=True, exist_ok=True)
        self.img_names = sorted(os.listdir(self.img_root))
        # generate a fake dataset of images if it doesn't already exist.
        # for testing inference, we don't need labels/bboxes, just images
        if not self.img_names:
            fake_dset = FakeData(size=1000, image_size=(3, 256, 256))
            for idx, (img, _) in enumerate(fake_dset):
                img.save(self.img_root / f"img{idx}.png", "PNG")
            self.img_names = sorted(os.listdir(self.img_root))

    def __len__(self):
        """Return the number of files listed in the image directory."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Load the ``idx``-th image as a float32 tensor divided by 255.

        Args:
            idx: position in the sorted list of file names.

        Returns:
            A 1-tuple ``(img,)``; no label is returned because the dataset
            is only meant for inference. The tensor layout is whatever
            ``pil_to_tensor`` yields for an RGB image (presumably
            ``(3, H, W)`` -- confirm against the torchvision docs).
        """
        img_path = self.img_root / self.img_names[idx]
        # The context manager closes the file handle right after decoding.
        with Image.open(img_path) as raw:
            rgb = raw.convert("RGB")
        img = pil_to_tensor(rgb).to(dtype=torch.float32)
        img /= 255.
        return (img,)
def get_fasterrcnn(num_classes):
    """Assemble a Faster R-CNN with a ResNet-50 FPN backbone and custom anchors.

    The RPN uses one anchor size per feature level (4, 8, 16, 32, 64) with
    five aspect ratios each, and the box head pools from feature map ``'0'``
    only. Input images are resized with ``min_size == max_size == 256``.

    Args:
        num_classes: forwarded unchanged to the detector constructor.

    Returns:
        The constructed detection model.
    """
    # One anchor size per pyramid level; tweak to suit the objects of interest.
    sizes_per_level = ((4,), (8,), (16,), (32,), (64,),)
    ratios = (0.5, 1.0, 2.0, 4.0, 8.0)
    # Every level shares the same set of aspect ratios.
    ratios_per_level = tuple(ratios for _ in sizes_per_level)

    rpn_anchors = AnchorGenerator(
        sizes=sizes_per_level,
        aspect_ratios=ratios_per_level,
    )
    print('Anchors', rpn_anchors.num_anchors_per_location())

    box_pooler = MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=7,
        sampling_ratio=2,
    )

    # ImageNet-pretrained trunk wrapped into an FPN feature extractor.
    # NOTE(review): the second argument (5) is presumably the number of
    # trainable layers -- confirm against the torchvision helper.
    trunk = resnet50(
        weights=ResNet50_Weights.IMAGENET1K_V1,
        norm_layer=nn.BatchNorm2d,
    )
    backbone = _resnet_fpn_extractor(trunk, 5)

    return _FasterRCNN(
        backbone,
        num_classes=num_classes,
        min_size=256,
        max_size=256,
        rpn_anchor_generator=rpn_anchors,
        box_roi_pool=box_pooler,
    )
# --- Reproduction driver: build the data, build the model, run one batch ---

data_root = Path("./data/random_test_imgs")
data_root.mkdir(exist_ok=True, parents=True)
dataset = TestingDataset(data_root / 'images')

# Batch size 172 is the observed threshold on the reporting machine: larger
# batches end in a bus error or segmentation fault, while smaller ones return
# bounding boxes, labels and scores as expected.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=172,
    shuffle=False,
)
it = iter(dataloader)

num_classes = 2
model = get_fasterrcnn(num_classes)
model.eval()

with torch.no_grad():
    # Each batch is a 1-tuple (see TestingDataset.__getitem__); take the images.
    images = next(it)[0]
    predictions = model(images)

# Show the first five detections for the first image of the batch.
first_image = predictions[0]
print(*(first_image[field][:5] for field in ('boxes', 'labels', 'scores')))
Python version: 3.11.6 | packaged by conda-forge | (main, Oct 3 2023, 10:37:07) [Clang 15.0.7 ] (64-bit runtime)
Python platform: macOS-13.6.2-arm64-arm-64bit
Is CUDA available: False
CUDA runtime version: No CUDA
CUDA_MODULE_LOADING set to: N/A
GPU models and configuration: No CUDA
Nvidia driver version: No CUDA
cuDNN version: No CUDA
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True
🐛 Describe the bug
Running a Faster R-CNN object detector on CPU, I get segfaults or bus errors whenever the batch size is above a certain threshold.
Observations:
- Besides the custom model built by `get_fasterrcnn` below, I also tested with the model lifted verbatim from the object detection tutorial, with the same result (on my laptop the batch-size threshold was lower, around 50, but otherwise the effect was identical).
- Using pdb, I traced the crash to the call to `torch.ops.torchvision.roi_align()` at line 238 in `torchvision/ops/roi_align.py`, but when I try to step into that call, I'm guessing it goes to the dispatcher via `__call__`, and then pdb shows me nothing else before segfaulting.

Versions
PyTorch version: 2.1.1
Is debug build: False
CUDA used to build PyTorch: None
ROCM used to build PyTorch: N/A
OS: macOS 13.6.2 (arm64)
GCC version: Could not collect
Clang version: 14.0.3 (clang-1403.0.22.14.1)
CMake version: Could not collect
Libc version: N/A
Python version: 3.11.6 | packaged by conda-forge | (main, Oct 3 2023, 10:37:07) [Clang 15.0.7 ] (64-bit runtime)
Python platform: macOS-13.6.2-arm64-arm-64bit
Is CUDA available: False
CUDA runtime version: No CUDA
CUDA_MODULE_LOADING set to: N/A
GPU models and configuration: No CUDA
Nvidia driver version: No CUDA
cuDNN version: No CUDA
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True
CPU:
Apple M2 Pro
Versions of relevant libraries:
[pip3] mypy==1.7.1
[pip3] mypy-extensions==1.0.0
[pip3] numpy==1.26.2
[pip3] onnx==1.15.0
[pip3] pytest-mypy==0.10.3
[pip3] pytorch-lightning==2.1.2
[pip3] torch==2.1.1
[pip3] torchmetrics==1.2.1
[pip3] torchvision==0.16.1
[conda] numpy 1.26.2 pypi_0 pypi
[conda] pytorch-lightning 2.1.2 pypi_0 pypi
[conda] torch 2.1.1 pypi_0 pypi
[conda] torchmetrics 1.2.1 pypi_0 pypi
[conda] torchvision 0.16.1 pypi_0 pypi