import detectron2
from detectron2.engine import DefaultTrainer
History and development
# Released in 2019 as successor to Detectron
Key features
# Supports Mask R-CNN, Faster R-CNN, RetinaNet, and more
Use cases and applications
# Example: pedestrian detection in self-driving cars
Installing Detectron2
pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
Setting up the environment
# Ensure compatible CUDA and PyTorch versions before installing
Overview of PyTorch framework
import torch
x = torch.tensor([1, 2, 3])
print(x)
Detectron2 vs Detectron (v1)
# Detectron2 supports newer architectures and training paradigms
Community and resources
# https://github.com/facebookresearch/detectron2
Basic architecture overview
# Backbone + RPN + ROI Heads = end-to-end detection pipeline
# Detectron2 returns bounding boxes and class labels for objects
Bounding boxes explained
# Format: [x_min, y_min, x_max, y_max]
Common detection algorithms
# Detectron2 supports many of these algorithms out-of-the-box
Intersection over Union (IoU)
# IoU = (Area of Overlap) / (Area of Union)
Non-Maximum Suppression (NMS)
# Keeps boxes with highest scores, suppresses others
Precision, recall, and mAP
# mAP is standard metric for object detection benchmarks
Dataset formats (COCO, Pascal VOC)
# COCO uses JSON, VOC uses XML annotation files
Preparing datasets for Detectron2
from detectron2.data import DatasetCatalog
# Register dataset for training
Annotation tools overview
# LabelImg exports annotations in VOC XML or YOLO TXT format
Detectron2 data pipeline
from detectron2.data import build_detection_train_loader
# Modules imported separately for customization
Backbone networks
from detectron2.modeling import build_backbone
Region Proposal Network (RPN)
# Generates region proposals for RoI pooling
ROI Pooling and Align
# ROI Align preferred for mask and keypoint accuracy
Box Head and Mask Head
# Separate heads for detection and segmentation tasks
Anchor generation
# Anchors guide RPN and detection heads
Training and inference flow
trainer = DefaultTrainer(cfg)
trainer.train()
Configuration system
from detectron2.config import get_cfg
cfg = get_cfg()
Model zoo overview
# Download and load pre-trained models for faster training
Custom model pipelines
# Extend classes and override methods for customization
# Linux example: pip install detectron2 -f \
#   https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch1.13/index.html
CUDA and GPU setup
nvidia-smi  # Check GPU status
nvcc --version  # Check CUDA version
Installing dependencies
pip install opencv-python fvcore pycocotools
Troubleshooting installation
# Resolve missing headers: sudo apt-get install build-essential
Using Conda environments
conda create -n detectron2 python=3.9
conda activate detectron2
Verifying installation
python -c "import detectron2; print(detectron2.__version__)"
Running demo scripts
python demo.py --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml --input input.jpg --output output.jpg --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/...
Setting up Jupyter notebooks
conda install ipykernel
python -m ipykernel install --user --name detectron2 --display-name "Detectron2"
Using Detectron2 with Google Colab
# Example: !pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
Best practices for setup
# Keep dependencies isolated and versions compatible
from detectron2 import model_zoo
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
Inference on images
predictor = DefaultPredictor(cfg)
outputs = predictor(cv2.imread("input.jpg"))
Visualizing predictions
from detectron2.utils.visualizer import Visualizer
v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow("Predictions", out.get_image()[:, :, ::-1])
Batch inference
# Run the predictor on each image path in turn; `outputs` holds the result
# for the most recently processed image.
for img_path in image_list:
    img = cv2.imread(img_path)
    outputs = predictor(img)
Evaluating pretrained models
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
evaluator = COCOEvaluator("val_dataset", cfg, False)
inference_on_dataset(predictor.model, val_loader, evaluator)
Modifying confidence thresholds
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # default 0.05
Using Detectron2 APIs
from detectron2.engine import DefaultTrainer
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
Output formats (JSON, COCO)
from detectron2.utils import comm
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
instances = outputs["instances"].to("cpu")
# instances_to_coco_json(instances, img_id) needs the image id to attach to
# each result record.
img_id = 0  # TODO: replace with the real image id for this input
coco_json = instances_to_coco_json(instances, img_id)
Saving and loading models
torch.save(trainer.model.state_dict(), "model.pth")
model.load_state_dict(torch.load("model.pth"))
Demo projects
# https://github.com/facebookresearch/detectron2/tree/main/demo
from detectron2.data import DatasetCatalog
DatasetCatalog.register("my_dataset", lambda: load_my_dataset())
Dataset format requirements
{
"images": [...],
"annotations": [...],
"categories": [...]
}
Annotation with COCO format
{
"bbox": [x, y, width, height],
"category_id": 1,
"segmentation": [...]
}
Using Pascal VOC format
# xml_to_coco.py converts VOC XMLs to COCO JSON
Dataset verification
# Run script to confirm images and annotations match
Data augmentation basics
from detectron2.data import transforms as T
augmentations = [T.RandomFlip(), T.RandomBrightness()]
Creating data loaders
from detectron2.data import build_detection_train_loader
train_loader = build_detection_train_loader(cfg)
Balancing dataset classes
# Custom sampler or loss weighting implementation
Dataset splitting (train/val/test)
# Use sklearn train_test_split or manual folder split
Debugging dataset issues
# Validate dataset with custom scripts or Detectron2’s data checking tools
model = torchvision.models.resnet50(pretrained=True)
Configuring training parameters
learning_rate = 0.001
batch_size = 32
epochs = 10
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Setting up the trainer
# One optimizer update per batch, repeated for every epoch.
for epoch in range(epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous batch
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
Using GPUs for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
inputs, labels = inputs.to(device), labels.to(device)
Understanding batch size and epochs
# Example batch size and epoch settings
batch_size = 64
epochs = 20
Monitoring training progress
print(f"Epoch {epoch}, Loss: {loss.item():.4f}")
Saving checkpoints
torch.save(model.state_dict(), "checkpoint.pth")
Early stopping criteria
# NOTE(review): loop-body fragment. `break` is only valid inside an enclosing
# loop (presumably the epoch loop), and the nesting of the two `if` statements
# is not recoverable from this listing; confirm before reuse.
if val_loss > previous_loss:
early_stop_counter += 1
if early_stop_counter > patience:
break
Fine-tuning pretrained weights
# Turn gradients off for every parameter, then back on for the `fc` layer only.
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True
Evaluating model performance
from sklearn.metrics import accuracy_score
preds = model(inputs).argmax(dim=1)
acc = accuracy_score(labels.cpu(), preds.cpu())
model.backbone = torchvision.models.resnet101(pretrained=True)
Changing ROI heads
# FastRCNNPredictor takes (in_channels, num_classes); reuse the input width of
# the head being replaced so the new predictor matches the existing features.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
Adding custom layers
model.add_module("dropout", torch.nn.Dropout(0.5))
Adjusting anchor sizes
anchor_generator = AnchorGenerator(sizes=((32, 64, 128),), aspect_ratios=((0.5, 1.0, 2.0),))
Using different loss functions
def focal_loss(pred, target, alpha=0.25, gamma=2):
    """Placeholder for a focal-loss function; the body is elided in this outline.

    Args:
        pred: Model predictions (not used by the stub).
        target: Ground-truth targets (not used by the stub).
        alpha: Defaults to 0.25 (not used by the stub).
        gamma: Defaults to 2 (not used by the stub).

    Returns:
        None, until a real implementation replaces the ellipsis.
    """
    ...
Multi-task learning setup
loss = loss_task1 + loss_task2
Adding keypoint detection
model = KeypointRCNN(...)
Implementing panoptic segmentation
model = PanopticFPN(...)
Configuring different optimizers
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
Using mixed precision training
from torch.cuda.amp import GradScaler, autocast
scaler = GradScaler()
# Only the forward pass runs inside the autocast context.
with autocast():
    output = model(input)
model = torchvision.models.resnet50(pretrained=True)
Distributed training basics
torch.distributed.init_process_group(backend='nccl')
Gradient accumulation
optimizer.zero_grad()
for i in range(accumulation_steps):
    loss = model(input[i])
    loss.backward()  # gradients are not cleared between iterations
# Apply one update after all accumulation steps have run.
optimizer.step()
Learning rate scheduling
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
Data augmentation advanced
transform = A.Compose([A.Cutout(num_holes=8), A.Normalize()])
Using custom samplers
sampler = WeightedRandomSampler(weights, num_samples)
Training with mixed datasets
train_dataset = ConcatDataset([dataset1, dataset2])
Handling class imbalance
criterion = nn.CrossEntropyLoss(weight=class_weights)
Hyperparameter tuning
# Use grid search or random search
Debugging training issues
torch.autograd.set_detect_anomaly(True)
from sklearn.metrics import average_precision_score
mAP = average_precision_score(y_true, y_scores)
print("mAP:", mAP)
Precision-Recall curves
from sklearn.metrics import precision_recall_curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
Confusion matrices
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred)
print(cm)
Evaluating segmentation masks
def iou_score(pred_mask, true_mask, empty_score=1.0):
    """Return the intersection-over-union of two masks.

    Args:
        pred_mask: Predicted mask; must support ``&``, ``|`` and ``.sum()``
            (for example a boolean array).
        true_mask: Ground-truth mask with the same requirements.
        empty_score: Value returned when the union is zero (both masks
            empty), where the ratio would otherwise be 0/0.

    Returns:
        ``intersection / union``, or ``empty_score`` when the union is zero.
    """
    intersection = (pred_mask & true_mask).sum()
    union = (pred_mask | true_mask).sum()
    if union == 0:
        # Both masks are empty: avoid dividing zero by zero.
        return empty_score
    return intersection / union
Keypoint detection metrics
# Use Object Keypoint Similarity (OKS) metric implementations
Panoptic quality metric
# Refer to panopticapi toolkit for implementation
Visual evaluation tools
import matplotlib.pyplot as plt
plt.imshow(image)
plt.show()
Error analysis
# Analyze misclassified samples manually or with scripts
Cross-validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)
Benchmarking models
# Evaluate multiple models on COCO or Pascal VOC datasets
# Example: TensorFlow Model Optimization Toolkit pruning API
Quantization techniques
import tensorflow_model_optimization as tfmot
quantize_model = tfmot.quantization.keras.quantize_model(model)
TensorRT integration
# Convert ONNX model to TensorRT engine
ONNX export and conversion
import torch
torch.onnx.export(model, dummy_input, "model.onnx")
Batch inference optimizations
batch_preds = model.predict(batch_input_data)
Reducing latency
# Use async inference and warm-up runs
Using half-precision floats
# Enable mixed precision training/inference in TensorFlow or PyTorch
Memory optimization
# Use memory profiling tools to detect leaks
Real-time inference strategies
# Deploy lightweight models on edge devices with latency targets
Profiling inference
import time
start = time.time()
model.predict(input_data)
print("Inference time:", time.time() - start)
from detectron2.export import export_onnx_model
export_onnx_model(cfg, model, "model.onnx")
Building REST APIs with Flask/FastAPI
from fastapi import FastAPI, File, UploadFile
app = FastAPI()
@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    """Handle POST /predict/ for an uploaded file.

    The upload is not read yet; the handler returns a fixed placeholder
    payload until real preprocessing and inference are added.
    """
    # process and predict
    return {"result": "prediction"}
Dockerizing models
# Dockerfile example
FROM pytorch/pytorch:latest
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt
CMD ["python", "app.py"]
Deploying on cloud platforms
# Example: AWS CLI deploy commands or Terraform scripts
Edge device deployment (NVIDIA Jetson)
# Use JetPack SDK and TensorRT conversion tools
Kubernetes deployment
kubectl apply -f deployment.yaml
Monitoring deployed models
# Integrate Prometheus and Grafana dashboards
Scaling inference servers
kubectl scale deployment/detectron2 --replicas=3
Continuous integration pipelines
# Example: GitHub Actions YAML for build and deploy
Security considerations
# Implement OAuth2 or API key authentication
# Instance segmentation example separates two dogs as different masks
Mask R-CNN overview
# Uses backbone CNN, RPN, RoIAlign, and mask head for pixel-level segmentation
Dataset preparation for segmentation
# Prepare JSON annotations with polygons per instance for Detectron2
Training Mask R-CNN
from detectron2.engine import DefaultTrainer
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
Post-processing segmentation masks
# Apply mask refinement with OpenCV if needed
Evaluating segmentation performance
from detectron2.evaluation import COCOEvaluator
evaluator = COCOEvaluator("val_dataset", cfg, False)
Visualizing segmentation outputs
from detectron2.utils.visualizer import Visualizer
v = Visualizer(im[:, :, ::-1], metadata)
out = v.draw_instance_predictions(predictions)
Fine-tuning segmentation heads
# Modify cfg.MODEL.ROI_HEADS.NUM_CLASSES and train with reduced LR
Handling occlusions
# Train with augmented images containing occlusions
Use case examples
# Detectron2 projects on GitHub show use in various domains
# Detect keypoints like elbows, knees in images
Dataset requirements
# Annotations include keypoints list with visibility flags
Keypoint model architectures
# Detectron2’s Keypoint R-CNN uses ResNet+FPN backbone with keypoint head
Training keypoint models
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17
Evaluating keypoint predictions
# Use COCO keypoint evaluator in Detectron2
Visualization of keypoints
v.draw_keypoints(predictions["instances"].pred_keypoints)
Combining with pose estimation
# Use detected keypoints for skeleton-based activity recognition
Multi-person keypoint detection
# Model handles instance separation and keypoint assignment
Real-time keypoint detection
# Use TensorRT or ONNX Runtime for acceleration
Applications and demos
# Open-source demos show keypoint tracking live
# Useful in autonomous driving for understanding both stuff and things classes
Panoptic FPN architecture
# Combines outputs of Mask R-CNN and semantic segmentation heads
Dataset preparation
# COCO panoptic format uses segment ids per pixel
Training panoptic models
cfg.MODEL.META_ARCHITECTURE = "PanopticFPN"
Evaluating panoptic quality
# PQ = (sum of IoU over matched segments) / (TP + 0.5 * FP + 0.5 * FN)
Combining semantic & instance outputs
# Post-processing merges overlapping predictions consistently
Visualizing panoptic results
# Visualizer overlays both semantic and instance segments
Handling complex scenes
# Train on diverse datasets with challenging examples
Optimizing panoptic models
# Use mixed precision training for faster convergence
Industry applications
# Real-time panoptic segmentation powers autonomous navigation
# Example: Check dataset label completeness
# Boxes are stored per annotation inside each image record, so walk
# record["annotations"] rather than indexing the record itself.
assert all(
    len(ann["bbox"]) == 4
    for record in dataset_dicts
    for ann in record["annotations"]
)
from detectron2.utils.visualizer import Visualizer
v = Visualizer(image[:, :, ::-1], metadata)
out = v.draw_dataset_dict(dataset_dict)
plt.imshow(out.get_image())
# Draw the annotations of the first three records, one window at a time.
for d in dataset_dicts[:3]:
    img = cv2.imread(d["file_name"])
    # Channel order is reversed going into the Visualizer and again coming out.
    v = Visualizer(img[:, :, ::-1], metadata)
    out = v.draw_dataset_dict(d)
    cv2.imshow("Sample", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)  # wait for a key press before showing the next sample
from detectron2.utils.logger import setup_logger
logger = setup_logger()
logger.info("Starting training process...")
# Check for data loader errors or batch size problems
from detectron2.engine import DefaultTrainer
trainer = DefaultTrainer(cfg)
metrics = trainer.test(cfg, model)
print(metrics)
data_loader = build_detection_train_loader(cfg)
# Print only the first batch, then stop iterating.
for batch in data_loader:
    print(batch)
    break
from detectron2.utils.events import EventStorage
# Record the scalar while the storage context is active.
with EventStorage(0) as storage:
    storage.put_scalar("loss", loss_value)
import torch
torch.cuda.empty_cache()
# Train on small subset to debug quickly
cfg.DATASETS.TRAIN = ("my_small_dataset",)
import cv2
img = cv2.imread("image.jpg")
outputs = predictor(img)
cv2.imshow("Output", img)
cv2.waitKey(0)
# Export model to ONNX and load in TensorFlow
import torch
from detectron2.export import TracingAdapter
# TracingAdapter takes the model plus sample inputs (not cfg) and has no
# export method of its own, so the export goes through torch.onnx.export.
# NOTE(review): `image` is assumed to be a CHW image tensor prepared by the
# caller, and the model is assumed to be in eval mode; confirm both.
inputs = [{"image": image}]
traced_model = TracingAdapter(model, inputs)
torch.onnx.export(traced_model, (image,), "model.onnx")
# Configure DeepStream pipeline with Detectron2 models
# Run YOLO for fast detection, refine with Detectron2
# ROS node subscribes to camera feed, runs Detectron2 inference
# Use Flask or FastAPI to expose model predictions
# Optimize model for mobile inference with quantization
cap = cv2.VideoCapture("video.mp4")
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (end of stream or read failure): stop
            # instead of passing an empty frame to the predictor forever.
            break
        outputs = predictor(frame)
finally:
    # Release the capture even if inference raises.
    cap.release()
# Sequentially apply models and merge outputs
# Use hooks to extract activations in Detectron2
# Use SHAP or LIME for feature attribution
# Aggregate saliency maps over dataset samples
import torch
from pytorch_grad_cam import GradCAM
# Overlay mask heatmaps on original images
# Stratify evaluation metrics by subgroup
from captum.attr import IntegratedGradients
# Provide visual explanations alongside predictions in UI
# Write model usage guidelines and disclaimers
# Use model cards and datasheets for transparency
# Install latest Detectron2:
#   pip install -U detectron2
# Example config usage in Detectron2:
#   cfg.MODEL.META_ARCHITECTURE = "CascadeRCNN"
# DETR model example:
#   from detectron2.modeling import build_model
#   model = build_model(cfg)
# Research frameworks combine pseudo-labeling with Detectron2
# Tools generate synthetic images and annotations for training
# Pretrain backbone with SSL, then fine-tune Detectron2 head
# Adaptation with adversarial training or feature alignment
# Fine-tune Detectron2 on small datasets with transfer learning
# Run evaluation scripts on benchmark datasets
# Research papers often address these challenges
# Job listings require experience in Detectron2 and related tools
# Upload notebooks and demos to GitHub repositories
# Fork repo, create pull request with improvements
# Submit models for leaderboard ranking
# Write blog posts on Medium or papers on arXiv
# Attend workshops to stay updated
# Join AI/ML groups on LinkedIn, Discord
# Take courses on Coursera, Udacity, or AWS AI certifications
# Use platforms like Upwork to find clients
# Use RSS feeds and Twitter for updates