import detectron2
from detectron2.engine import DefaultTrainer

History and development
# Released in 2019 as successor to Detectron

Key features
# Supports Mask R-CNN, Faster R-CNN, RetinaNet, and more

Use cases and applications
# Example: pedestrian detection in self-driving cars

Installing Detectron2
pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html

Setting up the environment
# Ensure compatible CUDA and PyTorch versions before installing

Overview of PyTorch framework
import torch
x = torch.tensor([1, 2, 3])
print(x)

Detectron2 vs Detectron (v1)
# Detectron2 supports newer architectures and training paradigms

Community and resources
# https://github.com/facebookresearch/detectron2

Basic architecture overview
# Backbone + RPN + ROI Heads = end-to-end detection pipeline
# Detectron2 returns bounding boxes and class labels for objects

Bounding boxes explained
# Format: [x_min, y_min, x_max, y_max]

Common detection algorithms
# Detectron2 supports many of these algorithms out-of-the-box

Intersection over Union (IoU)
# IoU = (Area of Overlap) / (Area of Union)
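For illustration, here is a minimal IoU computation for two boxes in [x_min, y_min, x_max, y_max] format (a plain-Python sketch; for batched boxes Detectron2 itself provides detectron2.structures.pairwise_iou):

def box_iou(a, b):
    """IoU of two boxes given as [x_min, y_min, x_max, y_max]."""
    # Intersection rectangle
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    # Union = sum of areas minus intersection
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

print(box_iou([0, 0, 10, 10], [5, 5, 15, 15]))  # 25 / 175, roughly 0.143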
Non-Maximum Suppression (NMS)
# Keeps the highest-scoring boxes and suppresses overlapping lower-scoring duplicates
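In practice NMS is rarely hand-written; torchvision ships an implementation. A small sketch with made-up boxes and scores:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.5)  # indices of surviving boxes
print(keep)  # tensor([0, 2]): box 1 overlaps box 0 above the threshold and is suppressed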
Precision, recall, and mAP
# mAP is the standard metric for object detection benchmarks

Dataset formats (COCO, Pascal VOC)
# COCO uses JSON, VOC uses XML annotation files

Preparing datasets for Detectron2
from detectron2.data import DatasetCatalog  # Register dataset for training
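A minimal registration sketch; the loader function, file path, and class name below are placeholders for your own data, returned in Detectron2's standard list-of-dicts format:

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def get_my_dicts():
    # Hypothetical loader: return one dict per image
    return [{
        "file_name": "images/0001.jpg",   # placeholder path
        "image_id": 0,
        "height": 480,
        "width": 640,
        "annotations": [{
            "bbox": [100, 120, 200, 240],
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 0,
        }],
    }]

DatasetCatalog.register("my_dataset", get_my_dicts)
MetadataCatalog.get("my_dataset").thing_classes = ["pedestrian"]  # placeholder class list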
Annotation tools overview
# LabelImg exports annotations in VOC XML or YOLO TXT format

Detectron2 data pipeline
from detectron2.data import build_detection_train_loader
# Modules imported separately for customization

Backbone networks
from detectron2.modeling import build_backbone

Region Proposal Network (RPN)
# Generates region proposals for RoI pooling

ROI Pooling and Align
# ROI Align preferred for mask and keypoint accuracy

Box Head and Mask Head
# Separate heads for detection and segmentation tasks

Anchor generation
# Anchors guide RPN and detection heads

Training and inference flow
trainer = DefaultTrainer(cfg)
trainer.train()

Configuration system
from detectron2.config import get_cfg
cfg = get_cfg()

Model zoo overview
# Download and load pre-trained models for faster training

Custom model pipelines
# Extend classes and override methods for customization
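For example, a common customization is subclassing DefaultTrainer to supply an evaluator, since the base class raises NotImplementedError when it does not know the dataset. A minimal sketch:

import os
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator

class MyTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        # Override the default evaluator factory
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "eval")
        return COCOEvaluator(dataset_name, output_dir=output_folder)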
# Linux example:
pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch1.13/index.html

CUDA and GPU setup
nvidia-smi  # Check GPU status
nvcc --version  # Check CUDA version

Installing dependencies
pip install opencv-python fvcore pycocotools

Troubleshooting installation
# Resolve missing headers: sudo apt-get install build-essential

Using Conda environments
conda create -n detectron2 python=3.9
conda activate detectron2

Verifying installation
python -c "import detectron2; print(detectron2.__version__)"

Running demo scripts
python demo.py --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml --input input.jpg --output output.jpg --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/...

Setting up Jupyter notebooks
conda install ipykernel
python -m ipykernel install --user --name detectron2 --display-name "Detectron2"

Using Detectron2 with Google Colab
# Example: !pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html

Best practices for setup
# Keep dependencies isolated and versions compatible
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

Inference on images
import cv2
from detectron2.engine import DefaultPredictor

predictor = DefaultPredictor(cfg)
img = cv2.imread("input.jpg")
outputs = predictor(img)

Visualizing predictions
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer

v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow("Predictions", out.get_image()[:, :, ::-1])
cv2.waitKey(0)

Batch inference
for img_path in image_list:
    img = cv2.imread(img_path)
    outputs = predictor(img)

Evaluating pretrained models
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
evaluator = COCOEvaluator("val_dataset", cfg, False)
inference_on_dataset(predictor.model, val_loader, evaluator)  # val_loader: a data loader over "val_dataset"

Modifying confidence thresholds
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # default is 0.05

Using Detectron2 APIs
from detectron2.engine import DefaultTrainer
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

Output formats (JSON, COCO)
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
instances = outputs["instances"].to("cpu")
coco_json = instances_to_coco_json(instances, img_id)  # img_id: the image's COCO id

Saving and loading models
torch.save(trainer.model.state_dict(), "model.pth")
model.load_state_dict(torch.load("model.pth"))
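Detectron2 also ships its own checkpointer, which understands model-zoo URLs as well as local paths; a minimal sketch (directory and file names are illustrative):

from detectron2.checkpoint import DetectionCheckpointer

checkpointer = DetectionCheckpointer(trainer.model, save_dir="output")
checkpointer.save("model_final")             # writes output/model_final.pth
checkpointer.load("output/model_final.pth")  # also accepts model-zoo URLs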
Demo projects
# https://github.com/facebookresearch/detectron2/tree/main/demo

from detectron2.data import DatasetCatalog
DatasetCatalog.register("my_dataset", lambda: load_my_dataset())

Dataset format requirements
{ "images": [...], "annotations": [...], "categories": [...] }

Annotation with COCO format
{ "bbox": [x, y, width, height], "category_id": 1, "segmentation": [...] }
Using Pascal VOC format
# xml_to_coco.py converts VOC XMLs to COCO JSON

Dataset verification
# Run script to confirm images and annotations match

Data augmentation basics
from detectron2.data import transforms as T
augmentations = [T.RandomFlip(), T.RandomBrightness(0.8, 1.2)]  # RandomBrightness takes an intensity range

Creating data loaders
from detectron2.data import build_detection_train_loader
train_loader = build_detection_train_loader(cfg)

Balancing dataset classes
# Custom sampler or loss weighting implementation

Dataset splitting (train/val/test)
# Use sklearn train_test_split or manual folder split
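A minimal file-level split (the filenames are placeholders):

from sklearn.model_selection import train_test_split

all_images = ["img_0001.jpg", "img_0002.jpg", "img_0003.jpg", "img_0004.jpg"]  # placeholder list
train_files, val_files = train_test_split(all_images, test_size=0.2, random_state=42)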
Debugging dataset issues
# Validate dataset with custom scripts or Detectron2’s data checking tools
import torch
import torchvision

model = torchvision.models.resnet50(pretrained=True)

Configuring training parameters
learning_rate = 0.001
batch_size = 32
epochs = 10
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()  # example loss for the loop below

Setting up the trainer
for epoch in range(epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

Using GPUs for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
inputs, labels = inputs.to(device), labels.to(device)

Understanding batch size and epochs
# Example batch size and epoch settings
batch_size = 64
epochs = 20

Monitoring training progress
print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Saving checkpoints
torch.save(model.state_dict(), "checkpoint.pth")

Early stopping criteria
if val_loss > previous_loss:
    early_stop_counter += 1
if early_stop_counter > patience:
    break

Fine-tuning pretrained weights
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True

Evaluating model performance
from sklearn.metrics import accuracy_score
preds = model(inputs).argmax(dim=1)
acc = accuracy_score(labels.cpu(), preds.cpu())
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.anchor_utils import AnchorGenerator

model.backbone = torchvision.models.resnet101(pretrained=True)

Changing ROI heads
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Adding custom layers
model.add_module("dropout", torch.nn.Dropout(0.5))

Adjusting anchor sizes
anchor_generator = AnchorGenerator(sizes=((32, 64, 128),), aspect_ratios=((0.5, 1.0, 2.0),))

Using different loss functions
def focal_loss(pred, target, alpha=0.25, gamma=2):
    ...
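The stub above can be filled in along these lines; a minimal sketch of binary focal loss, assuming pred holds logits and target holds 0/1 labels:

import torch
import torch.nn.functional as F

def focal_loss(pred, target, alpha=0.25, gamma=2):
    # Binary focal loss: FL = -alpha_t * (1 - p_t)^gamma * log(p_t)
    ce = F.binary_cross_entropy_with_logits(pred, target, reduction="none")  # = -log(p_t)
    p = torch.sigmoid(pred)
    p_t = p * target + (1 - p) * (1 - target)              # probability of the true class
    alpha_t = alpha * target + (1 - alpha) * (1 - target)  # class-balance factor
    return (alpha_t * (1 - p_t) ** gamma * ce).mean()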
Multi-task learning setup
loss = loss_task1 + loss_task2

Adding keypoint detection
model = KeypointRCNN(...)

Implementing panoptic segmentation
model = PanopticFPN(...)

Configuring different optimizers
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

Using mixed precision training
from torch.cuda.amp import GradScaler, autocast
scaler = GradScaler()
with autocast():
    output = model(input)
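The snippet above only covers the forward pass; a complete AMP step also scales the loss so small fp16 gradients are not flushed to zero. A sketch, assuming model, criterion, optimizer, and a batch from the earlier examples:

from torch.cuda.amp import GradScaler, autocast

scaler = GradScaler()
optimizer.zero_grad()
with autocast():                  # forward pass runs in mixed precision
    output = model(inputs)
    loss = criterion(output, labels)
scaler.scale(loss).backward()     # scale the loss to avoid fp16 underflow
scaler.step(optimizer)            # unscales gradients, then steps the optimizer
scaler.update()                   # adjust the scale factor for the next step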
model = torchvision.models.resnet50(pretrained=True)

Distributed training basics
torch.distributed.init_process_group(backend='nccl')

Gradient accumulation
optimizer.zero_grad()
for i in range(accumulation_steps):
    outputs = model(inputs[i])
    loss = criterion(outputs, targets[i]) / accumulation_steps  # average over accumulated micro-batches
    loss.backward()  # gradients accumulate across iterations
optimizer.step()

Learning rate scheduling
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

Data augmentation advanced
import albumentations as A
transform = A.Compose([A.Cutout(num_holes=8), A.Normalize()])

Using custom samplers
from torch.utils.data import WeightedRandomSampler
sampler = WeightedRandomSampler(weights, num_samples)

Training with mixed datasets
from torch.utils.data import ConcatDataset
train_dataset = ConcatDataset([dataset1, dataset2])

Handling class imbalance
criterion = nn.CrossEntropyLoss(weight=class_weights)
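One common way to obtain class_weights is inverse class frequency; a minimal sketch (all_labels is a placeholder tensor of training labels):

import torch
import torch.nn as nn

all_labels = torch.tensor([0, 0, 0, 1, 2, 2])          # placeholder training labels
counts = torch.bincount(all_labels).float()
class_weights = counts.sum() / (len(counts) * counts)  # inverse-frequency weights
criterion = nn.CrossEntropyLoss(weight=class_weights)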
Hyperparameter tuning
# Use grid search or random search

Debugging training issues
torch.autograd.set_detect_anomaly(True)
from sklearn.metrics import average_precision_score
ap = average_precision_score(y_true, y_scores)  # AP for a single class; mAP is the mean of AP over classes
print("AP:", ap)

Precision-Recall curves
from sklearn.metrics import precision_recall_curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)

Confusion matrices
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred)
print(cm)

Evaluating segmentation masks
def iou_score(pred_mask, true_mask):
    # Masks are boolean arrays of the same shape
    intersection = (pred_mask & true_mask).sum()
    union = (pred_mask | true_mask).sum()
    return intersection / union

Keypoint detection metrics
# Use Object Keypoint Similarity (OKS) metric implementations

Panoptic quality metric
# Refer to panopticapi toolkit for implementation

Visual evaluation tools
import matplotlib.pyplot as plt
plt.imshow(image)
plt.show()

Error analysis
# Analyze misclassified samples manually or with scripts

Cross-validation
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)

Benchmarking models
# Evaluate multiple models on COCO or Pascal VOC datasets
# Example: TensorFlow Model Optimization Toolkit pruning API

Quantization techniques
import tensorflow_model_optimization as tfmot
quantize_model = tfmot.quantization.keras.quantize_model(model)

TensorRT integration
# Convert ONNX model to TensorRT engine

ONNX export and conversion
import torch
torch.onnx.export(model, dummy_input, "model.onnx")
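After export it is worth validating the file before any TensorRT conversion; a short check using the onnx package:

import onnx

onnx_model = onnx.load("model.onnx")
onnx.checker.check_model(onnx_model)  # raises if the graph is malformed
print(onnx.helper.printable_graph(onnx_model.graph))  # human-readable graph dump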
Batch inference optimizations
batch_preds = model.predict(batch_input_data)

Reducing latency
# Use async inference and warm-up runs

Using half-precision floats
# Enable mixed precision training/inference in TensorFlow or PyTorch

Memory optimization
# Use memory profiling tools to detect leaks

Real-time inference strategies
# Deploy lightweight models on edge devices with latency targets

Profiling inference
import time
start = time.time()
model.predict(input_data)
print("Inference time:", time.time() - start)
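Wall-clock timing like the above can mislead on GPUs because kernels launch asynchronously; a more careful sketch with warm-up runs and synchronization (PyTorch assumed, with model and input_data already on the GPU):

import time
import torch

with torch.no_grad():
    for _ in range(5):            # warm-up runs (allocator, cudnn autotune, caches)
        model(input_data)
    torch.cuda.synchronize()      # wait for queued GPU work before starting the clock
    start = time.time()
    for _ in range(100):
        model(input_data)
    torch.cuda.synchronize()
print("Mean latency:", (time.time() - start) / 100)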
import onnx
from detectron2.export import export_onnx_model  # available in older Detectron2 releases
onnx_model = export_onnx_model(cfg, model, inputs)  # inputs: a batch in the model's expected format; returns an onnx.ModelProto
onnx.save(onnx_model, "model.onnx")

Building REST APIs with Flask/FastAPI
from fastapi import FastAPI, File, UploadFile

app = FastAPI()

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    # process and predict
    return {"result": "prediction"}

Dockerizing models
# Dockerfile example
FROM pytorch/pytorch:latest
COPY . /app
WORKDIR /app
RUN pip install -r requirements.txt
CMD ["python", "app.py"]

Deploying on cloud platforms
# Example: AWS CLI deploy commands or Terraform scripts

Edge device deployment (NVIDIA Jetson)
# Use JetPack SDK and TensorRT conversion tools

Kubernetes deployment
kubectl apply -f deployment.yaml

Monitoring deployed models
# Integrate Prometheus and Grafana dashboards

Scaling inference servers
kubectl scale deployment/detectron2 --replicas=3

Continuous integration pipelines
# Example: GitHub Actions YAML for build and deploy

Security considerations
# Implement OAuth2 or API key authentication
# Instance segmentation example separates two dogs as different masks

Mask R-CNN overview
# Uses backbone CNN, RPN, RoIAlign, and mask head for pixel-level segmentation

Dataset preparation for segmentation
# Prepare JSON annotations with polygons per instance for Detectron2

Training Mask R-CNN
from detectron2.engine import DefaultTrainer
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

Post-processing segmentation masks
# Apply mask refinement with OpenCV if needed

Evaluating segmentation performance
from detectron2.evaluation import COCOEvaluator
evaluator = COCOEvaluator("val_dataset", cfg, False)

Visualizing segmentation outputs
from detectron2.utils.visualizer import Visualizer
v = Visualizer(im[:, :, ::-1], metadata)
out = v.draw_instance_predictions(predictions)

Fine-tuning segmentation heads
# Modify cfg.MODEL.ROI_HEADS.NUM_CLASSES and train with reduced LR
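A minimal fine-tuning configuration sketch; the class count, learning rate, and dataset name are illustrative:

from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3   # illustrative: your dataset's class count
cfg.SOLVER.BASE_LR = 0.00025          # reduced LR for fine-tuning
cfg.DATASETS.TRAIN = ("my_dataset",)  # placeholder registered dataset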
Handling occlusions
# Train with augmented images containing occlusions

Use case examples
# Detectron2 projects on GitHub show use in various domains
# Detect keypoints like elbows, knees in images

Dataset requirements
# Annotations include keypoints list with visibility flags

Keypoint model architectures
# Detectron2’s Keypoint R-CNN uses ResNet+FPN backbone with keypoint head

Training keypoint models
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17

Evaluating keypoint predictions
# Use COCO keypoint evaluator in Detectron2

Visualization of keypoints
out = v.draw_and_connect_keypoints(predictions["instances"].pred_keypoints[0])  # keypoints of one instance

Combining with pose estimation
# Use detected keypoints for skeleton-based activity recognition

Multi-person keypoint detection
# Model handles instance separation and keypoint assignment

Real-time keypoint detection
# Use TensorRT or ONNX Runtime for acceleration

Applications and demos
# Open-source demos show keypoint tracking live
# Useful in autonomous driving for understanding both stuff and things classes

Panoptic FPN architecture
# Combines outputs of Mask R-CNN and semantic segmentation heads

Dataset preparation
# COCO panoptic format uses segment ids per pixel

Training panoptic models
cfg.MODEL.META_ARCHITECTURE = "PanopticFPN"

Evaluating panoptic quality
# PQ = (sum of IoUs over matched TP segments) / (|TP| + 0.5|FP| + 0.5|FN|)

Combining semantic & instance outputs
# Post-processing merges overlapping predictions consistently

Visualizing panoptic results
# Visualizer overlays both semantic and instance segments

Handling complex scenes
# Train on diverse datasets with challenging examples

Optimizing panoptic models
# Use mixed precision training for faster convergence

Industry applications
# Real-time panoptic segmentation powers autonomous navigation
# Example: Check dataset label completeness
assert all(len(ann["bbox"]) == 4 for d in dataset_dicts for ann in d["annotations"])

import matplotlib.pyplot as plt
from detectron2.utils.visualizer import Visualizer
v = Visualizer(image[:, :, ::-1], metadata)
out = v.draw_dataset_dict(dataset_dict)
plt.imshow(out.get_image())

import cv2
for d in dataset_dicts[:3]:
    img = cv2.imread(d["file_name"])
    v = Visualizer(img[:, :, ::-1], metadata)
    out = v.draw_dataset_dict(d)
    cv2.imshow("Sample", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)

from detectron2.utils.logger import setup_logger
logger = setup_logger()
logger.info("Starting training process...")

# Check for data loader errors or batch size problems

from detectron2.engine import DefaultTrainer
trainer = DefaultTrainer(cfg)
metrics = trainer.test(cfg, model)
print(metrics)

from detectron2.data import build_detection_train_loader
data_loader = build_detection_train_loader(cfg)
for batch in data_loader:
    print(batch)
    break

from detectron2.utils.events import EventStorage
with EventStorage(0) as storage:
    storage.put_scalar("loss", loss_value)

import torch
torch.cuda.empty_cache()

# Train on small subset to debug quickly
cfg.DATASETS.TRAIN = ("my_small_dataset",)
import cv2
img = cv2.imread("image.jpg")
outputs = predictor(img)
cv2.imshow("Output", img)  # shows the raw image; use Visualizer to draw the predictions
cv2.waitKey(0)

# Export model to ONNX and load in TensorFlow

import torch
from detectron2.export import TracingAdapter
# TracingAdapter wraps the model so it can be traced with flattened tensor inputs
adapter = TracingAdapter(model, inputs)
torch.onnx.export(adapter, adapter.flattened_inputs, "model.onnx")

# Configure DeepStream pipeline with Detectron2 models

# Run YOLO for fast detection, refine with Detectron2

# ROS node subscribes to camera feed, runs Detectron2 inference

# Use Flask or FastAPI to expose model predictions

# Optimize model for mobile inference with quantization

cap = cv2.VideoCapture("video.mp4")
while True:
    ret, frame = cap.read()
    if not ret:
        break
    outputs = predictor(frame)

# Sequentially apply models and merge outputs
# Use hooks to extract activations in Detectron2

# Use SHAP or LIME for feature attribution

# Aggregate saliency maps over dataset samples

import torch
from pytorch_grad_cam import GradCAM
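A minimal Grad-CAM usage sketch for a classification backbone (the target layer choice is an assumption, and applying Grad-CAM to a full two-stage detector needs additional wiring):

import torch
import torchvision
from pytorch_grad_cam import GradCAM

model = torchvision.models.resnet50(pretrained=True).eval()
cam = GradCAM(model=model, target_layers=[model.layer4[-1]])  # last conv block as target
input_tensor = torch.randn(1, 3, 224, 224)   # placeholder preprocessed image
grayscale_cam = cam(input_tensor=input_tensor)[0]  # HxW saliency map for the image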
# Overlay mask heatmaps on original images

# Stratify evaluation metrics by subgroup

from captum.attr import IntegratedGradients

# Provide visual explanations alongside predictions in UI

# Write model usage guidelines and disclaimers

# Use model cards and datasheets for transparency
# Install latest Detectron2 from source (Detectron2 is not published on PyPI):
pip install -U 'git+https://github.com/facebookresearch/detectron2.git'

# Example: Cascade R-CNN in Detectron2 is configured via the ROI heads, not a separate meta-architecture
cfg.MODEL.ROI_HEADS.NAME = "CascadeROIHeads"

# Building a model from a config; DETR itself lives in a separate Detectron2-based project
from detectron2.modeling import build_model
model = build_model(cfg)
# Research frameworks combine pseudo-labeling with Detectron2

# Tools generate synthetic images and annotations for training

# Pretrain backbone with SSL, then fine-tune Detectron2 head

# Adaptation with adversarial training or feature alignment

# Fine-tune Detectron2 on small datasets with transfer learning

# Run evaluation scripts on benchmark datasets

# Research papers often address these challenges

# Job listings require experience in Detectron2 and related tools

# Upload notebooks and demos to GitHub repositories

# Fork repo, create pull request with improvements

# Submit models for leaderboard ranking

# Write blogs on Medium or arXiv papers

# Attend workshops to stay updated

# Join AI/ML groups on LinkedIn, Discord

# Take courses on Coursera, Udacity, or AWS AI certifications

# Use platforms like Upwork to find clients

# Use RSS feeds and Twitter for updates