# Import the OpenCV library
import cv2

# Print the OpenCV version to confirm installation
print(cv2.__version__)  # Example output: 4.8.0

# Print history information
print("OpenCV started in 1999 at Intel and became open-source in 2000.")

# Basic OpenCV installation
# pip install opencv-python

# For full functionality including contrib modules
# pip install opencv-contrib-python

# Optional: Create a virtual environment (Linux/macOS)
# python3 -m venv opencv_env
# source opencv_env/bin/activate

# Windows version
# python -m venv opencv_env
# .\opencv_env\Scripts\activate

# Then install OpenCV
# pip install opencv-python
# Compare OpenCV and PIL image loading
import cv2
from PIL import Image

# Using OpenCV to read image
img_cv2 = cv2.imread("image.jpg")

# Using PIL to open image
img_pil = Image.open("image.jpg")

# Import OpenCV
import cv2

# Load an image (make sure sample.jpg exists)
img = cv2.imread("sample.jpg")

# Show image in a window
cv2.imshow("My First OpenCV Window", img)

# Wait for any key press
cv2.waitKey(0)

# Close all OpenCV windows
cv2.destroyAllWindows()

import cv2

# Read color image
img = cv2.imread("photo.jpg", 1)  # 1 = color

# Display the image
cv2.imshow("Color Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

# Read the image
img = cv2.imread("photo.jpg")

# Save it as a new file
cv2.imwrite("copy_photo.png", img)  # Saves as PNG
import cv2 img = cv2.imread("photo.jpg") print("Shape:", img.shape) # (height, width, channels) print("Size:", img.size) # Total number of pixels print("Data type:", img.dtype) # Type of each pixel
import cv2

# Convert color image to grayscale
img = cv2.imread("photo.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

cv2.imshow("Gray Image", gray)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

img = cv2.imread("photo.jpg")

# Define ROI coordinates: y1:y2, x1:x2
roi = img[100:200, 150:250]  # Crop region

# Show the cropped part
cv2.imshow("ROI", roi)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

img = cv2.imread("photo.jpg")

# Resize to 300x300
resized = cv2.resize(img, (300, 300))

cv2.imshow("Resized", resized)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np

img = np.zeros((400, 400, 3), dtype=np.uint8)

# Draw a blue line
cv2.line(img, (50, 50), (350, 50), (255, 0, 0), 3)

# Draw a green circle
cv2.circle(img, (200, 200), 50, (0, 255, 0), 2)

cv2.imshow("Shapes", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2
import numpy as np

img = np.ones((400, 400, 3), dtype=np.uint8) * 255

# Draw rectangle
cv2.rectangle(img, (100, 100), (300, 200), (0, 0, 255), 2)

# Draw ellipse
cv2.ellipse(img, (200, 300), (100, 50), 0, 0, 360, (0, 100, 200), 2)

cv2.imshow("Shapes", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2
import numpy as np

img = np.ones((400, 400, 3), dtype=np.uint8) * 255

# Define polygon points
pts = np.array([[50, 300], [100, 200], [200, 250], [300, 300]], np.int32)
pts = pts.reshape((-1, 1, 2))

# Draw closed polygon
cv2.polylines(img, [pts], True, (255, 0, 255), 2)

cv2.imshow("Polygon", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2
import numpy as np

img = np.zeros((300, 600, 3), dtype=np.uint8)

# Put text
cv2.putText(img, "OpenCV Rocks!", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 0), 2)

cv2.imshow("Text", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2
import numpy as np

img = np.ones((300, 300, 3), dtype=np.uint8) * 255

# Filled red rectangle
cv2.rectangle(img, (50, 50), (250, 150), (0, 0, 255), -1)

cv2.imshow("Filled", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2
import numpy as np

drawing = False
ix, iy = -1, -1
img = np.ones((400, 400, 3), dtype=np.uint8) * 255

def draw_circle(event, x, y, flags, param):
    global ix, iy, drawing
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
    elif event == cv2.EVENT_MOUSEMOVE and drawing:
        cv2.circle(img, (x, y), 5, (255, 0, 0), -1)
    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False

cv2.namedWindow("Draw")
cv2.setMouseCallback("Draw", draw_circle)

while True:
    cv2.imshow("Draw", img)
    if cv2.waitKey(1) & 0xFF == 27:
        break

cv2.destroyAllWindows()
import cv2 import numpy as np img = cv2.imread("photo.jpg") # Define translation matrix: move 100 right, 50 down M = np.float32([[1, 0, 100], [0, 1, 50]]) translated = cv2.warpAffine(img, M, (img.shape[1], img.shape[0])) cv2.imshow("Translated", translated) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") (h, w) = img.shape[:2] center = (w // 2, h // 2) # Rotate by 45 degrees around center M = cv2.getRotationMatrix2D(center, 45, 1.0) rotated = cv2.warpAffine(img, M, (w, h)) cv2.imshow("Rotated", rotated) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") # Scale image by 50% scaled = cv2.resize(img, None, fx=0.5, fy=0.5) cv2.imshow("Scaled", scaled) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") # Flip horizontally (flipCode=1) flipped = cv2.flip(img, 1) cv2.imshow("Flipped", flipped) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") # Crop rectangle: y:100-300, x:150-350 cropped = img[100:300, 150:350] cv2.imshow("Cropped", cropped) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 import numpy as np img = cv2.imread("photo.jpg") # Affine transform points pts1 = np.float32([[50,50], [200,50], [50,200]]) pts2 = np.float32([[10,100], [200,50], [100,250]]) M = cv2.getAffineTransform(pts1, pts2) affine = cv2.warpAffine(img, M, (img.shape[1], img.shape[0])) cv2.imshow("Affine", affine) cv2.waitKey(0) cv2.destroyAllWindows()
# BGR is OpenCV's default color space
# Different color models suit different tasks, e.g. HSV for color filtering
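# Illustrative sketch (file name is a placeholder): converting the default BGR
# image into RGB and HSV to see how the same pixel is represented
import cv2

img = cv2.imread("photo.jpg")                 # Loaded in BGR order by default
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)    # RGB order, e.g. for matplotlib
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)    # HSV, convenient for color filtering
print("BGR:", img[0, 0], "RGB:", rgb[0, 0], "HSV:", hsv[0, 0])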
import cv2 img = cv2.imread("photo.jpg") gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) cv2.imshow("Grayscale", gray) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) cv2.imshow("HSV", hsv) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) cv2.imshow("LAB", lab) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") b, g, r = cv2.split(img) # Merge channels back merged = cv2.merge([b, g, r])
import cv2 import numpy as np img = cv2.imread("photo.jpg") hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # Define lower and upper bounds for blue color lower_blue = np.array([100, 150, 0]) upper_blue = np.array([140, 255, 255]) mask = cv2.inRange(hsv, lower_blue, upper_blue) result = cv2.bitwise_and(img, img, mask=mask) cv2.imshow("Filtered Blue", result) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) # Grayscale _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) cv2.imshow("Global Threshold", thresh) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) adaptive = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2) cv2.imshow("Adaptive Threshold", adaptive) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) _, otsu = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) cv2.imshow("Otsu Threshold", otsu) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) _, inv_thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV) cv2.imshow("Inverted Threshold", inv_thresh) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) _, trunc = cv2.threshold(img, 127, 255, cv2.THRESH_TRUNC) _, tozero = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO) cv2.imshow("Trunc Threshold", trunc) cv2.imshow("ToZero Threshold", tozero) cv2.waitKey(0) cv2.destroyAllWindows()
# Typically done by stacking thresholded images and showing them
# (Implementation is similar to above, combining outputs)
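# One possible sketch of the comparison: stack the global, adaptive, and Otsu
# results side by side with np.hstack so they show in a single window
# (file name is a placeholder)
import cv2
import numpy as np

img = cv2.imread("photo.jpg", 0)
_, global_t = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
adaptive_t = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
_, otsu_t = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

stacked = np.hstack([global_t, adaptive_t, otsu_t])
cv2.imshow("Global | Adaptive | Otsu", stacked)
cv2.waitKey(0)
cv2.destroyAllWindows()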
import cv2 img = cv2.imread("photo.jpg") # Apply averaging filter with 5x5 kernel blur = cv2.blur(img, (5, 5)) cv2.imshow("Averaging", blur) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") # Apply Gaussian blur with 5x5 kernel gauss = cv2.GaussianBlur(img, (5, 5), 0) cv2.imshow("Gaussian Blur", gauss) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") # Apply median blur with kernel size 5 median = cv2.medianBlur(img, 5) cv2.imshow("Median Blur", median) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg") # Apply bilateral filter bilateral = cv2.bilateralFilter(img, 9, 75, 75) cv2.imshow("Bilateral Filter", bilateral) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 import numpy as np img = cv2.imread("photo.jpg") # Kernel for sharpening kernel = np.array([[0, -1, 0], [-1, 5,-1], [0, -1, 0]]) sharpened = cv2.filter2D(img, -1, kernel) cv2.imshow("Sharpened", sharpened) cv2.waitKey(0) cv2.destroyAllWindows()
# Noise reduction often combines blurring and filtering
# Use the above blurring functions depending on the noise type
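# A small sketch matching the filter to the noise type (file name is a placeholder):
# median blur for salt-and-pepper noise, Gaussian blur for mild sensor noise,
# and non-local means as a stronger but slower option for color images
import cv2

img = cv2.imread("photo.jpg")
median = cv2.medianBlur(img, 5)                    # Good for salt-and-pepper noise
gaussian = cv2.GaussianBlur(img, (5, 5), 0)        # Good for mild Gaussian noise
nlm = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)  # Stronger, slower

cv2.imshow("Non-local Means Denoising", nlm)
cv2.waitKey(0)
cv2.destroyAllWindows()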
import cv2 import numpy as np img = cv2.imread("binary_image.png", 0) kernel = np.ones((5,5), np.uint8) # Erode image erosion = cv2.erode(img, kernel, iterations=1) # Dilate image dilation = cv2.dilate(img, kernel, iterations=1) cv2.imshow("Erosion", erosion) cv2.imshow("Dilation", dilation) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 import numpy as np img = cv2.imread("binary_image.png", 0) kernel = np.ones((5,5), np.uint8) # Opening removes noise opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel) # Closing fills holes closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel) cv2.imshow("Opening", opening) cv2.imshow("Closing", closing) cv2.waitKey(0) cv2.destroyAllWindows()
gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel) cv2.imshow("Morphological Gradient", gradient) cv2.waitKey(0) cv2.destroyAllWindows()
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel) blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel) cv2.imshow("Top Hat", tophat) cv2.imshow("Black Hat", blackhat) cv2.waitKey(0) cv2.destroyAllWindows()
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5)) ellipse_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5)) cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5,5))
# Example: Clean noisy text regions before OCR cleaned = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
import cv2 img = cv2.imread("photo.jpg", 0) # Load grayscale # Sobel edge detection in X and Y directions sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3) sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3) cv2.imshow("Sobel X", sobelx) cv2.imshow("Sobel Y", sobely) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) scharrx = cv2.Scharr(img, cv2.CV_64F, 1, 0) scharry = cv2.Scharr(img, cv2.CV_64F, 0, 1) cv2.imshow("Scharr X", scharrx) cv2.imshow("Scharr Y", scharry) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) laplacian = cv2.Laplacian(img, cv2.CV_64F) cv2.imshow("Laplacian", laplacian) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) edges = cv2.Canny(img, 100, 200) cv2.imshow("Canny Edges", edges) cv2.waitKey(0) cv2.destroyAllWindows()
# Compare Sobel, Laplacian, and Canny by displaying results side-by-side
# (Combine the above examples visually in an app)
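# A minimal sketch of the comparison: convert each result to 8-bit and stack them
# horizontally (file name is a placeholder)
import cv2
import numpy as np

img = cv2.imread("photo.jpg", 0)
sobel = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 1, 1, ksize=3))
laplacian = cv2.convertScaleAbs(cv2.Laplacian(img, cv2.CV_64F))
canny = cv2.Canny(img, 100, 200)

cv2.imshow("Sobel | Laplacian | Canny", np.hstack([sobel, laplacian, canny]))
cv2.waitKey(0)
cv2.destroyAllWindows()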
import cv2 img = cv2.imread("shapes.png", 0) ret, thresh = cv2.threshold(img, 127, 255, 0) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
img_color = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) for i, contour in enumerate(contours): cv2.drawContours(img_color, [contour], -1, (0,255,0), 2) # Label contour x, y = contour[0][0] cv2.putText(img_color, f'#{i+1}', (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1) cv2.imshow("Contours", img_color) cv2.waitKey(0) cv2.destroyAllWindows()
for contour in contours: area = cv2.contourArea(contour) perimeter = cv2.arcLength(contour, True) print(f"Area: {area}, Perimeter: {perimeter}")
for contour in contours: epsilon = 0.02 * cv2.arcLength(contour, True) approx = cv2.approxPolyDP(contour, epsilon, True) print(f"Approximated points: {len(approx)}")
for contour in contours: hull = cv2.convexHull(contour) defects = cv2.convexityDefects(contour, cv2.convexHull(contour, returnPoints=False)) print(f"Convex Hull points: {len(hull)}") if defects is not None: print(f"Defects count: {defects.shape[0]}")
import cv2
from matplotlib import pyplot as plt

img = cv2.imread("photo.jpg", 0)  # Grayscale
hist = cv2.calcHist([img], [0], None, [256], [0, 256])

plt.plot(hist)
plt.title("Grayscale Histogram")
plt.xlabel("Pixel Intensity")
plt.ylabel("Frequency")
plt.show()

img = cv2.imread("photo.jpg", 0)
equalized = cv2.equalizeHist(img)
cv2.imshow("Equalized", equalized)
cv2.waitKey(0)
cv2.destroyAllWindows()

clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe_img = clahe.apply(img)
cv2.imshow("CLAHE", clahe_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

img = cv2.imread("photo.jpg")
hist = cv2.calcHist([img], [0, 1], None, [32, 32], [0, 256, 0, 256])
print(hist.shape)  # 2D histogram shape

hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
roi_hist = cv2.calcHist([hsv], [0], None, [180], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
back_proj = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)
cv2.imshow("Backprojection", back_proj)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2 img = cv2.imread("photo.jpg", 0) grad_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3) grad_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3) cv2.imshow("Gradient X", grad_x) cv2.imshow("Gradient Y", grad_y) cv2.waitKey(0) cv2.destroyAllWindows()
import numpy as np magnitude = cv2.magnitude(grad_x, grad_y) angle = cv2.phase(grad_x, grad_y, angleInDegrees=True) cv2.imshow("Gradient Magnitude", magnitude) cv2.waitKey(0) cv2.destroyAllWindows()
laplacian = cv2.Laplacian(img, cv2.CV_64F) cv2.imshow("Laplacian", laplacian) cv2.waitKey(0) cv2.destroyAllWindows()
grad_display = cv2.convertScaleAbs(magnitude) cv2.imshow("Gradient Visualization", grad_display) cv2.waitKey(0) cv2.destroyAllWindows()
# Example: Use gradient magnitude to create edge mask _, edge_mask = cv2.threshold(grad_display, 50, 255, cv2.THRESH_BINARY) cv2.imshow("Edge Mask", edge_mask) cv2.waitKey(0) cv2.destroyAllWindows()
import cv2 import numpy as np img = cv2.imread("photo.jpg") pts1 = np.float32([[50,50],[200,50],[50,200]]) pts2 = np.float32([[10,100],[200,50],[100,250]]) M = cv2.getAffineTransform(pts1, pts2) affine = cv2.warpAffine(img, M, (img.shape[1], img.shape[0])) cv2.imshow("Affine Transform", affine) cv2.waitKey(0) cv2.destroyAllWindows()
pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]]) pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]]) M = cv2.getPerspectiveTransform(pts1, pts2) perspective = cv2.warpPerspective(img, M, (300,300)) cv2.imshow("Perspective Transform", perspective) cv2.waitKey(0) cv2.destroyAllWindows()
# Image warping example uses warpAffine or warpPerspective functions # (see above examples)
(h, w) = img.shape[:2] center = (w//2, h//2) M = cv2.getRotationMatrix2D(center, 45, 1.0) rotated = cv2.warpAffine(img, M, (w, h)) cv2.imshow("Rotated Image", rotated) cv2.waitKey(0) cv2.destroyAllWindows()
# Skew correction involves detecting edges and applying a perspective or rotation transform
# (example code is similar to the perspective transform example above)

# Real-world correction often combines edge detection and a perspective warp,
# as in the sketch below
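# A hedged sketch of deskewing a scanned page (file name and threshold are placeholders):
# estimate the dominant text angle with minAreaRect, then rotate to compensate.
# Note: the sign/range convention of the returned angle differs across OpenCV versions,
# so the correction below may need adjusting.
import cv2
import numpy as np

img = cv2.imread("scanned_page.jpg", 0)
_, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

coords = np.column_stack(np.where(thresh > 0)).astype(np.float32)  # Text pixel coordinates
angle = cv2.minAreaRect(coords)[-1]                                # Angle of bounding rectangle
if angle > 45:
    angle -= 90                                                    # Map to a small correction

(h, w) = img.shape[:2]
M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
deskewed = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC,
                          borderMode=cv2.BORDER_REPLICATE)

cv2.imshow("Deskewed", deskewed)
cv2.waitKey(0)
cv2.destroyAllWindows()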
import cv2 img1 = cv2.imread("mask1.png", 0) img2 = cv2.imread("mask2.png", 0) bitwise_and = cv2.bitwise_and(img1, img2) bitwise_or = cv2.bitwise_or(img1, img2) bitwise_xor = cv2.bitwise_xor(img1, img2) cv2.imshow("AND", bitwise_and) cv2.imshow("OR", bitwise_or) cv2.imshow("XOR", bitwise_xor) cv2.waitKey(0) cv2.destroyAllWindows()
img = cv2.imread("photo.jpg") mask = cv2.imread("mask.png", 0) masked = cv2.bitwise_and(img, img, mask=mask) cv2.imshow("Masked Image", masked) cv2.waitKey(0) cv2.destroyAllWindows()
# Blend example:
# Combine masked regions with bitwise_and / bitwise_or, or use cv2.addWeighted for a smooth blend
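# A minimal sketch of smooth blending with addWeighted (file names are placeholders);
# both inputs must have the same size and type
import cv2

img1 = cv2.imread("photo1.jpg")
img2 = cv2.imread("photo2.jpg")
img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))

blended = cv2.addWeighted(img1, 0.7, img2, 0.3, 0)  # 70% img1 + 30% img2
cv2.imshow("Blended", blended)
cv2.waitKey(0)
cv2.destroyAllWindows()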
# Overlaying uses masks with bitwise_and and bitwise_or
# Dynamic combinations are handled with bitwise operations and alpha blending
# Example: Extract segmented object using mask
segmented = cv2.bitwise_and(img, img, mask=mask)
import cv2 import numpy as np img = cv2.imread("scene.jpg", 0) template = cv2.imread("template.jpg", 0) res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED) min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) top_left = max_loc h, w = template.shape bottom_right = (top_left[0] + w, top_left[1] + h) cv2.rectangle(img, top_left, bottom_right, 255, 2) cv2.imshow("Detected Template", img) cv2.waitKey(0) cv2.destroyAllWindows()
threshold = 0.8 loc = np.where(res >= threshold) for pt in zip(*loc[::-1]): cv2.rectangle(img, pt, (pt[0]+w, pt[1]+h), (0,255,0), 2) cv2.imshow("Multiple Matches", img) cv2.waitKey(0) cv2.destroyAllWindows()
# Loop over several templates and apply matchTemplate each time
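# A possible sketch of multi-template matching: run matchTemplate once per template
# and report the best-scoring location (file names are placeholders)
import cv2

scene = cv2.imread("scene.jpg", 0)
for name in ["template1.jpg", "template2.jpg", "template3.jpg"]:
    template = cv2.imread(name, 0)
    res = cv2.matchTemplate(scene, template, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    print(f"{name}: best score {max_val:.2f} at {max_loc}")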
# Rotate template with cv2.getRotationMatrix2D and match again
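# A rough sketch of rotation-aware matching: rotate the template in fixed steps
# and keep the angle with the highest correlation score (file names are placeholders)
import cv2

scene = cv2.imread("scene.jpg", 0)
template = cv2.imread("template.jpg", 0)
(h, w) = template.shape[:2]

best_score, best_angle = -1.0, 0
for angle in range(0, 360, 15):
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    rotated = cv2.warpAffine(template, M, (w, h))
    res = cv2.matchTemplate(scene, rotated, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, _ = cv2.minMaxLoc(res)
    if max_val > best_score:
        best_score, best_angle = max_val, angle

print(f"Best match score {best_score:.2f} at rotation {best_angle} degrees")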
# Capture video, apply matchTemplate on each frame, draw rectangle around match
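# A minimal sketch of template matching on live video: match every frame and draw
# a rectangle around confident matches (template file is a placeholder)
import cv2

cap = cv2.VideoCapture(0)
template = cv2.imread("template.jpg", 0)
h, w = template.shape

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    res = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    if max_val > 0.8:  # Only draw confident matches
        cv2.rectangle(frame, max_loc, (max_loc[0] + w, max_loc[1] + h), (0, 255, 0), 2)
    cv2.imshow("Template Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()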
import cv2 cap = cv2.VideoCapture("video.mp4") # Open video file while cap.isOpened(): ret, frame = cap.read() # Read frame if not ret: break # End of video cv2.imshow("Video Frame", frame) if cv2.waitKey(25) & 0xFF == ord('q'): # Press 'q' to quit break cap.release() cv2.destroyAllWindows()
cap = cv2.VideoCapture(0) # Open default camera while True: ret, frame = cap.read() if not ret: break cv2.imshow("Webcam", frame) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows()
cap = cv2.VideoCapture(0) # Define codec and create VideoWriter object fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640,480)) while True: ret, frame = cap.read() if not ret: break out.write(frame) # Write frame to file cv2.imshow("Recording", frame) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() out.release() cv2.destroyAllWindows()
cap = cv2.VideoCapture("video.mp4") while cap.isOpened(): ret, frame = cap.read() if not ret: break gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # Convert frame to grayscale cv2.imshow("Gray Video", gray) if cv2.waitKey(25) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows()
cap = cv2.VideoCapture(0) while True: ret, frame = cap.read() if not ret: break edges = cv2.Canny(frame, 100, 200) # Apply Canny edge detector cv2.imshow("Edges", edges) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows()
# Example: Quit on 'q', pause on 'p' paused = False cap = cv2.VideoCapture("video.mp4") while cap.isOpened(): if not paused: ret, frame = cap.read() if not ret: break cv2.imshow("Video", frame) key = cv2.waitKey(30) & 0xFF if key == ord('q'): break elif key == ord('p'): paused = not paused # Toggle pause cap.release() cv2.destroyAllWindows()
import cv2
import numpy as np

cap = cv2.VideoCapture('video.mp4')
ret, frame1 = cap.read()
prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)

# Parameters for ShiTomasi corner detection (for sparse flow)
feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)

# Parameters for Lucas-Kanade optical flow
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

p0 = cv2.goodFeaturesToTrack(prev_gray, mask=None, **feature_params)

while True:
    ret, frame2 = cap.read()
    if not ret:
        break
    frame_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

    # Calculate optical flow (sparse)
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, frame_gray, p0, None, **lk_params)

    # Select good points
    good_new = p1[st == 1]
    good_old = p0[st == 1]

    # Visualization skipped for brevity
    prev_gray = frame_gray.copy()
    p0 = good_new.reshape(-1, 1, 2)

cap.release()
cv2.destroyAllWindows()

cap = cv2.VideoCapture('video.mp4')
ret, frame1 = cap.read()
prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)

while True:
    ret, frame2 = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    # Visualization skipped for brevity
    prev_gray = gray

cap.release()
cv2.destroyAllWindows()
# Example: Draw lines for sparse flow points
for i, (new, old) in enumerate(zip(good_new, good_old)):
    a, b = new.ravel()
    c, d = old.ravel()
    # Coordinates must be integers for the drawing functions
    cv2.line(frame2, (int(a), int(b)), (int(c), int(d)), (0, 255, 0), 2)
    cv2.circle(frame2, (int(a), int(b)), 5, (0, 0, 255), -1)
import cv2

cap = cv2.VideoCapture(0)  # Open camera

# Create tracker object (example: KCF)
tracker = cv2.TrackerKCF_create()

ret, frame = cap.read()
bbox = cv2.selectROI("Frame", frame, False)  # Select object to track
tracker.init(frame, bbox)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, bbox = tracker.update(frame)
    if success:
        # Draw bounding box
        x, y, w, h = map(int, bbox)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    else:
        cv2.putText(frame, "Tracking failure", (50, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
# Note: in OpenCV 4.5+ the multi-tracker lives in the legacy module
# (cv2.legacy.MultiTracker_create, cv2.legacy.TrackerKCF_create)
trackers = cv2.MultiTracker_create()

# Add multiple objects
bbox1 = cv2.selectROI("Frame", frame, False)
tracker1 = cv2.TrackerKCF_create()
trackers.add(tracker1, frame, bbox1)
# Add more trackers as needed...

while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, boxes = trackers.update(frame)
    for box in boxes:
        x, y, w, h = map(int, box)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
    cv2.imshow("Multi Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
import cv2

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

img = cv2.imread("group.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)

cv2.imshow("Faces", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow("Face Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

import cv2

net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "model.caffemodel")

image = cv2.imread("image.jpg")
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)
net.setInput(blob)
detections = net.forward()

for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.5:
        box = detections[0, 0, i, 3:7] * [w, h, w, h]
        (startX, startY, endX, endY) = box.astype("int")
        cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)

cv2.imshow("Detections", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")

# getUnconnectedOutLayers returns a flat array in OpenCV 4.x,
# so using the output layer names directly is the most portable option
output_layers = net.getUnconnectedOutLayersNames()

blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
outputs = net.forward(output_layers)

# Process outputs to extract bounding boxes (code omitted for brevity)
# Load a pose estimation model and run a forward pass similar to detection
# Extract and visualize keypoints on the image
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)
    net.setInput(blob)
    detections = net.forward()
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * [frame.shape[1], frame.shape[0],
                                              frame.shape[1], frame.shape[0]]
            (startX, startY, endX, endY) = box.astype("int")
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
    cv2.imshow("Real-time DNN", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

import cv2

img = cv2.imread('coins.jpg', 0)  # Read image in grayscale

# Simple binary threshold
_, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Threshold', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

thresh_adapt = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                     cv2.THRESH_BINARY, 11, 2)
cv2.imshow('Adaptive Threshold', thresh_adapt)
cv2.waitKey(0)
cv2.destroyAllWindows()

import numpy as np

# Read image and convert to grayscale
img = cv2.imread('coins.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Threshold and noise removal
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

# Sure background area
sure_bg = cv2.dilate(opening, kernel, iterations=3)

# Finding sure foreground area
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
_, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)

# Finding unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)

# Marker labelling
_, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0

markers = cv2.watershed(img, markers)
img[markers == -1] = [0, 0, 255]  # Mark boundaries in red

cv2.imshow('Watershed Segmentation', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

img = cv2.imread('person.jpg')
mask = np.zeros(img.shape[:2], np.uint8)
bgdModel = np.zeros((1, 65), np.float64)
fgdModel = np.zeros((1, 65), np.float64)
rect = (50, 50, 450, 290)  # ROI for foreground

cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
img_cut = img * mask2[:, :, np.newaxis]

cv2.imshow('GrabCut Segmentation', img_cut)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np

img = cv2.imread('chessboard.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_f = np.float32(gray)  # cornerHarris expects float32 input

dst = cv2.cornerHarris(gray_f, 2, 3, 0.04)
dst = cv2.dilate(dst, None)
img[dst > 0.01 * dst.max()] = [0, 0, 255]

cv2.imshow('Harris Corners', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

sift = cv2.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(gray, None)
img_sift = cv2.drawKeypoints(img, keypoints, None)
cv2.imshow('SIFT Features', img_sift)
cv2.waitKey(0)
cv2.destroyAllWindows()

orb = cv2.ORB_create()
keypoints, descriptors = orb.detectAndCompute(gray, None)
img_orb = cv2.drawKeypoints(img, keypoints, None)
cv2.imshow('ORB Features', img_orb)
cv2.waitKey(0)
cv2.destroyAllWindows()

img2 = cv2.imread('scene.jpg')
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
kp2, des2 = sift.detectAndCompute(gray2, None)

bf = cv2.BFMatcher()
matches = bf.knnMatch(descriptors, des2, k=2)

# Apply ratio test
good_matches = []
for m, n in matches:
    if m.distance < 0.75 * n.distance:
        good_matches.append(m)

img_matches = cv2.drawMatches(img, keypoints, img2, kp2, good_matches, None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
import glob

# Prepare object points for a 9x6 chessboard pattern
objp = np.zeros((6*9, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

objpoints = []  # 3D points in real world space
imgpoints = []  # 2D points in image plane

images = glob.glob('calib_images/*.jpg')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)
    if ret:
        objpoints.append(objp)
        imgpoints.append(corners)
        cv2.drawChessboardCorners(img, (9, 6), corners, ret)
        cv2.imshow('Chessboard', img)
        cv2.waitKey(100)
cv2.destroyAllWindows()

ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints,
                                                   gray.shape[::-1], None, None)
print("Camera matrix:\n", mtx)
print("Distortion coefficients:\n", dist)

img = cv2.imread('test.jpg')
h, w = img.shape[:2]
newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
dst = cv2.undistort(img, mtx, dist, None, newcameramtx)

x, y, w, h = roi
dst = dst[y:y+h, x:x+w]
cv2.imshow('Undistorted Image', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Stereo calibration and reconstruction is more complex and typically requires stereo image pairs.
# This is a high-level overview; a full implementation involves stereoRectify,
# computing a disparity map, and reprojecting to 3D.
import cv2

img1 = cv2.imread('left.jpg')
img2 = cv2.imread('right.jpg')

# Initialize ORB detector
orb = cv2.ORB_create()

# Find keypoints and descriptors
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

# Match descriptors using BFMatcher
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)

img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:20], None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()

import numpy as np

src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

height, width, _ = img2.shape
result = cv2.warpPerspective(img1, H, (width * 2, height))
result[0:height, 0:width] = img2
cv2.imshow('Panorama', result)
cv2.waitKey(0)
cv2.destroyAllWindows()

stitcher = cv2.Stitcher_create()
status, pano = stitcher.stitch([img1, img2])
if status == cv2.STITCHER_OK:
    cv2.imshow('Panorama', pano)
    cv2.waitKey(0)
else:
    print('Error during stitching')
cv2.destroyAllWindows()

# Formula: s * [u v 1]^T = K * [R | t] * [X Y Z 1]^T
# where:
#   s         = scale factor
#   [u v 1]   = pixel coordinates (homogeneous)
#   K         = intrinsic matrix
#   R, t      = rotation and translation (extrinsic)
#   [X Y Z 1] = 3D world coordinates (homogeneous)
import numpy as np

# Intrinsic matrix; fx, fy, cx, cy are placeholder values for illustration
fx, fy = 800.0, 800.0   # Focal lengths in pixels
cx, cy = 320.0, 240.0   # Principal point
K = np.array([[fx, 0, cx],
              [0, fy, cy],
              [0, 0, 1]])

# Extrinsic parameters; tx, ty, tz are placeholder values
R = np.eye(3)                     # Example rotation matrix
tx, ty, tz = 0.1, 0.0, 1.0
t = np.array([[tx], [ty], [tz]])  # Translation vector
# Example function to project a 3D point
def project_point(X, K, R, t):
    X_homog = np.append(X, 1)
    RT = np.hstack((R, t))
    x_cam = RT @ X_homog
    x_img = K @ x_cam
    x_img /= x_img[2]
    return x_img[:2]
import cv2
import numpy as np
import glob

# Prepare object points (0,0,0), (1,0,0), ..., (8,5,0)
objp = np.zeros((6*9, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

objpoints = []
imgpoints = []

images = glob.glob('calib_images/*.jpg')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)
    if ret:
        objpoints.append(objp)
        imgpoints.append(corners)
        cv2.drawChessboardCorners(img, (9, 6), corners, ret)
        cv2.imshow('Corners', img)
        cv2.waitKey(100)
cv2.destroyAllWindows()

ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints,
                                                   gray.shape[::-1], None, None)
print("Camera Matrix:\n", mtx)
print("Distortion Coefficients:\n", dist)

img = cv2.imread('test.jpg')
h, w = img.shape[:2]
newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
dst = cv2.undistort(img, mtx, dist, None, newcameramtx)

x, y, w, h = roi
dst = dst[y:y+h, x:x+w]
cv2.imshow('Undistorted', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Assuming calibration is done and the matrices are obtained,
# use cv2.stereoRectify() and cv2.initUndistortRectifyMap() to rectify the images
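# A hedged sketch of rectification, assuming stereo calibration already produced
# camera matrices (mtx1, mtx2), distortion coefficients (dist1, dist2), the rotation R
# and translation T between the cameras, the image size (w, h), and the image pair
# left_img / right_img; none of these names are defined here
import cv2

R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(mtx1, dist1, mtx2, dist2,
                                                  (w, h), R, T)
map1x, map1y = cv2.initUndistortRectifyMap(mtx1, dist1, R1, P1, (w, h), cv2.CV_32FC1)
map2x, map2y = cv2.initUndistortRectifyMap(mtx2, dist2, R2, P2, (w, h), cv2.CV_32FC1)

rect_left = cv2.remap(left_img, map1x, map1y, cv2.INTER_LINEAR)
rect_right = cv2.remap(right_img, map2x, map2y, cv2.INTER_LINEAR)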
import cv2
import numpy as np

left_img = cv2.imread('left.jpg', 0)
right_img = cv2.imread('right.jpg', 0)

stereo = cv2.StereoBM_create(numDisparities=16*5, blockSize=15)
disparity = stereo.compute(left_img, right_img)

cv2.imshow('Disparity Map', disparity)
cv2.waitKey(0)
cv2.destroyAllWindows()
# depth = (focal_length * baseline) / disparity
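# A small sketch converting the disparity map above to depth; focal_length (pixels)
# and baseline (meters) are placeholder calibration values
import numpy as np

focal_length = 700.0   # In pixels, from calibration
baseline = 0.06        # Distance between the cameras in meters

disp = disparity.astype(np.float32) / 16.0   # StereoBM returns fixed-point disparities
depth = np.zeros_like(disp)
valid = disp > 0
depth[valid] = (focal_length * baseline) / disp[valid]   # Depth in meters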
import cv2

img = cv2.imread('text_image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)

cv2.imshow('Preprocessed Image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

import pytesseract

# Ensure pytesseract is installed and the Tesseract OCR engine is set up
text = pytesseract.image_to_string(thresh)
print("Recognized Text:")
print(text)

kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
dilated = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('Dilated Image', dilated)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2

# Load YOLO model
net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")

# In OpenCV 4.x getUnconnectedOutLayers returns a flat array,
# so the output layer names can be taken directly
output_layers = net.getUnconnectedOutLayersNames()
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)
    # Process outputs to extract boxes, confidences, and class IDs
    # Draw bounding boxes on frame (code omitted for brevity)
    cv2.imshow("Real-time Object Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
# Use cv2.dnn.NMSBoxes to perform Non-Maximum Suppression
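# A minimal sketch of Non-Maximum Suppression, assuming boxes (x, y, w, h lists),
# confidences, and the current frame were filled while parsing the YOLO outputs above
import numpy as np

indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)  # score and NMS thresholds
for i in np.array(indices).flatten():                     # Handles both return formats
    x, y, w, h = boxes[i]
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)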
import cv2

# Check if CUDA is available
print(cv2.cuda.getCudaEnabledDeviceCount())

# Upload image to GPU memory
img = cv2.imread('image.jpg')
gpu_img = cv2.cuda_GpuMat()
gpu_img.upload(img)

# Perform Gaussian blur on GPU
gpu_blur = cv2.cuda.createGaussianFilter(gpu_img.type(), -1, (15, 15), 0)
blurred = gpu_blur.apply(gpu_img)

# Download result back to CPU memory
result = blurred.download()
cv2.imshow('GPU Blur', result)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Example: Background subtraction with MOG2
cap = cv2.VideoCapture('video.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    fgmask = fgbg.apply(frame)
    cv2.imshow('Foreground Mask', fgmask)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

# Load TensorFlow model
net = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
# Use net as usual for forward passes

import cv2
import numpy as np

# Prepare training data: features and labels
trainData = np.random.randint(0, 100, (25, 2)).astype(np.float32)
responses = np.random.randint(0, 2, (25, 1)).astype(np.float32)

# Create and train kNN
knn = cv2.ml.KNearest_create()
knn.train(trainData, cv2.ml.ROW_SAMPLE, responses)

# Predict for new sample
newcomer = np.array([[50, 50]], dtype=np.float32)
ret, results, neighbours, dist = knn.findNearest(newcomer, k=3)
print("Predicted class:", results[0][0])
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)

# Classification in cv2.ml expects integer class labels
svm.train(trainData, cv2.ml.ROW_SAMPLE, responses.astype(np.int32))

pred = svm.predict(newcomer)
print("SVM Prediction:", pred[1][0][0])
dtree = cv2.ml.DTrees_create()
dtree.setMaxDepth(10)  # Default parameters can fail to train on some OpenCV versions
dtree.setCVFolds(0)
dtree.train(trainData, cv2.ml.ROW_SAMPLE, responses.astype(np.int32))

pred_dt = dtree.predict(newcomer)
print("Decision Tree Prediction:", pred_dt[1][0][0])
import cv2
import numpy as np

# Load pretrained MobileNet model files
net = cv2.dnn.readNetFromCaffe('mobilenet_deploy.prototxt', 'mobilenet.caffemodel')

img = cv2.imread('dog.jpg')
blob = cv2.dnn.blobFromImage(img, 1/127.5, (224, 224), (127.5, 127.5, 127.5), swapRB=True)
net.setInput(blob)
preds = net.forward()

class_id = np.argmax(preds[0])
confidence = preds[0][class_id]
print(f"Class ID: {class_id}, Confidence: {confidence}")

import cv2
import numpy as np

img = cv2.imread('scene.jpg')
template = cv2.imread('template.jpg', 0)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

top_left = max_loc
h, w = template.shape
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)

cv2.imshow('Detected', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_skin = (0, 20, 70)
    upper_skin = (20, 255, 255)
    mask = cv2.inRange(hsv, lower_skin, upper_skin)
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        cnt = max(contours, key=cv2.contourArea)
        cv2.drawContours(frame, [cnt], -1, (0, 255, 0), 3)
    cv2.imshow('Hand Detection', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

import cv2

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer.yml')
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

img = cv2.imread('test.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray)

for (x, y, w, h) in faces:
    roi_gray = gray[y:y+h, x:x+w]
    id_, conf = recognizer.predict(roi_gray)
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    cv2.putText(img, str(id_), (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

cv2.imshow('Face Recognition', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Using OpenCV ArUco markers (opencv-contrib)
# Note: in OpenCV >= 4.7 this API changed to aruco.getPredefinedDictionary()
# and the aruco.ArucoDetector class
import cv2
import cv2.aruco as aruco

cap = cv2.VideoCapture(0)
dictionary = aruco.Dictionary_get(aruco.DICT_6X6_250)
parameters = aruco.DetectorParameters_create()

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    corners, ids, _ = aruco.detectMarkers(gray, dictionary, parameters=parameters)
    if ids is not None:
        aruco.drawDetectedMarkers(frame, corners, ids)
    cv2.imshow('AR Marker Detection', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2

img = cv2.imread('coins.jpg', 0)
_, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Thresholded', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
import numpy as np

# Assume the binary image thresh is obtained
dist_transform = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
_, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(thresh, sure_fg)

# Marker labeling
_, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0

# Watershed needs a 3-channel image; convert the grayscale input first
img_color = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
markers = cv2.watershed(img_color, markers)
img_color[markers == -1] = [0, 0, 255]  # Mark boundaries in red

cv2.imshow('Watershed', img_color)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2

img = cv2.imread('low_contrast.jpg', 0)
equ = cv2.equalizeHist(img)
cv2.imshow('Original', img)
cv2.imshow('Equalized', equ)
cv2.waitKey(0)
cv2.destroyAllWindows()

denoised = cv2.medianBlur(img, 5)
cv2.imshow('Denoised', denoised)
cv2.waitKey(0)
cv2.destroyAllWindows()
from PIL import Image
from PIL.ExifTags import TAGS

img = Image.open('image.jpg')
exif_data = img._getexif()
if exif_data:  # Not every image contains EXIF metadata
    for tag_id, value in exif_data.items():
        tag = TAGS.get(tag_id, tag_id)
        print(f"{tag}: {value}")
import cv2

cap = cv2.VideoCapture('video.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    fgmask = fgbg.apply(frame)
    cv2.imshow('Foreground Mask', fgmask)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

import cv2
import numpy as np

# Load pre-trained SVM model from file
svm = cv2.ml.SVM_load('svm_model.xml')

# Prepare sample input data (2D feature vector)
sample = np.array([[12.5, 3.7]], dtype=np.float32)

# Predict class label
_, result = svm.predict(sample)
print("Predicted class:", result[0][0])

import cv2

images = [cv2.imread('img1.jpg'), cv2.imread('img2.jpg')]
stitcher = cv2.Stitcher_create()
status, pano = stitcher.stitch(images)
if status == cv2.STITCHER_OK:
    cv2.imshow('Panorama', pano)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Stitching failed:", status)

import cv2
import numpy as np

# Load stereo images
imgL = cv2.imread('left.jpg', 0)
imgR = cv2.imread('right.jpg', 0)

stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL, imgR)

cv2.imshow('Disparity', disparity)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

img = cv2.imread('input.png')

# Save as JPEG with quality = 90 (out of 100)
cv2.imwrite('output.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def reprojection_error(objpoints, imgpoints, rvecs, tvecs, mtx, dist):
    total_error = 0
    for i in range(len(objpoints)):
        imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
        error = cv2.norm(imgpoints[i], imgpoints2, cv2.NORM_L2) / len(imgpoints2)
        total_error += error
    return total_error / len(objpoints)

# After calibration:
error = reprojection_error(objpoints, imgpoints, rvecs, tvecs, mtx, dist)
print("Mean Reprojection Error:", error)

import cv2

img1 = cv2.imread('img1.jpg', 0)
img2 = cv2.imread('img2.jpg', 0)

orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)

img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:10], None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

cap = cv2.VideoCapture(0)
ret, frame = cap.read()
bbox = cv2.selectROI("Frame", frame, False)

tracker = cv2.TrackerCSRT_create()
tracker.init(frame, bbox)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, bbox = tracker.update(frame)
    if success:
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
    else:
        cv2.putText(frame, "Tracking failure", (50, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
    cv2.imshow('Tracking', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

import cv2

img = cv2.imread('image.jpg')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
cv2.imshow('HSV Image', hsv)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np

cap = cv2.VideoCapture('input_video.mp4')
_, prev = cap.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

transforms = []
while True:
    ret, curr = cap.read()
    if not ret:
        break
    curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)

    # Track feature points between frames (PyrLK needs a set of points to follow)
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200,
                                       qualityLevel=0.01, minDistance=30)
    curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None)

    # ... further processing for stabilization (e.g., estimate a transform
    #     from the matched points and smooth it over time) ...
    prev_gray = curr_gray
cap.release()
import cv2
import numpy as np

img = cv2.imread('image.jpg')

# Invert colors using NumPy
inverted = 255 - img
cv2.imshow('Inverted Image', inverted)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())
    # Post-processing omitted for brevity...
    cv2.imshow('Real-Time Detection', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

import cv2

net = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
image = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(image, size=(300, 300), swapRB=True)
net.setInput(blob)
output = net.forward()
# Process detections...
cv2.imshow('Output', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Sample command to convert and optimize a model with the OpenVINO CLI (outside Python)
# mo.py --input_model model.pb --output_dir optimized_model
# In Python, load the optimized model as usual with cv2.dnn.readNet()

# Install LabelImg via pip:
# pip install labelImg
# Run labelImg to annotate images and save XML files in Pascal VOC format

# Transfer learning is usually done in frameworks like TensorFlow or PyTorch,
# but OpenCV can load the resulting models for inference.
# Load the fine-tuned model in OpenCV for inference:
net = cv2.dnn.readNet('fine_tuned_model.pb')
import cv2

# Check if CUDA is available
print(cv2.cuda.getCudaEnabledDeviceCount())

# Upload image to GPU memory
img = cv2.imread('image.jpg')
gpu_img = cv2.cuda_GpuMat()
gpu_img.upload(img)

# Perform Gaussian blur on GPU
gpu_blurred = cv2.cuda.createGaussianFilter(gpu_img.type(), -1, (15, 15), 0).apply(gpu_img)
blurred = gpu_blurred.download()

cv2.imshow('Blurred with CUDA', blurred)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

cap = cv2.VideoCapture(0)  # Pi camera module
ret, frame = cap.read()
if ret:
    cv2.imwrite('capture.jpg', frame)
cap.release()

import cv2

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
img = cv2.imread('group_photo.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)

for (x, y, w, h) in faces:
    roi = img[y:y+h, x:x+w]
    roi = cv2.GaussianBlur(roi, (99, 99), 30)
    img[y:y+h, x:x+w] = roi

cv2.imshow('Blurred Faces', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

import boto3

s3 = boto3.client('s3')
filename = 'image.jpg'
bucket_name = 'mybucket'
s3.upload_file(filename, bucket_name, filename)

# Vision Transformer (ViT) implementations are usually in PyTorch or TensorFlow
# Example is only conceptual:
# model = ViT(...)
# output = model(input_image_tensor)
import cv2
import numpy as np

cap = cv2.VideoCapture(0)
aruco_dict = cv2.aruco.Dictionary_get(cv2.aruco.DICT_6X6_250)
parameters = cv2.aruco.DetectorParameters_create()

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    corners, ids, rejected = cv2.aruco.detectMarkers(gray, aruco_dict, parameters=parameters)
    if ids is not None:
        cv2.aruco.drawDetectedMarkers(frame, corners, ids)
    cv2.imshow('AR Markers', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2

img = cv2.imread('coins.jpg', 0)
ret, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Thresholded Image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        cv2.drawContours(frame, [max(contours, key=cv2.contourArea)], -1, (0, 255, 0), 3)
    cv2.imshow('Gesture Recognition', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

import cv2
import pytesseract

img = cv2.imread('text_image.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)[1]
text = pytesseract.image_to_string(thresh)
print("Extracted Text:", text)

import cv2
import numpy as np

img = cv2.imread('fisheye.jpg')
DIM = img.shape[:2][::-1]
K = np.array([[300.0, 0.0, DIM[0]/2],
              [0.0, 300.0, DIM[1]/2],
              [0.0, 0.0, 1.0]])
D = np.array([-0.1, 0.01, 0.0, 0.0])

map1, map2 = cv2.fisheye.initUndistortRectifyMap(K, D, np.eye(3), K, DIM, cv2.CV_16SC2)
undistorted = cv2.remap(img, map1, map2, interpolation=cv2.INTER_LINEAR)

cv2.imshow('Undistorted Image', undistorted)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

cap1 = cv2.VideoCapture(0)  # First camera
cap2 = cv2.VideoCapture(1)  # Second camera

while True:
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not ret1 or not ret2:
        break
    cv2.imshow('Camera 1', frame1)
    cv2.imshow('Camera 2', frame2)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC to quit
        break
cap1.release()
cap2.release()
cv2.destroyAllWindows()

import cv2
import numpy as np

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        c = max(contours, key=cv2.contourArea)
        cv2.drawContours(frame, [c], -1, (0, 255, 0), 3)
    cv2.imshow('Line Following', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
import numpy as np

img = cv2.imread('image.jpg', 0)
mean, stddev = cv2.meanStdDev(img)

# meanStdDev returns 1x1 arrays; convert to a scalar threshold
thresh_val = float(mean[0][0] + 2 * stddev[0][0])

# Threshold anomalies above mean + 2*stddev
_, anomaly = cv2.threshold(img, thresh_val, 255, cv2.THRESH_BINARY)
cv2.imshow('Anomalies', anomaly)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2

# Connect to drone camera stream (example URL)
cap = cv2.VideoCapture('http://192.168.1.1:8080/video')
while True:
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imshow('Drone Camera', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Grad-CAM is typically implemented with deep learning frameworks (e.g., PyTorch)
# Basic conceptual steps:
# 1. Forward pass the input through the CNN
# 2. Get gradients of the target class output w.r.t. the convolutional feature maps
# 3. Compute a weighted sum of the feature maps
# 4. Overlay the heatmap on the input image
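# The OpenCV part of step 4 can be sketched as follows, assuming `heatmap` is a
# float array in [0, 1] produced by a deep learning framework and `img` is the
# original BGR image (both are assumptions, not defined here)
import cv2
import numpy as np

heatmap_u8 = np.uint8(255 * heatmap)                       # Scale to 8-bit
heatmap_color = cv2.applyColorMap(heatmap_u8, cv2.COLORMAP_JET)
heatmap_color = cv2.resize(heatmap_color, (img.shape[1], img.shape[0]))
overlay = cv2.addWeighted(img, 0.6, heatmap_color, 0.4, 0)  # Blend heatmap onto image

cv2.imshow('Grad-CAM Overlay', overlay)
cv2.waitKey(0)
cv2.destroyAllWindows()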
import cv2
import numpy as np

imgL = cv2.imread('left.jpg', 0)
imgR = cv2.imread('right.jpg', 0)

stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL, imgR)

cv2.imshow('Disparity', disparity)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

cap = cv2.VideoCapture(0)
orb = cv2.ORB_create()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    keypoints = orb.detect(frame, None)
    frame = cv2.drawKeypoints(frame, keypoints, None, color=(0, 255, 0))
    cv2.imshow('ORB Keypoints', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Typically involves a pre-trained CNN for image features
# and an LSTM or Transformer for question encoding,
# combined to generate answers.
# Pseudocode:
# img_features = CNN(image)
# question_embedding = LSTM(question)
# answer = decoder(img_features, question_embedding)

# Extract features with a CNN (e.g., ResNet)
# Feed features into an RNN or Transformer for sequence generation
# The output caption describes the image content
# This is usually implemented in deep learning frameworks.

# Two identical CNNs process image pairs
# The output measures similarity
# Used for one-shot/few-shot classification
# Typically implemented with PyTorch or TensorFlow.

# The generator creates fake images
# The discriminator classifies real vs. fake
# Both train in competition to improve quality
# Implementation is typically in deep learning frameworks.
import cv2

cap = cv2.VideoCapture('video.mp4')
ret, prev_frame = cap.read()
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    diff = cv2.absdiff(prev_gray, gray)
    _, thresh = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
    cv2.imshow('Frame Difference', thresh)
    prev_gray = gray
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()

# Acquire image
# Detect target features
# Calculate the error between the current and desired feature position
# Command robot actuators to minimize the error
# Implementation varies widely by robot and system.
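# A toy sketch of the error computation only: segment a colored target and measure
# how far its centroid is from the image center; a real controller would convert
# this error into actuator commands. The HSV bounds are placeholder values.
import cv2
import numpy as np

cap = cv2.VideoCapture(0)
ret, frame = cap.read()
if ret:
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, np.array([100, 150, 0]), np.array([140, 255, 255]))
    M = cv2.moments(mask)
    if M["m00"] > 0:
        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]  # Target centroid
        err_x = cx - frame.shape[1] / 2                    # Horizontal error (pixels)
        err_y = cy - frame.shape[0] / 2                    # Vertical error (pixels)
        print("Error:", err_x, err_y)
cap.release()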
import cv2
import numpy as np

criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

objp = np.zeros((6*9, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

objpoints = []
imgpoints = []

images = [...]  # List of calibration images
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)
    if ret:
        objpoints.append(objp)
        corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        imgpoints.append(corners2)

ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints,
                                                   gray.shape[::-1], None, None)
import cv2
import numpy as np

img = cv2.imread('image.jpg', 0)
laplacian_var = cv2.Laplacian(img, cv2.CV_64F).var()
print("Sharpness (variance of Laplacian):", laplacian_var)

import cv2

img1 = cv2.imread('left.jpg')
img2 = cv2.imread('right.jpg')

stitcher = cv2.Stitcher_create()
status, pano = stitcher.stitch([img1, img2])
if status == cv2.STITCHER_OK:
    cv2.imshow('Panorama', pano)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print('Error during stitching')

import cv2
import numpy as np

# 3D points in world coordinates
obj_points = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0]], dtype=np.float32)

# Corresponding 2D points in the image
img_points = np.array([[320, 240], [400, 240], [400, 320], [320, 320]], dtype=np.float32)

camera_matrix = np.array([[800, 0, 320], [0, 800, 240], [0, 0, 1]], dtype=np.float32)
dist_coeffs = np.zeros(5)

ret, rvec, tvec = cv2.solvePnP(obj_points, img_points, camera_matrix, dist_coeffs)
print("Rotation Vector:\n", rvec)
print("Translation Vector:\n", tvec)

import cv2

img = cv2.imread('input.png')
cv2.imwrite('output.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
import cv2

img = cv2.imread('image.jpg')
blurred = cv2.GaussianBlur(img, (5, 5), 0)
cv2.imshow('Blurred Image', blurred)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2
import numpy as np

img = cv2.imread('damaged.jpg')
mask = cv2.imread('mask.png', 0)
restored = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
cv2.imshow('Restored Image', restored)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

img = cv2.imread('low_res.jpg')
upscaled = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
cv2.imshow('Upscaled Image', upscaled)
cv2.waitKey(0)
cv2.destroyAllWindows()

import cv2

img1 = cv2.imread('img1.jpg', 0)
img2 = cv2.imread('img2.jpg', 0)

orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)

img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:10], None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Requires the OpenCV DNN module and the EAST model files
import cv2
import numpy as np

net = cv2.dnn.readNet('frozen_east_text_detection.pb')
img = cv2.imread('scene.jpg')
(h, w) = img.shape[:2]

blob = cv2.dnn.blobFromImage(img, 1.0, (320, 320), (123.68, 116.78, 103.94), True, False)
net.setInput(blob)
scores, geometry = net.forward(['feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3'])
# Postprocessing needed to extract boxes (complex, not shown here)
# Typically uses pretrained CNNs (e.g., VGG)
# Optimize the output image to minimize content & style loss
# Requires deep learning frameworks
# Conceptual code:
# content_features = CNN(content_image)
# style_features = CNN(style_image)
# output = optimize(content_features, style_features)

import cv2

img = cv2.imread('image.jpg')
flip = cv2.flip(img, 1)  # Horizontal flip
rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
cv2.imshow('Original', img)
cv2.imshow('Flipped', flip)
cv2.imshow('Rotated', rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

# U-Net uses an encoder-decoder CNN architecture
# Input image → downsampling → upsampling → pixel classification
# Requires deep learning frameworks for implementation

import cv2

cap = cv2.VideoCapture('video.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    fgmask = fgbg.apply(frame)
    cv2.imshow('FG Mask', fgmask)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Extract image features with a CNN
# Use language models to generate questions
# Combine multimodal features for coherent question generation

# Calculate attention scores between image features
# Weight features based on relevance
# Integrate weighted features into the prediction pipeline

# Compute the gradient of the output w.r.t. the input image pixels
# Visualize the gradients to show influential regions

# Align feature distributions of the source and target domains
# Use adversarial training or discrepancy minimization

# Generate augmented image pairs
# Train the model to maximize agreement between pairs
# Useful for feature extraction

# Define a search space of possible architectures
# Use reinforcement learning or evolutionary methods
# Evaluate performance on a validation set

# Split the image into fixed-size patches
# Flatten and embed the patches as tokens
# Use transformer layers for classification

# Autonomous vehicles, AR/VR, medical imaging, robotics, surveillance
# Advances in hardware and algorithms will enable new possibilities