# Import the OpenCV library
import cv2
# Print the OpenCV version to confirm installation
print(cv2.__version__) # Example output: 4.8.0
# Print history information
print("OpenCV started in 1999 at Intel and became open-source in 2000.")
# Basic OpenCV installation
# pip install opencv-python
# For full functionality including contrib modules
# pip install opencv-contrib-python
# Optional: Create a virtual environment (Linux/macOS)
# python3 -m venv opencv_env
# source opencv_env/bin/activate
# Windows version
# python -m venv opencv_env
# .\opencv_env\Scripts\activate
# Then install OpenCV
# pip install opencv-python
# Compare OpenCV and PIL image loading
import cv2
from PIL import Image
# Using OpenCV to read an image (returns a NumPy array in BGR channel order)
img_cv2 = cv2.imread("image.jpg")
# Using PIL to open an image (returns a lazy PIL Image object in RGB order)
img_pil = Image.open("image.jpg")
# Import OpenCV
import cv2
# Load an image (make sure sample.jpg exists)
img = cv2.imread("sample.jpg")
# Show image in a window
cv2.imshow("My First OpenCV Window", img)
# Wait for any key press
cv2.waitKey(0)
# Close all OpenCV windows
cv2.destroyAllWindows()
import cv2
# Read color image
img = cv2.imread("photo.jpg", 1) # 1 = color
# Display the image
cv2.imshow("Color Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
# Read the image
img = cv2.imread("photo.jpg")
# Save it as a new file
cv2.imwrite("copy_photo.png", img) # Saves as PNG
import cv2
img = cv2.imread("photo.jpg")
print("Shape:", img.shape) # (height, width, channels)
print("Size:", img.size) # Total number of pixels
print("Data type:", img.dtype) # Type of each pixel
import cv2
# Convert color image to grayscale
img = cv2.imread("photo.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Gray Image", gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Define ROI coordinates: y1:y2, x1:x2
roi = img[100:200, 150:250] # Crop region
# Show the cropped part
cv2.imshow("ROI", roi)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Resize to 300x300
resized = cv2.resize(img, (300, 300))
cv2.imshow("Resized", resized)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = np.zeros((400, 400, 3), dtype=np.uint8)
# Draw a blue line
cv2.line(img, (50, 50), (350, 50), (255, 0, 0), 3)
# Draw a green circle
cv2.circle(img, (200, 200), 50, (0, 255, 0), 2)
cv2.imshow("Shapes", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = np.ones((400, 400, 3), dtype=np.uint8) * 255
# Draw rectangle
cv2.rectangle(img, (100, 100), (300, 200), (0, 0, 255), 2)
# Draw ellipse
cv2.ellipse(img, (200, 300), (100, 50), 0, 0, 360, (0, 100, 200), 2)
cv2.imshow("Shapes", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = np.ones((400, 400, 3), dtype=np.uint8) * 255
# Define polygon points
pts = np.array([[50, 300], [100, 200], [200, 250], [300, 300]], np.int32)
pts = pts.reshape((-1, 1, 2))
# Draw closed polygon
cv2.polylines(img, [pts], True, (255, 0, 255), 2)
cv2.imshow("Polygon", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = np.zeros((300, 600, 3), dtype=np.uint8)
# Put text
cv2.putText(img, "OpenCV Rocks!", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 0), 2)
cv2.imshow("Text", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = np.ones((300, 300, 3), dtype=np.uint8) * 255
# Filled red rectangle
cv2.rectangle(img, (50, 50), (250, 150), (0, 0, 255), -1)
cv2.imshow("Filled", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
drawing = False
ix, iy = -1, -1
img = np.ones((400, 400, 3), dtype=np.uint8) * 255
def draw_circle(event, x, y, flags, param):
    global ix, iy, drawing
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y
    elif event == cv2.EVENT_MOUSEMOVE and drawing:
        cv2.circle(img, (x, y), 5, (255, 0, 0), -1)
    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False
cv2.namedWindow("Draw")
cv2.setMouseCallback("Draw", draw_circle)
while True:
    cv2.imshow("Draw", img)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC to quit
        break
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread("photo.jpg")
# Define translation matrix: move 100 right, 50 down
M = np.float32([[1, 0, 100], [0, 1, 50]])
translated = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
cv2.imshow("Translated", translated)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
# Rotate by 45 degrees around center
M = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
cv2.imshow("Rotated", rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Scale image by 50%
scaled = cv2.resize(img, None, fx=0.5, fy=0.5)
cv2.imshow("Scaled", scaled)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Flip horizontally (flipCode=1)
flipped = cv2.flip(img, 1)
cv2.imshow("Flipped", flipped)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Crop rectangle: y:100-300, x:150-350
cropped = img[100:300, 150:350]
cv2.imshow("Cropped", cropped)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread("photo.jpg")
# Affine transform points
pts1 = np.float32([[50,50], [200,50], [50,200]])
pts2 = np.float32([[10,100], [200,50], [100,250]])
M = cv2.getAffineTransform(pts1, pts2)
affine = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
cv2.imshow("Affine", affine)
cv2.waitKey(0)
cv2.destroyAllWindows()
# BGR is OpenCV default color space
# Different models suit different tasks like HSV for color filtering
import cv2
img = cv2.imread("photo.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow("Grayscale", gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
cv2.imshow("HSV", hsv)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
cv2.imshow("LAB", lab)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
b, g, r = cv2.split(img)
# Merge channels back
merged = cv2.merge([b, g, r])
import cv2
import numpy as np
img = cv2.imread("photo.jpg")
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Define lower and upper bounds for blue color
lower_blue = np.array([100, 150, 0])
upper_blue = np.array([140, 255, 255])
mask = cv2.inRange(hsv, lower_blue, upper_blue)
result = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow("Filtered Blue", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0) # Grayscale
_, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow("Global Threshold", thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
adaptive = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                 cv2.THRESH_BINARY, 11, 2)
cv2.imshow("Adaptive Threshold", adaptive)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
_, otsu = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
cv2.imshow("Otsu Threshold", otsu)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
_, inv_thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow("Inverted Threshold", inv_thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
_, trunc = cv2.threshold(img, 127, 255, cv2.THRESH_TRUNC)
_, tozero = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO)
cv2.imshow("Trunc Threshold", trunc)
cv2.imshow("ToZero Threshold", tozero)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Typically done by stacking thresholded images and showing them
# (Implementation is similar to above, combining outputs)
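# A minimal sketch of that idea, assuming the same grayscale photo as above:
# apply several threshold types and stack the results side by side.
import cv2
import numpy as np
img = cv2.imread("photo.jpg", 0)
_, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
_, trunc = cv2.threshold(img, 127, 255, cv2.THRESH_TRUNC)
_, tozero = cv2.threshold(img, 127, 255, cv2.THRESH_TOZERO)
stacked = np.hstack([binary, trunc, tozero])  # Same height, shown side by side
cv2.imshow("Binary | Trunc | ToZero", stacked)
cv2.waitKey(0)
cv2.destroyAllWindows()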
import cv2
img = cv2.imread("photo.jpg")
# Apply averaging filter with 5x5 kernel
blur = cv2.blur(img, (5, 5))
cv2.imshow("Averaging", blur)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Apply Gaussian blur with 5x5 kernel
gauss = cv2.GaussianBlur(img, (5, 5), 0)
cv2.imshow("Gaussian Blur", gauss)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Apply median blur with kernel size 5
median = cv2.medianBlur(img, 5)
cv2.imshow("Median Blur", median)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg")
# Apply bilateral filter
bilateral = cv2.bilateralFilter(img, 9, 75, 75)
cv2.imshow("Bilateral Filter", bilateral)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread("photo.jpg")
# Kernel for sharpening
kernel = np.array([[0, -1, 0],
                   [-1, 5, -1],
                   [0, -1, 0]])
sharpened = cv2.filter2D(img, -1, kernel)
cv2.imshow("Sharpened", sharpened)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Noise reduction often combines blurring and filtering
# Use above blurring functions depending on noise type
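# A minimal sketch matching noise type to filter (noisy input filename assumed):
# median blur for salt-and-pepper noise, Gaussian blur for sensor noise, and
# non-local means when fine detail should be preserved.
import cv2
img = cv2.imread("noisy.jpg")
median = cv2.medianBlur(img, 5)              # Good for salt-and-pepper noise
gaussian = cv2.GaussianBlur(img, (5, 5), 0)  # Good for Gaussian sensor noise
nlm = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)  # Detail-preserving
cv2.imshow("Denoised (NLM)", nlm)
cv2.waitKey(0)
cv2.destroyAllWindows()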
import cv2
import numpy as np
img = cv2.imread("binary_image.png", 0)
kernel = np.ones((5,5), np.uint8)
# Erode image
erosion = cv2.erode(img, kernel, iterations=1)
# Dilate image
dilation = cv2.dilate(img, kernel, iterations=1)
cv2.imshow("Erosion", erosion)
cv2.imshow("Dilation", dilation)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread("binary_image.png", 0)
kernel = np.ones((5,5), np.uint8)
# Opening removes noise
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
# Closing fills holes
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
cv2.imshow("Opening", opening)
cv2.imshow("Closing", closing)
cv2.waitKey(0)
cv2.destroyAllWindows()
gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
cv2.imshow("Morphological Gradient", gradient)
cv2.waitKey(0)
cv2.destroyAllWindows()
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)
blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)
cv2.imshow("Top Hat", tophat)
cv2.imshow("Black Hat", blackhat)
cv2.waitKey(0)
cv2.destroyAllWindows()
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
ellipse_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (5,5))
# Example: Clean noisy text regions before OCR
cleaned = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
import cv2
img = cv2.imread("photo.jpg", 0) # Load grayscale
# Sobel edge detection in X and Y directions
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
cv2.imshow("Sobel X", sobelx)
cv2.imshow("Sobel Y", sobely)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
scharrx = cv2.Scharr(img, cv2.CV_64F, 1, 0)
scharry = cv2.Scharr(img, cv2.CV_64F, 0, 1)
cv2.imshow("Scharr X", scharrx)
cv2.imshow("Scharr Y", scharry)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
laplacian = cv2.Laplacian(img, cv2.CV_64F)
cv2.imshow("Laplacian", laplacian)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
edges = cv2.Canny(img, 100, 200)
cv2.imshow("Canny Edges", edges)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Compare Sobel, Laplacian, and Canny by displaying results side-by-side
# (Combine above examples visually in an app)
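# A minimal sketch of the comparison, assuming the same grayscale photo: the
# float edge maps are converted to 8-bit so all three can be stacked together.
import cv2
import numpy as np
img = cv2.imread("photo.jpg", 0)
sobel = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 1, 1, ksize=3))
laplacian = cv2.convertScaleAbs(cv2.Laplacian(img, cv2.CV_64F))
canny = cv2.Canny(img, 100, 200)
comparison = np.hstack([sobel, laplacian, canny])
cv2.imshow("Sobel | Laplacian | Canny", comparison)
cv2.waitKey(0)
cv2.destroyAllWindows()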
import cv2
img = cv2.imread("shapes.png", 0)
ret, thresh = cv2.threshold(img, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
img_color = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
for i, contour in enumerate(contours):
    cv2.drawContours(img_color, [contour], -1, (0,255,0), 2)
    # Label each contour near its first point
    x, y = contour[0][0]
    cv2.putText(img_color, f'#{i+1}', (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
cv2.imshow("Contours", img_color)
cv2.waitKey(0)
cv2.destroyAllWindows()
for contour in contours:
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)
    print(f"Area: {area}, Perimeter: {perimeter}")
for contour in contours:
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    print(f"Approximated points: {len(approx)}")
for contour in contours:
    hull = cv2.convexHull(contour)
    defects = cv2.convexityDefects(contour, cv2.convexHull(contour, returnPoints=False))
    print(f"Convex Hull points: {len(hull)}")
    if defects is not None:
        print(f"Defects count: {defects.shape[0]}")
import cv2
from matplotlib import pyplot as plt
img = cv2.imread("photo.jpg", 0) # Grayscale
hist = cv2.calcHist([img], [0], None, [256], [0,256])
plt.plot(hist)
plt.title("Grayscale Histogram")
plt.xlabel("Pixel Intensity")
plt.ylabel("Frequency")
plt.show()
img = cv2.imread("photo.jpg", 0)
equalized = cv2.equalizeHist(img)
cv2.imshow("Equalized", equalized)
cv2.waitKey(0)
cv2.destroyAllWindows()
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
clahe_img = clahe.apply(img)
cv2.imshow("CLAHE", clahe_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
img = cv2.imread("photo.jpg")
hist = cv2.calcHist([img], [0, 1], None, [32, 32], [0,256, 0,256])
print(hist.shape) # 2D histogram shape
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
roi_hist = cv2.calcHist([hsv], [0], None, [180], [0,180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
back_proj = cv2.calcBackProject([hsv], [0], roi_hist, [0,180], 1)
cv2.imshow("Backprojection", back_proj)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread("photo.jpg", 0)
grad_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
grad_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
cv2.imshow("Gradient X", grad_x)
cv2.imshow("Gradient Y", grad_y)
cv2.waitKey(0)
cv2.destroyAllWindows()
import numpy as np
magnitude = cv2.magnitude(grad_x, grad_y)
angle = cv2.phase(grad_x, grad_y, angleInDegrees=True)
cv2.imshow("Gradient Magnitude", magnitude)
cv2.waitKey(0)
cv2.destroyAllWindows()
laplacian = cv2.Laplacian(img, cv2.CV_64F)
cv2.imshow("Laplacian", laplacian)
cv2.waitKey(0)
cv2.destroyAllWindows()
grad_display = cv2.convertScaleAbs(magnitude)
cv2.imshow("Gradient Visualization", grad_display)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Example: Use gradient magnitude to create edge mask
_, edge_mask = cv2.threshold(grad_display, 50, 255, cv2.THRESH_BINARY)
cv2.imshow("Edge Mask", edge_mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread("photo.jpg")
pts1 = np.float32([[50,50],[200,50],[50,200]])
pts2 = np.float32([[10,100],[200,50],[100,250]])
M = cv2.getAffineTransform(pts1, pts2)
affine = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
cv2.imshow("Affine Transform", affine)
cv2.waitKey(0)
cv2.destroyAllWindows()
pts1 = np.float32([[56,65],[368,52],[28,387],[389,390]])
pts2 = np.float32([[0,0],[300,0],[0,300],[300,300]])
M = cv2.getPerspectiveTransform(pts1, pts2)
perspective = cv2.warpPerspective(img, M, (300,300))
cv2.imshow("Perspective Transform", perspective)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Image warping example uses warpAffine or warpPerspective functions
# (see above examples)
(h, w) = img.shape[:2]
center = (w//2, h//2)
M = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
cv2.imshow("Rotated Image", rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Skew correction detects the document's orientation (e.g., from edges or the
# thresholded foreground) and applies a rotation or perspective warp, as in
# the perspective transform example above; a hedged deskew sketch follows.
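# A hedged deskew sketch (filename assumed): threshold the text, take the
# minimum-area rectangle of the foreground pixels, and rotate by its angle.
# Note: the angle convention of minAreaRect changed in OpenCV 4.5+, so the
# sign handling below may need adjusting for your version.
import cv2
import numpy as np
img = cv2.imread("skewed_text.jpg", 0)
_, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
coords = np.column_stack(np.where(thresh > 0))
angle = cv2.minAreaRect(coords)[-1]
if angle < -45:
    angle = -(90 + angle)
else:
    angle = -angle
(h, w) = img.shape[:2]
M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
deskewed = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC)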
import cv2
img1 = cv2.imread("mask1.png", 0)
img2 = cv2.imread("mask2.png", 0)
bitwise_and = cv2.bitwise_and(img1, img2)
bitwise_or = cv2.bitwise_or(img1, img2)
bitwise_xor = cv2.bitwise_xor(img1, img2)
cv2.imshow("AND", bitwise_and)
cv2.imshow("OR", bitwise_or)
cv2.imshow("XOR", bitwise_xor)
cv2.waitKey(0)
cv2.destroyAllWindows()
img = cv2.imread("photo.jpg")
mask = cv2.imread("mask.png", 0)
masked = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow("Masked Image", masked)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Blending and overlaying combine two tools: bitwise operations
# (bitwise_and/bitwise_or with masks) for hard-edged overlays, and
# cv2.addWeighted for smooth alpha blending; a short addWeighted sketch
# follows the segmentation example below.
# Example: Extract segmented object using mask
segmented = cv2.bitwise_and(img, img, mask=mask)
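# A minimal smooth-blend sketch with addWeighted (both inputs assumed to be
# the same size); the two weights control the mix of the images.
import cv2
img1 = cv2.imread("photo.jpg")
img2 = cv2.imread("overlay.jpg")
blended = cv2.addWeighted(img1, 0.7, img2, 0.3, 0)  # 70% img1 + 30% img2
cv2.imshow("Blended", blended)
cv2.waitKey(0)
cv2.destroyAllWindows()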
import cv2
import numpy as np
img = cv2.imread("scene.jpg", 0)
template = cv2.imread("template.jpg", 0)
res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = max_loc
h, w = template.shape
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(img, top_left, bottom_right, 255, 2)
cv2.imshow("Detected Template", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
threshold = 0.8
loc = np.where(res >= threshold)
for pt in zip(*loc[::-1]):
    cv2.rectangle(img, pt, (pt[0]+w, pt[1]+h), (0,255,0), 2)
cv2.imshow("Multiple Matches", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Loop over several templates and apply matchTemplate each time
# Rotate template with cv2.getRotationMatrix2D and match again
# Capture video, apply matchTemplate on each frame, draw rectangle around match
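# A minimal sketch of the multi-template loop (template filenames assumed):
# each template is matched separately and confident hits are drawn on the scene.
import cv2
scene = cv2.imread("scene.jpg", 0)
for name in ["template1.jpg", "template2.jpg"]:
    tmpl = cv2.imread(name, 0)
    th, tw = tmpl.shape
    res = cv2.matchTemplate(scene, tmpl, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    if max_val > 0.8:  # Keep only confident matches
        cv2.rectangle(scene, max_loc, (max_loc[0] + tw, max_loc[1] + th), 255, 2)
cv2.imshow("Multi-template Matches", scene)
cv2.waitKey(0)
cv2.destroyAllWindows()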
import cv2
cap = cv2.VideoCapture("video.mp4") # Open video file
while cap.isOpened():
    ret, frame = cap.read()  # Read frame
    if not ret:
        break  # End of video
    cv2.imshow("Video Frame", frame)
    if cv2.waitKey(25) & 0xFF == ord('q'):  # Press 'q' to quit
        break
cap.release()
cv2.destroyAllWindows()
cap = cv2.VideoCapture(0) # Open default camera
while True:
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imshow("Webcam", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
cap = cv2.VideoCapture(0)
# Define codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640,480))
while True:
    ret, frame = cap.read()
    if not ret:
        break
    out.write(frame)  # Write frame to file
    cv2.imshow("Recording", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
out.release()
cv2.destroyAllWindows()
cap = cv2.VideoCapture("video.mp4")
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Convert frame to grayscale
    cv2.imshow("Gray Video", gray)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    edges = cv2.Canny(frame, 100, 200)  # Apply Canny edge detector
    cv2.imshow("Edges", edges)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
# Example: Quit on 'q', pause on 'p'
paused = False
cap = cv2.VideoCapture("video.mp4")
while cap.isOpened():
    if not paused:
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imshow("Video", frame)
    key = cv2.waitKey(30) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('p'):
        paused = not paused  # Toggle pause
cap.release()
cv2.destroyAllWindows()
import cv2
import numpy as np
cap = cv2.VideoCapture('video.mp4')
ret, frame1 = cap.read()
prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
# Parameters for ShiTomasi corner detection (for sparse flow)
feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
# Parameters for Lucas-Kanade optical flow
lk_params = dict(winSize=(15,15), maxLevel=2, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
p0 = cv2.goodFeaturesToTrack(prev_gray, mask=None, **feature_params)
while True:
    ret, frame2 = cap.read()
    if not ret:
        break
    frame_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    # Calculate optical flow (sparse, Lucas-Kanade)
    p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, frame_gray, p0, None, **lk_params)
    # Select good points
    good_new = p1[st == 1]
    good_old = p0[st == 1]
    # Visualization skipped for brevity
    prev_gray = frame_gray.copy()
    p0 = good_new.reshape(-1, 1, 2)
cap.release()
cv2.destroyAllWindows()
cap = cv2.VideoCapture('video.mp4')
ret, frame1 = cap.read()
prev_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
while True:
    ret, frame2 = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    # Visualization skipped for brevity
    prev_gray = gray
cap.release()
cv2.destroyAllWindows()
# Example: Draw lines for sparse flow points
for i, (new, old) in enumerate(zip(good_new, good_old)):
    a, b = new.ravel()
    c, d = old.ravel()
    # Coordinates must be ints for drawing in recent OpenCV versions
    cv2.line(frame2, (int(a), int(b)), (int(c), int(d)), (0,255,0), 2)
    cv2.circle(frame2, (int(a), int(b)), 5, (0,0,255), -1)
import cv2
cap = cv2.VideoCapture(0) # Open camera
# Create tracker object (example: KCF)
tracker = cv2.TrackerKCF_create()
ret, frame = cap.read()
bbox = cv2.selectROI("Frame", frame, False) # Select object to track
tracker.init(frame, bbox)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, bbox = tracker.update(frame)
    if success:
        # Draw bounding box
        x, y, w, h = map(int, bbox)
        cv2.rectangle(frame, (x,y), (x+w, y+h), (0,255,0), 2)
    else:
        cv2.putText(frame, "Tracking failure", (50,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,255), 2)
    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
# Note: in OpenCV 4.5+ the multi-tracker lives in the legacy module
# (cv2.legacy.MultiTracker_create); older contrib builds expose it as below.
trackers = cv2.MultiTracker_create()
# Add multiple objects
bbox1 = cv2.selectROI("Frame", frame, False)
tracker1 = cv2.TrackerKCF_create()
trackers.add(tracker1, frame, bbox1)
# Add more trackers as needed...
while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, boxes = trackers.update(frame)
    for box in boxes:
        x, y, w, h = map(int, box)
        cv2.rectangle(frame, (x,y), (x+w, y+h), (255,0,0), 2)
    cv2.imshow("Multi Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
import cv2
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
img = cv2.imread("group.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x,y), (x+w, y+h), (255,0,0), 2)
cv2.imshow("Faces", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 5)
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
    cv2.imshow("Face Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
import cv2
net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "model.caffemodel")
image = cv2.imread("image.jpg")
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300,300), 127.5)
net.setInput(blob)
detections = net.forward()
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.5:
        box = detections[0, 0, i, 3:7] * [w, h, w, h]
        (startX, startY, endX, endY) = box.astype("int")
        cv2.rectangle(image, (startX, startY), (endX, endY), (0,255,0), 2)
cv2.imshow("Detections", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")
layer_names = net.getLayerNames()
# flatten() handles both old (Nx1) and new (1-D) return shapes of getUnconnectedOutLayers
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416,416), swapRB=True, crop=False)
net.setInput(blob)
outputs = net.forward(output_layers)
# Process outputs to extract bounding boxes (code omitted for brevity)
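# A hedged sketch of that decoding step, continuing from the variables above:
# each detection row is [cx, cy, w, h, objectness, class scores...], with
# coordinates normalized to the input size.
import numpy as np
boxes, confidences, class_ids = [], [], []
H, W = image.shape[:2]
for output in outputs:
    for det in output:
        scores = det[5:]
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])
        if confidence > 0.5:
            cx, cy, bw, bh = det[0:4] * np.array([W, H, W, H])
            x, y = int(cx - bw / 2), int(cy - bh / 2)
            boxes.append([x, y, int(bw), int(bh)])
            confidences.append(confidence)
            class_ids.append(class_id)
# Non-Maximum Suppression removes overlapping duplicates
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
for i in np.array(idxs).flatten():
    x, y, bw, bh = boxes[i]
    cv2.rectangle(image, (x, y), (x + bw, y + bh), (0, 255, 0), 2)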
# Load pose estimation model and forward pass similar to detection
# Extract and visualize keypoints on the image
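# A hedged sketch of keypoint extraction, assuming an OpenPose-style Caffe
# model (the file names below are hypothetical): each output channel is a
# heatmap for one body part, and its hottest pixel gives that keypoint.
import cv2
net = cv2.dnn.readNetFromCaffe("pose_deploy.prototxt", "pose_iter.caffemodel")
frame = cv2.imread("person.jpg")
h, w = frame.shape[:2]
blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False)
net.setInput(blob)
out = net.forward()  # Shape: (1, num_parts, out_h, out_w)
for i in range(out.shape[1]):
    heatmap = out[0, i, :, :]
    _, conf, _, point = cv2.minMaxLoc(heatmap)
    if conf > 0.1:  # Keep only confident keypoints
        x = int(w * point[0] / out.shape[3])
        y = int(h * point[1] / out.shape[2])
        cv2.circle(frame, (x, y), 4, (0, 0, 255), -1)
cv2.imshow("Pose Keypoints", frame)
cv2.waitKey(0)
cv2.destroyAllWindows()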
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300,300)), 0.007843, (300,300), 127.5)
    net.setInput(blob)
    detections = net.forward()
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * [frame.shape[1], frame.shape[0], frame.shape[1], frame.shape[0]]
            (startX, startY, endX, endY) = box.astype("int")
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0,255,0), 2)
    cv2.imshow("Real-time DNN", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
import cv2
img = cv2.imread('coins.jpg', 0) # Read image in grayscale
# Simple binary threshold
_, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Threshold', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
thresh_adapt = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                     cv2.THRESH_BINARY, 11, 2)
cv2.imshow('Adaptive Threshold', thresh_adapt)
cv2.waitKey(0)
cv2.destroyAllWindows()
import numpy as np
# Read image and convert to grayscale
img = cv2.imread('coins.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Threshold and noise removal
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
kernel = np.ones((3,3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# Sure background area
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# Finding sure foreground area
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
_, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
# Finding unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Marker labelling
_, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0
markers = cv2.watershed(img, markers)
img[markers == -1] = [0, 0, 255] # Mark boundaries in red
cv2.imshow('Watershed Segmentation', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
img = cv2.imread('person.jpg')
mask = np.zeros(img.shape[:2], np.uint8)
bgdModel = np.zeros((1,65), np.float64)
fgdModel = np.zeros((1,65), np.float64)
rect = (50, 50, 450, 290) # ROI for foreground
cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
mask2 = np.where((mask==2)|(mask==0), 0, 1).astype('uint8')
img_cut = img * mask2[:, :, np.newaxis]
cv2.imshow('GrabCut Segmentation', img_cut)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread('chessboard.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = np.float32(gray)
dst = cv2.cornerHarris(gray, 2, 3, 0.04)
dst = cv2.dilate(dst, None)
img[dst > 0.01 * dst.max()] = [0, 0, 255]
cv2.imshow('Harris Corners', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
sift = cv2.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(gray, None)
img_sift = cv2.drawKeypoints(img, keypoints, None)
cv2.imshow('SIFT Features', img_sift)
cv2.waitKey(0)
cv2.destroyAllWindows()
orb = cv2.ORB_create()
keypoints, descriptors = orb.detectAndCompute(gray, None)
img_orb = cv2.drawKeypoints(img, keypoints, None)
cv2.imshow('ORB Features', img_orb)
cv2.waitKey(0)
cv2.destroyAllWindows()
img2 = cv2.imread('scene.jpg')
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
kp2, des2 = sift.detectAndCompute(gray2, None)
bf = cv2.BFMatcher()
matches = bf.knnMatch(descriptors, des2, k=2)
# Apply ratio test
good_matches = []
for m, n in matches:
    if m.distance < 0.75 * n.distance:
        good_matches.append(m)
img_matches = cv2.drawMatches(img, keypoints, img2, kp2, good_matches, None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
import glob
# Prepare object points for a 9x6 chessboard pattern
objp = np.zeros((6*9,3), np.float32)
objp[:,:2] = np.mgrid[0:9,0:6].T.reshape(-1,2)
objpoints = [] # 3D points in real world space
imgpoints = [] # 2D points in image plane
images = glob.glob('calib_images/*.jpg')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (9,6), None)
    if ret:
        objpoints.append(objp)
        imgpoints.append(corners)
        cv2.drawChessboardCorners(img, (9,6), corners, ret)
        cv2.imshow('Chessboard', img)
        cv2.waitKey(100)
cv2.destroyAllWindows()
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
print("Camera matrix:\n", mtx)
print("Distortion coefficients:\n", dist)
img = cv2.imread('test.jpg')
h, w = img.shape[:2]
newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
dst = cv2.undistort(img, mtx, dist, None, newcameramtx)
x, y, w, h = roi
dst = dst[y:y+h, x:x+w]
cv2.imshow('Undistorted Image', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Stereo calibration and reconstruction are more involved and typically require stereo image pairs.
# At a high level, the pipeline is: stereoRectify, compute a disparity map, then reproject to 3D.
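# A hedged sketch of the reprojection step (the Q matrix below uses made-up
# values; in practice it comes from cv2.stereoRectify):
import cv2
import numpy as np
left = cv2.imread('left.jpg', 0)
right = cv2.imread('right.jpg', 0)
stereo = cv2.StereoBM_create(numDisparities=64, blockSize=15)
disparity = stereo.compute(left, right).astype(np.float32) / 16.0  # Fixed-point to float
Q = np.float32([[1, 0, 0, -320],
                [0, 1, 0, -240],
                [0, 0, 0, 800],
                [0, 0, 10, 0]])  # Example reprojection matrix (assumed values)
points_3d = cv2.reprojectImageTo3D(disparity, Q)  # Per-pixel (X, Y, Z)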
import cv2
img1 = cv2.imread('left.jpg')
img2 = cv2.imread('right.jpg')
# Initialize ORB detector
orb = cv2.ORB_create()
# Find keypoints and descriptors
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
# Match descriptors using BFMatcher
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:20], None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()
import numpy as np
src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1,1,2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1,1,2)
H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
height, width, _ = img2.shape
result = cv2.warpPerspective(img1, H, (width * 2, height))
result[0:height, 0:width] = img2
cv2.imshow('Panorama', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
stitcher = cv2.Stitcher_create()
status, pano = stitcher.stitch([img1, img2])
if status == cv2.STITCHER_OK:
    cv2.imshow('Panorama', pano)
    cv2.waitKey(0)
else:
    print('Error during stitching')
cv2.destroyAllWindows()
# Formula: s * [u v 1]^T = K * [R | t] * [X Y Z 1]^T
# where:
# s = scale factor
# [u v 1] = pixel coordinates (homogeneous)
# K = intrinsic matrix
# R, t = rotation and translation (extrinsic)
# [X Y Z 1] = 3D world coordinates (homogeneous)
import numpy as np
# Example intrinsics (assumed values): fx, fy = focal lengths, cx, cy = principal point
fx, fy, cx, cy = 800.0, 800.0, 320.0, 240.0
K = np.array([[fx, 0, cx],
              [0, fy, cy],
              [0, 0, 1]])
R = np.eye(3)  # Example rotation matrix (identity)
t = np.array([[0.0], [0.0], [5.0]])  # Example translation vector (tx, ty, tz)
# Example function to project a 3D point
def project_point(X, K, R, t):
    X_homog = np.append(X, 1)   # Homogeneous 3D point [X Y Z 1]
    RT = np.hstack((R, t))      # 3x4 extrinsic matrix [R | t]
    x_cam = RT @ X_homog        # Transform to camera coordinates
    x_img = K @ x_cam           # Apply intrinsics
    x_img /= x_img[2]           # Divide by scale factor s
    return x_img[:2]
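# Example usage of the sketch above: project a point 10 units in front of the camera
point_3d = np.array([0.5, 0.5, 10.0])
u, v = project_point(point_3d, K, R, t)
print(f"Projected pixel: ({u:.1f}, {v:.1f})")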
import cv2
import numpy as np
import glob
# Prepare object points (0,0,0), (1,0,0), ..., (8,5,0)
objp = np.zeros((6*9,3), np.float32)
objp[:,:2] = np.mgrid[0:9,0:6].T.reshape(-1,2)
objpoints = []
imgpoints = []
images = glob.glob('calib_images/*.jpg')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (9,6), None)
    if ret:
        objpoints.append(objp)
        imgpoints.append(corners)
        cv2.drawChessboardCorners(img, (9,6), corners, ret)
        cv2.imshow('Corners', img)
        cv2.waitKey(100)
cv2.destroyAllWindows()
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
print("Camera Matrix:\n", mtx)
print("Distortion Coefficients:\n", dist)
img = cv2.imread('test.jpg')
h, w = img.shape[:2]
newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
dst = cv2.undistort(img, mtx, dist, None, newcameramtx)
x, y, w, h = roi
dst = dst[y:y+h, x:x+w]
cv2.imshow('Undistorted', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Assuming calibration done and matrices obtained
# Use cv2.stereoRectify(), cv2.initUndistortRectifyMap() to rectify images
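# A hedged sketch of that rectification step, assuming stereo calibration
# results (mtx1, dist1, mtx2, dist2, R, T, and image size (w, h)) are already
# available from cv2.stereoCalibrate, and left_img/right_img are loaded:
R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(mtx1, dist1, mtx2, dist2, (w, h), R, T)
map1x, map1y = cv2.initUndistortRectifyMap(mtx1, dist1, R1, P1, (w, h), cv2.CV_32FC1)
map2x, map2y = cv2.initUndistortRectifyMap(mtx2, dist2, R2, P2, (w, h), cv2.CV_32FC1)
rect_left = cv2.remap(left_img, map1x, map1y, cv2.INTER_LINEAR)
rect_right = cv2.remap(right_img, map2x, map2y, cv2.INTER_LINEAR)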
import cv2
import numpy as np
left_img = cv2.imread('left.jpg', 0)
right_img = cv2.imread('right.jpg', 0)
stereo = cv2.StereoBM_create(numDisparities=16*5, blockSize=15)
disparity = stereo.compute(left_img, right_img)
cv2.imshow('Disparity Map', disparity)
cv2.waitKey(0)
cv2.destroyAllWindows()
# depth = (focal_length * baseline) / disparity
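# A minimal sketch of that formula, continuing from the disparity map above
# (focal length and baseline values are assumed):
import numpy as np
focal_length = 800.0  # In pixels (assumed)
baseline = 0.1        # In meters (assumed)
disp = disparity.astype(np.float32) / 16.0  # StereoBM returns fixed-point values
depth = np.where(disp > 0, (focal_length * baseline) / disp, 0)  # Avoid divide-by-zero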
import cv2
img = cv2.imread('text_image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('Preprocessed Image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
import pytesseract
# Ensure pytesseract is installed and Tesseract OCR engine is set up
text = pytesseract.image_to_string(thresh)
print("Recognized Text:")
print(text)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
dilated = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('Dilated Image', dilated)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
# Load YOLO model
net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")
layer_names = net.getLayerNames()
# flatten() handles both old (Nx1) and new (1-D) return shapes of getUnconnectedOutLayers
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416,416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)
    # Process outputs to extract boxes, confidences, and class IDs
    # Draw bounding boxes on frame (code omitted for brevity)
    cv2.imshow("Real-time Object Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
# Use cv2.dnn.NMSBoxes to perform Non-Maximum Suppression
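# A minimal standalone sketch of NMSBoxes with made-up detections: boxes are
# [x, y, w, h]; indices of the boxes that survive suppression are returned.
import cv2
import numpy as np
boxes = [[50, 50, 100, 100], [55, 55, 100, 100], [300, 300, 80, 80]]
confidences = [0.9, 0.75, 0.8]
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
print("Kept boxes:", [boxes[i] for i in np.array(idxs).flatten()])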
import cv2
# Check if CUDA is available
print(cv2.cuda.getCudaEnabledDeviceCount())
# Upload image to GPU memory
img = cv2.imread('image.jpg')
gpu_img = cv2.cuda_GpuMat()
gpu_img.upload(img)
# Perform Gaussian blur on GPU
gpu_blur = cv2.cuda.createGaussianFilter(gpu_img.type(), -1, (15, 15), 0)
blurred = gpu_blur.apply(gpu_img)
# Download result back to CPU memory
result = blurred.download()
cv2.imshow('GPU Blur', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Example: Background subtraction with MOG2
cap = cv2.VideoCapture('video.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    fgmask = fgbg.apply(frame)
    cv2.imshow('Foreground Mask', fgmask)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Load TensorFlow model
net = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
# Use net as usual for forward pass
import cv2
import numpy as np
# Prepare training data: features and labels
trainData = np.random.randint(0, 100, (25, 2)).astype(np.float32)
responses = np.random.randint(0, 2, (25, 1)).astype(np.float32)
# Create and train kNN
knn = cv2.ml.KNearest_create()
knn.train(trainData, cv2.ml.ROW_SAMPLE, responses)
# Predict for new sample
newcomer = np.array([[50, 50]], dtype=np.float32)
ret, results, neighbours, dist = knn.findNearest(newcomer, k=3)
print("Predicted class:", results[0][0])
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)
# C_SVC classification expects integer class labels
svm.train(trainData, cv2.ml.ROW_SAMPLE, responses.astype(np.int32))
pred = svm.predict(newcomer)
print("SVM Prediction:", pred[1][0][0])
dtree = cv2.ml.DTrees_create()
dtree.setCVFolds(1)    # Default cross-validation folds are unsupported in some builds
dtree.setMaxDepth(10)  # A finite depth is required before training
dtree.train(trainData, cv2.ml.ROW_SAMPLE, responses.astype(np.int32))
pred_dt = dtree.predict(newcomer)
print("Decision Tree Prediction:", pred_dt[1][0][0])
import cv2
import numpy as np
# Load pretrained MobileNet model files
net = cv2.dnn.readNetFromCaffe('mobilenet_deploy.prototxt', 'mobilenet.caffemodel')
img = cv2.imread('dog.jpg')
blob = cv2.dnn.blobFromImage(img, 1/127.5, (224, 224), (127.5, 127.5, 127.5), swapRB=True)
net.setInput(blob)
preds = net.forward()
class_id = np.argmax(preds[0])
confidence = preds[0][class_id]
print(f"Class ID: {class_id}, Confidence: {confidence}")
import cv2
import numpy as np
img = cv2.imread('scene.jpg')
template = cv2.imread('template.jpg', 0)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = max_loc
h, w = template.shape
bottom_right = (top_left[0] + w, top_left[1] + h)
cv2.rectangle(img, top_left, bottom_right, (0,255,0), 2)
cv2.imshow('Detected', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_skin = (0, 20, 70)
    upper_skin = (20, 255, 255)
    mask = cv2.inRange(hsv, lower_skin, upper_skin)
    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        cnt = max(contours, key=cv2.contourArea)
        cv2.drawContours(frame, [cnt], -1, (0,255,0), 3)
    cv2.imshow('Hand Detection', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer.yml')
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
img = cv2.imread('test.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray)
for (x, y, w, h) in faces:
    roi_gray = gray[y:y+h, x:x+w]
    id_, conf = recognizer.predict(roi_gray)
    cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
    cv2.putText(img, str(id_), (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
cv2.imshow('Face Recognition', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Using OpenCV ArUco markers
import cv2
import cv2.aruco as aruco
cap = cv2.VideoCapture(0)
# Note: OpenCV 4.7+ renamed these to aruco.getPredefinedDictionary and
# aruco.ArucoDetector; the calls below match older contrib builds.
dictionary = aruco.Dictionary_get(aruco.DICT_6X6_250)
parameters = aruco.DetectorParameters_create()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    corners, ids, _ = aruco.detectMarkers(gray, dictionary, parameters=parameters)
    if ids is not None:
        aruco.drawDetectedMarkers(frame, corners, ids)
    cv2.imshow('AR Marker Detection', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
img = cv2.imread('coins.jpg', 0)
_, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Thresholded', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
import numpy as np
# Assume binary image thresh is obtained
dist_transform = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
_, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(thresh, sure_fg)
# Marker labeling
_, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown==255] = 0
# Watershed needs a 3-channel image; draw the boundaries on a BGR copy
img_color = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
markers = cv2.watershed(img_color, markers)
img_color[markers == -1] = [0,0,255]
cv2.imshow('Watershed', img_color)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread('low_contrast.jpg', 0)
equ = cv2.equalizeHist(img)
cv2.imshow('Original', img)
cv2.imshow('Equalized', equ)
cv2.waitKey(0)
cv2.destroyAllWindows()
denoised = cv2.medianBlur(img, 5)
cv2.imshow('Denoised', denoised)
cv2.waitKey(0)
cv2.destroyAllWindows()
from PIL import Image
from PIL.ExifTags import TAGS
img = Image.open('image.jpg')
exif_data = img._getexif()  # May be None if the file carries no EXIF data
if exif_data:
    for tag_id, value in exif_data.items():
        tag = TAGS.get(tag_id, tag_id)
        print(f"{tag}: {value}")
import cv2
cap = cv2.VideoCapture('video.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    fgmask = fgbg.apply(frame)
    cv2.imshow('Foreground Mask', fgmask)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
import numpy as np
# Load pre-trained SVM model from file
svm = cv2.ml.SVM_load('svm_model.xml')
# Prepare sample input data (2D feature vector)
sample = np.array([[12.5, 3.7]], dtype=np.float32)
# Predict class label
_, result = svm.predict(sample)
print("Predicted class:", result[0][0])
import cv2
images = [cv2.imread('img1.jpg'), cv2.imread('img2.jpg')]
stitcher = cv2.Stitcher_create()
status, pano = stitcher.stitch(images)
if status == cv2.STITCHER_OK:
    cv2.imshow('Panorama', pano)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print("Stitching failed:", status)
import cv2
import numpy as np
# Load stereo images
imgL = cv2.imread('left.jpg', 0)
imgR = cv2.imread('right.jpg', 0)
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL, imgR)
cv2.imshow('Disparity', disparity)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread('input.png')
# Save as JPEG with quality = 90 (out of 100)
cv2.imwrite('output.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def reprojection_error(objpoints, imgpoints, rvecs, tvecs, mtx, dist):
    total_error = 0
    for i in range(len(objpoints)):
        # Project the 3D points back using the estimated parameters
        imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
        error = cv2.norm(imgpoints[i], imgpoints2, cv2.NORM_L2) / len(imgpoints2)
        total_error += error
    return total_error / len(objpoints)
# After calibration:
error = reprojection_error(objpoints, imgpoints, rvecs, tvecs, mtx, dist)
print("Mean Reprojection Error:", error)
import cv2
img1 = cv2.imread('img1.jpg', 0)
img2 = cv2.imread('img2.jpg', 0)
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:10], None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
bbox = cv2.selectROI("Frame", frame, False)
tracker = cv2.TrackerCSRT_create()
tracker.init(frame, bbox)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    success, bbox = tracker.update(frame)
    if success:
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
    else:
        cv2.putText(frame, "Tracking failure", (50,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,255), 2)
    cv2.imshow('Tracking', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
img = cv2.imread('image.jpg')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
cv2.imshow('HSV Image', hsv)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
cap = cv2.VideoCapture('input_video.mp4')
_, prev = cap.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
transforms = []
while True:
    ret, curr = cap.read()
    if not ret:
        break
    curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)
    # calcOpticalFlowPyrLK needs points to track, so detect features first
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200, qualityLevel=0.01, minDistance=30)
    curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None)
    # ... further processing for stabilization (estimate and smooth transforms) ...
    prev_gray = curr_gray
cap.release()
import cv2
import numpy as np
img = cv2.imread('image.jpg')
# Invert colors using NumPy
inverted = 255 - img
cv2.imshow('Inverted Image', inverted)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())
    # Post-processing omitted for brevity...
    cv2.imshow('Real-Time Detection', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
net = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
image = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(image, size=(300, 300), swapRB=True)
net.setInput(blob)
output = net.forward()
# Process detections...
cv2.imshow('Output', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Sample command to convert and optimize a model with OpenVINO CLI (outside Python)
# mo.py --input_model model.pb --output_dir optimized_model
# In Python, load optimized model as usual with cv2.dnn.readNet()
# Install LabelImg via pip:
# pip install labelImg
# Run labelImg to annotate images and save XML files in Pascal VOC format
# Transfer learning often done in frameworks like TensorFlow or PyTorch,
# but OpenCV can load the resulting models for inference.
# Load fine-tuned model in OpenCV for inference:
net = cv2.dnn.readNet('fine_tuned_model.pb')
import cv2
# Check if CUDA is available
print(cv2.cuda.getCudaEnabledDeviceCount())
# Upload image to GPU memory
img = cv2.imread('image.jpg')
gpu_img = cv2.cuda_GpuMat()
gpu_img.upload(img)
# Perform Gaussian blur on GPU
gpu_blurred = cv2.cuda.createGaussianFilter(gpu_img.type(), -1, (15,15), 0).apply(gpu_img)
blurred = gpu_blurred.download()
cv2.imshow('Blurred with CUDA', blurred)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
cap = cv2.VideoCapture(0) # Pi camera module
ret, frame = cap.read()
if ret:
    cv2.imwrite('capture.jpg', frame)
cap.release()
import cv2
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
img = cv2.imread('group_photo.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
    roi = img[y:y+h, x:x+w]
    roi = cv2.GaussianBlur(roi, (99, 99), 30)  # Heavy blur to anonymize the face
    img[y:y+h, x:x+w] = roi
cv2.imshow('Blurred Faces', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import boto3
s3 = boto3.client('s3')
filename = 'image.jpg'
bucket_name = 'mybucket'
s3.upload_file(filename, bucket_name, filename)
# Vision Transformers (ViT) implementation usually in PyTorch or TensorFlow
# Example only conceptual:
# model = ViT(...)
# output = model(input_image_tensor)
import cv2
import numpy as np
cap = cv2.VideoCapture(0)
# Note: OpenCV 4.7+ renamed these to cv2.aruco.getPredefinedDictionary and
# cv2.aruco.ArucoDetector; the calls below match older contrib builds.
aruco_dict = cv2.aruco.Dictionary_get(cv2.aruco.DICT_6X6_250)
parameters = cv2.aruco.DetectorParameters_create()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    corners, ids, rejected = cv2.aruco.detectMarkers(gray, aruco_dict, parameters=parameters)
    if ids is not None:
        cv2.aruco.drawDetectedMarkers(frame, corners, ids)
    cv2.imshow('AR Markers', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
img = cv2.imread('coins.jpg', 0)
ret, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
cv2.imshow('Thresholded Image', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        # Outline the largest contour (assumed to be the hand)
        cv2.drawContours(frame, [max(contours, key=cv2.contourArea)], -1, (0,255,0), 3)
    cv2.imshow('Gesture Recognition', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
import pytesseract
img = cv2.imread('text_image.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)[1]
text = pytesseract.image_to_string(thresh)
print("Extracted Text:", text)
import cv2
import numpy as np
img = cv2.imread('fisheye.jpg')
DIM = img.shape[:2][::-1]
K = np.array([[300.0, 0.0, DIM[0]/2],
              [0.0, 300.0, DIM[1]/2],
              [0.0, 0.0, 1.0]])
D = np.array([-0.1, 0.01, 0.0, 0.0])
map1, map2 = cv2.fisheye.initUndistortRectifyMap(K, D, np.eye(3), K, DIM, cv2.CV_16SC2)
undistorted = cv2.remap(img, map1, map2, interpolation=cv2.INTER_LINEAR)
cv2.imshow('Undistorted Image', undistorted)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
cap1 = cv2.VideoCapture(0) # First camera
cap2 = cv2.VideoCapture(1) # Second camera
while True:
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not ret1 or not ret2:
        break
    cv2.imshow('Camera 1', frame1)
    cv2.imshow('Camera 2', frame2)
    if cv2.waitKey(1) & 0xFF == 27:  # ESC to quit
        break
cap1.release()
cap2.release()
cv2.destroyAllWindows()
import cv2
import numpy as np
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 60, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        c = max(contours, key=cv2.contourArea)
        cv2.drawContours(frame, [c], -1, (0,255,0), 3)
    cv2.imshow('Line Following', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread('image.jpg', 0)
mean, stddev = cv2.meanStdDev(img)
# Threshold anomalies above mean + 2*stddev
thresh_val = mean.item() + 2 * stddev.item()  # meanStdDev returns 1x1 arrays
_, anomaly = cv2.threshold(img, thresh_val, 255, cv2.THRESH_BINARY)
cv2.imshow('Anomalies', anomaly)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
# Connect to drone camera stream (example URL)
cap = cv2.VideoCapture('http://192.168.1.1:8080/video')
while True:
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imshow('Drone Camera', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Grad-CAM typically implemented with deep learning frameworks (e.g., PyTorch)
# Basic conceptual steps:
# 1. Forward pass input through CNN
# 2. Get gradients of target class output w.r.t. convolutional feature maps
# 3. Compute weighted sum of feature maps
# 4. Overlay heatmap on input image
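# The overlay step (4) can be done in OpenCV; a minimal sketch, assuming a
# heatmap in [0, 1] was already produced by a deep learning framework:
import cv2
import numpy as np
img = cv2.imread('image.jpg')
heatmap = np.random.rand(img.shape[0], img.shape[1])  # Placeholder heatmap
colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
overlay = cv2.addWeighted(img, 0.6, colored, 0.4, 0)  # Blend heatmap onto image
cv2.imshow('Grad-CAM Overlay', overlay)
cv2.waitKey(0)
cv2.destroyAllWindows()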
import cv2
import numpy as np
imgL = cv2.imread('left.jpg', 0)
imgR = cv2.imread('right.jpg', 0)
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imgL, imgR)
cv2.imshow('Disparity', disparity)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
cap = cv2.VideoCapture(0)
orb = cv2.ORB_create()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    keypoints = orb.detect(frame, None)
    frame = cv2.drawKeypoints(frame, keypoints, None, color=(0,255,0))
    cv2.imshow('ORB Keypoints', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Typically involves a pre-trained CNN for image features
# and an LSTM or Transformer for question encoding,
# combined to generate answers.
# Pseudocode:
# img_features = CNN(image)
# question_embedding = LSTM(question)
# answer = decoder(img_features, question_embedding)
# Extract features with CNN (e.g., ResNet)
# Feed features into RNN or Transformer for sequence generation
# Output caption describes image content
# This is usually implemented in deep learning frameworks.
# Two identical CNNs process image pairs
# Output measures similarity
# Used for one-shot/few-shot classification
# Typically implemented with PyTorch or TensorFlow.
# Generator creates fake images
# Discriminator classifies real vs fake
# Both train in competition to improve quality
# Implementation typically in deep learning frameworks.
import cv2
cap = cv2.VideoCapture('video.mp4')
ret, prev_frame = cap.read()
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    diff = cv2.absdiff(prev_gray, gray)
    _, thresh = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
    cv2.imshow('Frame Difference', thresh)
    prev_gray = gray
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Acquire image
# Detect target features
# Calculate error between current and desired feature position
# Command robot actuators to minimize error
# Implementation varies widely by robot and system.
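# A minimal sketch of the error-computation step (target position and gain are
# placeholder values; the actuator interface is robot-specific):
import cv2
frame = cv2.imread('robot_view.jpg')
h, w = frame.shape[:2]
target = (int(w * 0.6), int(h * 0.4))  # Placeholder detected target position
error_x = target[0] - w // 2           # Horizontal offset from image center
error_y = target[1] - h // 2           # Vertical offset from image center
Kp = 0.005                             # Assumed proportional gain
pan_cmd, tilt_cmd = Kp * error_x, Kp * error_y
print(f"Pan: {pan_cmd:.3f}, Tilt: {tilt_cmd:.3f}")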
import cv2
import numpy as np
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
objp = np.zeros((6*9,3), np.float32)
objp[:,:2] = np.mgrid[0:9,0:6].T.reshape(-1,2)
objpoints = []
imgpoints = []
images = [...] # List of calibration images
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (9,6), None)
    if ret:
        objpoints.append(objp)
        # Refine corner locations to sub-pixel accuracy
        corners2 = cv2.cornerSubPix(gray, corners, (11,11), (-1,-1), criteria)
        imgpoints.append(corners2)
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
import cv2
import numpy as np
img = cv2.imread('image.jpg', 0)
laplacian_var = cv2.Laplacian(img, cv2.CV_64F).var()
print("Sharpness (variance of Laplacian):", laplacian_var)
import cv2
img1 = cv2.imread('left.jpg')
img2 = cv2.imread('right.jpg')
stitcher = cv2.Stitcher_create()
status, pano = stitcher.stitch([img1, img2])
if status == cv2.STITCHER_OK:
    cv2.imshow('Panorama', pano)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
else:
    print('Error during stitching')
import cv2
import numpy as np
# 3D points in world coordinates
obj_points = np.array([[0,0,0],[1,0,0],[1,1,0],[0,1,0]], dtype=np.float32)
# Corresponding 2D points in image
img_points = np.array([[320,240],[400,240],[400,320],[320,320]], dtype=np.float32)
camera_matrix = np.array([[800,0,320],[0,800,240],[0,0,1]], dtype=np.float32)
dist_coeffs = np.zeros(5)
ret, rvec, tvec = cv2.solvePnP(obj_points, img_points, camera_matrix, dist_coeffs)
print("Rotation Vector:\n", rvec)
print("Translation Vector:\n", tvec)
import cv2
img = cv2.imread('input.png')
cv2.imwrite('output.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
import cv2
img = cv2.imread('image.jpg')
blurred = cv2.GaussianBlur(img, (5,5), 0)
cv2.imshow('Blurred Image', blurred)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
img = cv2.imread('damaged.jpg')
mask = cv2.imread('mask.png', 0)
restored = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
cv2.imshow('Restored Image', restored)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img = cv2.imread('low_res.jpg')
upscaled = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
cv2.imshow('Upscaled Image', upscaled)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
img1 = cv2.imread('img1.jpg', 0)
img2 = cv2.imread('img2.jpg', 0)
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x:x.distance)
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:10], None, flags=2)
cv2.imshow('Matches', img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Requires OpenCV DNN module and EAST model files
import cv2
import numpy as np
net = cv2.dnn.readNet('frozen_east_text_detection.pb')
img = cv2.imread('scene.jpg')
(h, w) = img.shape[:2]
blob = cv2.dnn.blobFromImage(img, 1.0, (320,320), (123.68,116.78,103.94), True, False)
net.setInput(blob)
scores, geometry = net.forward(['feature_fusion/Conv_7/Sigmoid','feature_fusion/concat_3'])
# Postprocessing needed to extract boxes (complex, not shown here)
# Typically uses pretrained CNNs (e.g., VGG)
# Optimize output image to minimize content & style loss
# Requires deep learning frameworks
# Conceptual code:
# content_features = CNN(content_image)
# style_features = CNN(style_image)
# output = optimize(content_features, style_features)
import cv2
img = cv2.imread('image.jpg')
flip = cv2.flip(img, 1) # Horizontal flip
rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
cv2.imshow('Original', img)
cv2.imshow('Flipped', flip)
cv2.imshow('Rotated', rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
# U-Net uses encoder-decoder CNN architecture
# Input image → downsampling → upsampling → pixel classification
# Requires deep learning frameworks for implementation
import cv2
cap = cv2.VideoCapture('video.mp4')
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    fgmask = fgbg.apply(frame)
    cv2.imshow('FG Mask', fgmask)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()
# Extract image features with CNN
# Use language models to generate questions
# Combine multimodal features for coherent question generation
# Calculate attention scores between image features
# Weight features based on relevance
# Integrate weighted features into prediction pipeline
# Compute gradient of output w.r.t input image pixels
# Visualize gradients to show influential regions
# Align feature distributions of source and target domains
# Use adversarial training or discrepancy minimization
# Generate augmented image pairs
# Train model to maximize agreement between pairs
# Useful for feature extraction
# Define search space of possible architectures
# Use reinforcement learning or evolutionary methods
# Evaluate performance on validation set
# Split image into fixed-size patches
# Flatten and embed patches as tokens
# Use transformer layers for classification
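# A minimal NumPy sketch of the patch step (16x16 patches assumed); the
# embedding and transformer stages belong in a deep learning framework.
import numpy as np
img = np.random.rand(224, 224, 3)  # Placeholder image tensor
P = 16
patches = img.reshape(224 // P, P, 224 // P, P, 3).swapaxes(1, 2).reshape(-1, P * P * 3)
print(patches.shape)  # (196, 768): 196 patch tokens of dimension 768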
# Autonomous vehicles, AR/VR, medical imaging, robotics, surveillance
# Advances in hardware and algorithms will enable new possibilities