NumPy (Numerical Python) is a core library for numerical computing in Python. It provides support for large multi-dimensional arrays and matrices, along with a collection of mathematical functions to operate on them efficiently.
# Import NumPy under its conventional alias, `np`.
import numpy as np

# Build a NumPy array from a plain Python list.
arr = np.array([1, 2, 3, 4])

# Display the array.
print(arr)  # Output: [1 2 3 4]
# Comparison: Python list vs NumPy array
import numpy as np

py_list = [1, 2, 3, 4]  # Regular Python list
np_array = np.array([1, 2, 3, 4])  # NumPy array

# Element-wise multiplication.
list_result = [x * 2 for x in py_list]  # A list needs an explicit per-element loop
array_result = np_array * 2  # The array is multiplied in one vectorized expression

print(list_result)  # Output: [2, 4, 6, 8]
print(array_result)  # Output: [2 4 6 8]
NumPy was created in 2005 by Travis Oliphant, building on an earlier library called Numeric. Over the years, it has become the foundation for many other libraries in the scientific computing and machine learning communities.
# Using pip to install NumPy pip install numpy # Install using Python package manager # Using conda to install NumPy conda install numpy # Install using Anaconda package manager
# Open a Python shell or script and type: import numpy as np # Import NumPy library print(np.__version__) # Print the installed NumPy version
ndarray Objects
The core of NumPy is the ndarray (N-dimensional array) object, which is a grid of values of the same type, indexed by a tuple of non-negative integers.
import numpy as np # Import the NumPy library # Create a simple ndarray arr = np.array([10, 20, 30, 40]) # Create a 1D array print(type(arr)) # Output: <class 'numpy.ndarray'> print(arr) # Output: [10 20 30 40]
NumPy arrays use less memory and provide faster access and computations compared to Python lists.
import sys  # For measuring object sizes
import time  # For measuring execution time

import numpy as np

# Use the same data in both containers so the comparison is like-for-like.
n_items = 100_000
py_list = list(range(n_items))
np_array = np.arange(n_items)

# Memory usage comparison.
# sys.getsizeof() is shallow: for a list it does not follow references into
# the contained int objects, so their sizes are added explicitly here.
list_bytes = sys.getsizeof(py_list) + sum(sys.getsizeof(x) for x in py_list)
array_bytes = np_array.nbytes  # Bytes consumed by the array's elements
print("List bytes:", list_bytes)
print("Array bytes:", array_bytes)

# Performance comparison: double every element exactly once in each container.
start = time.perf_counter()
doubled_list = [x * 2 for x in py_list]
end = time.perf_counter()
print("List time:", end - start)

start = time.perf_counter()
doubled_array = np_array * 2
end = time.perf_counter()
print("Array time:", end - start)
np.array()
import numpy as np # Import NumPy # Create arrays from lists a1 = np.array([1, 2, 3]) # 1D array a2 = np.array([[1, 2], [3, 4]]) # 2D array (matrix) a3 = np.array([[[1], [2]], [[3], [4]]]) # 3D array print(a1) # Output: [1 2 3] print(a2) # Output: [[1 2], [3 4]] print(a3) # Output: 3D representation
dtype
NumPy arrays can explicitly specify the type of their elements using the dtype parameter.
import numpy as np # Integer array with 32-bit arr_int = np.array([1, 2, 3], dtype=np.int32) print(arr_int) # Output: [1 2 3] print(arr_int.dtype) # Output: int32 # Float array arr_float = np.array([1, 2, 3], dtype=np.float64) print(arr_float) # Output: [1. 2. 3.] print(arr_float.dtype) # Output: float64
NumPy supports arrays with any number of dimensions; nested lists are one way to create them.
import numpy as np # 1D array arr1 = np.array([1, 2, 3]) print("1D:", arr1.shape) # Output: (3,) # 2D array (matrix) arr2 = np.array([[1, 2], [3, 4]]) print("2D:", arr2.shape) # Output: (2, 2) # 3D array arr3 = np.array([[[1], [2]], [[3], [4]]]) print("3D:", arr3.shape) # Output: (2, 2, 1)
All elements in a NumPy array are of the same type. If different types are given, they are automatically converted to a common type.
import numpy as np # Mixed types in array arr = np.array([1, 2.5, '3']) # Will convert all to strings print(arr) # Output: ['1' '2.5' '3'] print(arr.dtype) # Output: <U32 (Unicode string of length 32)
np.zeros(), np.ones(), np.empty()
import numpy as np # Import NumPy # Create an array of zeros zero_array = np.zeros((2, 3)) # 2x3 array filled with 0.0 print(zero_array) # Create an array of ones one_array = np.ones((2, 3)) # 2x3 array filled with 1.0 print(one_array) # Create an empty array (uninitialized values) empty_array = np.empty((2, 3)) # May contain random values print(empty_array)
np.arange(), np.linspace(), np.logspace()
import numpy as np # np.arange(start, stop, step) ar = np.arange(0, 10, 2) # Creates [0, 2, 4, 6, 8] print(ar) # np.linspace(start, stop, num) lin = np.linspace(0, 1, 5) # 5 evenly spaced numbers from 0 to 1 print(lin) # np.logspace(start_exp, stop_exp, num) log = np.logspace(1, 3, 3) # 10^1 to 10^3 in 3 steps print(log)
np.eye(), np.identity()
import numpy as np # np.eye(n): Identity matrix with 1s on the diagonal eye_matrix = np.eye(3) # 3x3 identity matrix print(eye_matrix) # np.eye(n, m, k): Diagonal offset diag_matrix = np.eye(3, 3, k=1) # Diagonal offset by 1 print(diag_matrix) # np.identity(n): Simple identity matrix ident = np.identity(4) # 4x4 identity matrix print(ident)
np.frombuffer(), np.fromiter()
import numpy as np # np.frombuffer(): Interpret buffer as 1D array buffer = b'ABC' # Byte string buffer_array = np.frombuffer(buffer, dtype='S1') # 1-byte string per element print(buffer_array) # Output: [b'A' b'B' b'C'] # np.fromiter(): Create array from iterable iterable = range(5) # Iterable range object iter_array = np.fromiter(iterable, dtype=int) print(iter_array) # Output: [0 1 2 3 4]
np.fromfunction()
import numpy as np


def func(i, j):
    """Return the value for position (i, j): the row index plus the column index."""
    return i + j


# np.fromfunction(): use a function of the indices to generate the values.
# Example: create a 3x3 matrix where value = row + col.
from_func = np.fromfunction(func, (3, 3), dtype=int)
print(from_func)
# Output:
# [[0 1 2]
#  [1 2 3]
#  [2 3 4]]
# Shape (.shape), Size (.size), and Number of Dimensions (.ndim)
import numpy as np

# Create a 2D array with 2 rows and 3 columns.
arr = np.array([[1, 2, 3], [4, 5, 6]])

# shape: tuple giving the length of each dimension (rows, columns)
print(arr.shape)  # Output: (2, 3)

# size: total number of elements
print(arr.size)  # Output: 6

# ndim: number of dimensions
print(arr.ndim)  # Output: 2
# Data Type (.dtype) and Item Size (.itemsize)
import numpy as np

# Integer array with an explicit 32-bit element type.
arr_int = np.array([1, 2, 3], dtype=np.int32)

# dtype: data type of the elements
print(arr_int.dtype)  # Output: int32

# itemsize: size in bytes of each element
print(arr_int.itemsize)  # Output: 4 (bytes)
import numpy as np # Create a 2D array arr = np.array([[1, 2], [3, 4]]) # flags: metadata about memory layout print(arr.flags) # Output includes: # C_CONTIGUOUS : True # F_CONTIGUOUS : False # OWNDATA : True # WRITEABLE : True # ALIGNED : True # WRITEBACKIFCOPY : False
import numpy as np

# Create an array of 1-byte integers.
arr = np.array([10, 20, 30], dtype=np.int8)

# Access the raw buffer using .data
buffer = arr.data  # Raw buffer object
print(buffer)  # Output: <memory at 0x...>

# Convert the buffer itself (not the array) to a list of plain Python ints.
print(buffer.tolist())  # Output: [10, 20, 30]
import numpy as np # Create 2D array arr = np.array([[1, 2, 3], [4, 5, 6]]) # strides: number of bytes to step in each dimension print(arr.strides) # Output: (stride for row, stride for column), e.g., (24, 8) # 24 bytes to move to next row, 8 bytes to move to next column
import numpy as np # Import NumPy # Create a 1D array arr = np.array([10, 20, 30, 40, 50]) # Access first element print(arr[0]) # Output: 10 # Access third element print(arr[2]) # Output: 30
import numpy as np # 1D slicing arr1 = np.array([0, 10, 20, 30, 40, 50]) print(arr1[1:4]) # Output: [10 20 30] (from index 1 to 3) # 2D slicing arr2 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) print(arr2[0:2, 1:3]) # Output: [[2 3], [5 6]] # Rows 0-1, Columns 1-2
import numpy as np # Create 2D array arr = np.array([[5, 10, 15], [20, 25, 30]]) # Access subarray: first row print(arr[0]) # Output: [5 10 15] # Access single element: row 1, column 2 print(arr[1, 2]) # Output: 30
import numpy as np # Create a 3x3 array arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) # Slice rows and access specific column print(arr[0:2, 1]) # Output: [2 5] # Rows 0 and 1, column 1
import numpy as np # Create 1D array arr = np.array([100, 200, 300, 400, 500]) # Negative indexing: last element print(arr[-1]) # Output: 500 # Slicing with step print(arr[::2]) # Output: [100 300 500] # Start to end, step by 2
import numpy as np # Create 3D array arr = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # Slice: all layers, first row, all columns print(arr[:, 0, :]) # Output: [[1 2], [5 6]] # Slice: first layer, all rows, column 1 print(arr[0, :, 1]) # Output: [2 4]
import numpy as np # Import NumPy # Create an array arr = np.array([10, 20, 30, 40, 50]) # Create boolean array where condition is true bool_arr = arr > 25 # Check which elements are greater than 25 print(bool_arr) # Output: [False False True True True]
import numpy as np # Create original array arr = np.array([10, 20, 30, 40, 50]) # Use boolean mask to filter values filtered = arr[arr > 25] # Only elements > 25 print(filtered) # Output: [30 40 50]
import numpy as np # Create array arr = np.array([100, 200, 300, 400, 500]) # Use a list of indices to select values indices = [0, 2, 4] selected = arr[indices] print(selected) # Output: [100 300 500]
import numpy as np # Create array arr = np.array([5, 15, 25, 35, 45]) # Boolean mask mask = arr > 20 # Use where + fancy indexing filtered = np.where(mask)[0] # Get index positions print(filtered) # Output: [2 3 4] print(arr[filtered]) # Output: [25 35 45]
import numpy as np # Create array arr = np.array([1, 2, 3, 4, 5]) # Set all elements > 3 to 99 arr[arr > 3] = 99 print(arr) # Output: [ 1 2 3 99 99]
reshape() and resize()
import numpy as np # Import NumPy library arr = np.arange(6) # Create 1D array with values from 0 to 5 print(arr) # Output: [0 1 2 3 4 5] reshaped = arr.reshape((2, 3)) # Reshape into 2 rows and 3 columns (2x3) print(reshaped) # Output: [[0 1 2], [3 4 5]] arr.resize((3, 2)) # Resize modifies the original array to shape 3x2 print(arr) # Output: [[0 1], [2 3], [4 5]]
ravel() and flatten()
import numpy as np arr = np.array([[1, 2], [3, 4]]) # Create 2x2 array print(arr) # Output: [[1 2], [3 4]] flat1 = arr.ravel() # Returns a flattened 1D array (view) print(flat1) # Output: [1 2 3 4] flat2 = arr.flatten() # Returns a flattened 1D array (copy) print(flat2) # Output: [1 2 3 4]
.T and transpose()
import numpy as np arr = np.array([[1, 2, 3], [4, 5, 6]]) # Create a 2x3 array print(arr) # Output: [[1 2 3], [4 5 6]] trans1 = arr.T # Transpose using .T, flips rows and columns print(trans1) # Output: [[1 4], [2 5], [3 6]] trans2 = arr.transpose() # Same effect using transpose() print(trans2) # Output: [[1 4], [2 5], [3 6]]
np.expand_dims(), np.squeeze()
import numpy as np arr = np.array([1, 2, 3]) # Create 1D array print(arr.shape) # Output: (3,) — 1D shape expanded = np.expand_dims(arr, axis=0) # Add new dimension at axis 0 (row vector) print(expanded.shape) # Output: (1, 3) expanded2 = np.expand_dims(arr, axis=1) # Add new dimension at axis 1 (column vector) print(expanded2.shape) # Output: (3, 1) squeezed = np.squeeze(expanded2) # Remove single-dimensional entries print(squeezed.shape) # Output: (3,)
np.newaxis
import numpy as np arr = np.array([10, 20, 30]) # Create 1D array print(arr.shape) # Output: (3,) row_vector = arr[np.newaxis, :] # Add new axis to make it a row vector print(row_vector.shape) # Output: (1, 3) col_vector = arr[:, np.newaxis] # Add new axis to make it a column vector print(col_vector.shape) # Output: (3, 1)
np.vstack(), np.hstack()
import numpy as np # Import NumPy a = np.array([1, 2, 3]) # Create 1D array a b = np.array([4, 5, 6]) # Create 1D array b v_stacked = np.vstack((a, b)) # Stack arrays vertically (row-wise) print(v_stacked) # Output: # [[1 2 3] # [4 5 6]] h_stacked = np.hstack((a, b)) # Stack arrays horizontally (column-wise) print(h_stacked) # Output: [1 2 3 4 5 6]
np.column_stack(), np.row_stack()
import numpy as np

a = np.array([1, 2, 3])  # 1D array a
b = np.array([4, 5, 6])  # 1D array b

# Stack the 1D arrays as the columns of a 2D array.
col_stacked = np.column_stack((a, b))
print(col_stacked)
# Output:
# [[1 4]
#  [2 5]
#  [3 6]]

# Stack the arrays as rows. np.row_stack is an alias of np.vstack that is
# deprecated as of NumPy 2.0, so np.vstack is called directly here.
row_stacked = np.vstack((a, b))
print(row_stacked)
# Output:
# [[1 2 3]
#  [4 5 6]]
np.concatenate()
import numpy as np a = np.array([[1, 2], [3, 4]]) # 2x2 array a b = np.array([[5, 6], [7, 8]]) # 2x2 array b concat_axis0 = np.concatenate((a, b), axis=0) # Concatenate along rows (axis 0) print(concat_axis0) # Output: # [[1 2] # [3 4] # [5 6] # [7 8]] concat_axis1 = np.concatenate((a, b), axis=1) # Concatenate along columns (axis 1) print(concat_axis1) # Output: # [[1 2 5 6] # [3 4 7 8]]
np.split(), np.vsplit(), np.hsplit()
import numpy as np arr = np.arange(16).reshape((4, 4)) # Create 4x4 array with values 0 to 15 print(arr) # Output: # [[ 0 1 2 3] # [ 4 5 6 7] # [ 8 9 10 11] # [12 13 14 15]] split_arr = np.split(arr, 2) # Split into 2 equal arrays vertically (default axis=0) print(split_arr) # Output: # [array([[0, 1, 2, 3], # [4, 5, 6, 7]]), # array([[ 8, 9, 10, 11], # [12, 13, 14, 15]])] v_split = np.vsplit(arr, 2) # Vertical split: same as split with axis=0 print(v_split) # Output same as above h_split = np.hsplit(arr, 2) # Horizontal split: splits columns into 2 arrays print(h_split) # Output: # [array([[ 0, 1], # [ 4, 5], # [ 8, 9], # [12, 13]]), # array([[ 2, 3], # [ 6, 7], # [10, 11], # [14, 15]])]
import numpy as np

arr = np.array([1, 2, 3, 4, 5])  # Original array

# Create a view of arr (shares the same data).
view_arr = arr.view()
print(view_arr)  # Output: [1 2 3 4 5]

# Create a copy of arr (independent data). The copy is taken here, while arr
# is still [1 2 3 4 5], so later changes to arr do not show up in it.
copy_arr = arr.copy()
print(copy_arr)  # Output: [1 2 3 4 5]

# Modifying the view affects the original because the data is shared.
view_arr[0] = 99
print(arr)  # Output: [99  2  3  4  5]

# Modifying the copy does not affect the original.
copy_arr[1] = 88
print(arr)  # Output: [99  2  3  4  5]
print(copy_arr)  # Output: [ 1 88  3  4  5]
np.copy()
import numpy as np arr = np.array([10, 20, 30]) # Deep copy creates a new independent array deep_copy = np.copy(arr) deep_copy[0] = 999 # Modify copy only print(arr) # Output: [10 20 30] (original unchanged) print(deep_copy) # Output: [999 20 30]
import numpy as np arr = np.array([1, 2, 3, 4, 5]) # Slicing returns a view (shares data) slice_view = arr[1:4] print(slice_view) # Output: [2 3 4] slice_view[0] = 77 # Modifying slice_view modifies arr print(arr) # Output: [ 1 77 3 4 5]
import numpy as np arr = np.array([1, 2, 3, 4, 5]) view_arr = arr.view() # Create view (shared data) view_arr[1] = 88 # Modify through view print(arr) # Output: [ 1 88 3 4 5] copy_arr = arr.copy() # Create copy (independent) copy_arr[2] = 99 # Modify copy only print(arr) # Output: [ 1 88 3 4 5] (unchanged)
import numpy as np arr = np.array([1, 2, 3, 4, 5]) # Always use copy() if you want independent array safe_copy = arr.copy() safe_copy[0] = 100 # Modify copy safely without affecting original print(arr) # Output: [1 2 3 4 5] print(safe_copy) # Output: [100 2 3 4 5]
# int32, float64, complex128, etc.
import numpy as np

# Integer array with the default integer dtype, which is platform dependent.
int_arr = np.array([1, 2, 3, 4])
# Output: int64 on most 64-bit platforms; int32 on 32-bit platforms
# (and on Windows with NumPy versions before 2.0).
print(int_arr.dtype)

# Float array: the default floating-point dtype is float64.
float_arr = np.array([1.0, 2.5, 3.2])
print(float_arr.dtype)  # Output: float64

# Complex array: Python complex literals produce complex128.
complex_arr = np.array([1+2j, 3+4j])
print(complex_arr.dtype)  # Output: complex128
import numpy as np # Create array specifying dtype as float32 to save memory arr_float32 = np.array([1.0, 2.0, 3.0], dtype=np.float32) print(arr_float32.dtype) # Output: float32 # Create integer array specifying dtype int16 arr_int16 = np.array([100, 200, 300], dtype=np.int16) print(arr_int16.dtype) # Output: int16
import numpy as np # When combining int and float, NumPy automatically casts to float arr_mixed = np.array([1, 2.5, 3]) print(arr_mixed) # Output: [1. 2.5 3. ] print(arr_mixed.dtype) # Output: float64 # Adding float to int array promotes the result to float int_arr = np.array([1, 2, 3]) result = int_arr + 2.5 print(result) # Output: [3.5 4.5 5.5] print(result.dtype) # Output: float64
astype()
import numpy as np arr = np.array([1.7, 2.2, 3.8]) # float64 array # Convert float array to int array using astype() int_arr = arr.astype(np.int32) print(int_arr) # Output: [1 2 3] (truncates decimals) print(int_arr.dtype) # Output: int32 # Convert integer array to float64 float_arr = int_arr.astype(np.float64) print(float_arr) # Output: [1. 2. 3.] print(float_arr.dtype) # Output: float64
import numpy as np # Using float32 saves memory but reduces precision compared to float64 arr_float64 = np.array([1.123456789, 2.987654321], dtype=np.float64) arr_float32 = np.array([1.123456789, 2.987654321], dtype=np.float32) print(arr_float64) # Output: [1.12345679 2.98765432] print(arr_float32) # Output: [1.1234568 2.9876542] (less precise) # Memory size difference print(arr_float64.nbytes) # Output: 16 bytes (2 elements * 8 bytes each) print(arr_float32.nbytes) # Output: 8 bytes (2 elements * 4 bytes each)
import numpy as np # Import NumPy library a = np.array([10, 20, 30, 40]) # Create array a b = np.array([1, 2, 3, 4]) # Create array b c_add = a + b # Elementwise addition: adds corresponding elements print(c_add) # Output: [11 22 33 44] c_sub = a - b # Elementwise subtraction: subtracts b elements from a print(c_sub) # Output: [9 18 27 36] c_mul = a * b # Elementwise multiplication print(c_mul) # Output: [10 40 90 160] c_div = a / b # Elementwise division (float division) print(c_div) # Output: [10. 10. 10. 10.] c_mod = a % b # Elementwise modulo (remainder) print(c_mod) # Output: [0 0 0 0] since 10%1=0, 20%2=0, etc.
np.power(), np.sqrt()
import numpy as np arr = np.array([1, 4, 9, 16]) # Array of numbers pow_arr = np.power(arr, 2) # Square each element (power of 2) print(pow_arr) # Output: [ 1 16 81 256] sqrt_arr = np.sqrt(arr) # Square root of each element print(sqrt_arr) # Output: [1. 2. 3. 4.]
import numpy as np a = np.array([10, 20, 30, 40]) b = np.array([3, 6, 7, 9]) mod_result = a % b # Elementwise modulo (remainder) print(mod_result) # Output: [1 2 2 4] floor_div_result = a // b # Elementwise floor division (quotient without remainder) print(floor_div_result) # Output: [3 3 4 4]
import numpy as np import time size = 10_000_000 # Large array size a = np.arange(size) # Create array [0, 1, 2, ..., size-1] b = np.arange(size) # Using vectorized operations (fast) start_vec = time.time() c = a + b # Elementwise addition, vectorized end_vec = time.time() print("Vectorized time:", end_vec - start_vec) # Very fast # Using Python loop (slow) start_loop = time.time() c_loop = [] for i in range(size): c_loop.append(a[i] + b[i]) # Elementwise addition in loop end_loop = time.time() print("Loop time:", end_loop - start_loop) # Much slower
import numpy as np a = np.array([1, 2, 3]) # Shape (3,) b = np.array([[10], [20], [30]]) # Shape (3,1) # Broadcasting: a (3,) and b (3,1) compatible to shape (3,3) result = a + b print(result) # Output: # [[11 12 13] # [21 22 23] # [31 32 33]]
sum(), mean(), std(), var()
import numpy as np # Import NumPy library arr = np.array([1, 2, 3, 4, 5]) # Create a 1D array with five elements total_sum = arr.sum() # Calculate sum of all elements in array print(total_sum) # Output: 15 mean_val = arr.mean() # Calculate mean (average) of elements print(mean_val) # Output: 3.0 std_dev = arr.std() # Calculate standard deviation (spread of data) print(std_dev) # Output: 1.4142135623730951 variance = arr.var() # Calculate variance (square of std deviation) print(variance) # Output: 2.0
import numpy as np arr2d = np.array([[1, 2, 3], # Create 2D array with 2 rows, 3 columns [4, 5, 6]]) sum_axis0 = arr2d.sum(axis=0) # Sum down columns (axis=0) print(sum_axis0) # Output: [5 7 9] sum_axis1 = arr2d.sum(axis=1) # Sum across rows (axis=1) print(sum_axis1) # Output: [6 15] mean_axis0 = arr2d.mean(axis=0) # Mean down columns (axis=0) print(mean_axis0) # Output: [2.5 3.5 4.5] mean_axis1 = arr2d.mean(axis=1) # Mean across rows (axis=1) print(mean_axis1) # Output: [2. 5.]
# min(), argmin(), etc.
import numpy as np

arr = np.array([5, 2, 8, 1, 7])  # Create 1D array

min_val = arr.min()  # Minimum value in the array
print(min_val)  # Output: 1

max_val = arr.max()  # Maximum value in the array
print(max_val)  # Output: 8

min_index = arr.argmin()  # Index of the minimum value
print(min_index)  # Output: 3 (index of value 1)

max_index = arr.argmax()  # Index of the maximum value
print(max_index)  # Output: 2 (index of value 8)
cumsum(), cumprod()
import numpy as np arr = np.array([1, 2, 3, 4]) # Create array cumulative_sum = arr.cumsum() # Calculate cumulative sum (running total) print(cumulative_sum) # Output: [1 3 6 10] cumulative_prod = arr.cumprod() # Calculate cumulative product (running product) print(cumulative_prod) # Output: [1 2 6 24]
import numpy as np arr = np.array([1, 2, 3, 4, 5]) # Create array product = arr.prod() # Calculate product of all elements print(product) # Output: 120 (1*2*3*4*5) median_val = np.median(arr) # Calculate median value print(median_val) # Output: 3.0 percentile_25 = np.percentile(arr, 25) # Calculate 25th percentile print(percentile_25) # Output: 2.0 percentile_75 = np.percentile(arr, 75) # Calculate 75th percentile print(percentile_75) # Output: 4.0
Universal functions (UFuncs) are vectorized functions in NumPy that operate elementwise on arrays. They provide fast, efficient computation and avoid explicit Python loops.
NumPy provides many built-in UFuncs such as np.add(), np.subtract(), np.multiply(), np.divide(), np.sqrt(), etc. These functions perform elementwise operations on arrays efficiently.
# np.add(), np.subtract(), np.multiply(), etc.
import numpy as np

a = np.array([1, 2, 3, 4])  # First array
b = np.array([10, 20, 30, 40])  # Second array

c = np.add(a, b)  # Elementwise addition
print(c)  # Output: [11 22 33 44]

d = np.subtract(b, a)  # Elementwise subtraction (b - a)
print(d)  # Output: [ 9 18 27 36]

e = np.multiply(a, b)  # Elementwise multiplication
print(e)  # Output: [ 10  40  90 160]
# The out parameter
import numpy as np

a = np.array([2, 4, 6, 8])  # First array
b = np.array([1, 3, 5, 7])  # Second array

# Preallocate the destination with the same shape and dtype as a.
out_arr = np.empty_like(a)

# Compute the elementwise sum and store the result in out_arr.
np.add(a, b, out=out_arr)
print(out_arr)  # Output: [ 3  7 11 15]
import numpy as np arr = np.array([1, 2, 3, 4]) # Define an array # Elementwise operation with scalar (broadcasting scalar to all elements) result = np.multiply(arr, 10) # Multiply each element by 10 print(result) # Output: [10 20 30 40] # Scalar operation example scalar_result = np.add(5, 3) # Adding two scalars print(scalar_result) # Output: 8
# reduce(), accumulate(), outer(), and at() methods
import numpy as np

arr = np.array([1, 2, 3, 4])  # Create a 1D array

# reduce: apply the operation cumulatively down to a single value (here, a sum).
sum_reduce = np.add.reduce(arr)
print(sum_reduce)  # Output: 10

# accumulate: keep the intermediate results of the same operation (running sum).
accum = np.add.accumulate(arr)
print(accum)  # Output: [ 1  3  6 10]

# outer: apply the operation to every pair of elements (outer product of arr with itself).
outer_product = np.multiply.outer(arr, arr)
print(outer_product)
# Output:
# [[ 1  2  3  4]
#  [ 2  4  6  8]
#  [ 3  6  9 12]
#  [ 4  8 12 16]]

# at: perform an unbuffered in-place operation at specific indices.
arr_at = np.array([0, 1, 2, 3, 4])  # New array for demonstration
indices = [1, 3, 4]  # Indices to add 10 to
np.add.at(arr_at, indices, 10)
print(arr_at)  # Output: [ 0 11  2 13 14]
np.vectorize()
import numpy as np


def my_func(x):
    """Return half of x (integer division) when x is even, otherwise double x."""
    return x // 2 if x % 2 == 0 else x * 2


# np.vectorize wraps the scalar function so it can be called on whole arrays.
# NOTE: per the NumPy docs it is provided for convenience, not for performance.
vec_func = np.vectorize(my_func)

arr = np.array([1, 2, 3, 4, 5])  # Input array
result = vec_func(arr)  # Apply the function elementwise
print(result)  # Output: [ 2  1  6  2 10]
# Custom UFuncs with np.frompyfunc()
import numpy as np


def power_plus_one(x, y):
    """Return x raised to the power y, plus one."""
    return x**y + 1


# Create a UFunc from the Python function: 2 inputs, 1 output.
custom_ufunc = np.frompyfunc(power_plus_one, 2, 1)

a = np.array([1, 2, 3])
b = np.array([2, 3, 4])

result = custom_ufunc(a, b)  # Apply the custom UFunc elementwise
print(result)  # Output: [2 9 82]
import numpy as np import time arr = np.arange(1_000_000) # Large array of 1 million elements # Using a Python loop to compute squares (slow) start = time.time() # Start timer result_loop = np.empty_like(arr) # Preallocate result array for i in range(len(arr)): result_loop[i] = arr[i] ** 2 # Square each element using loop end = time.time() # End timer print("Loop time:", end - start) # Print time taken by loop # Using vectorized operation (fast) start = time.time() # Start timer result_vec = arr ** 2 # Square all elements using vectorized operation end = time.time() # End timer print("Vectorized time:", end - start) # Print time taken by vectorized op
import numpy as np # Import NumPy library a = np.array([1, 2, 3, 4]) # Define first array b = np.array([4, 3, 2, 1]) # Define second array eq = (a == b) # Elementwise equality comparison print(eq) # Output: [False False False False] neq = (a != b) # Elementwise inequality comparison print(neq) # Output: [True True True True] lt = (a < b) # Elementwise less than comparison print(lt) # Output: [ True True False False] gt = (a > b) # Elementwise greater than comparison print(gt) # Output: [False False True True]
import numpy as np a = np.array([True, False, True, False]) # Boolean array a b = np.array([True, True, False, False]) # Boolean array b land = np.logical_and(a, b) # Elementwise logical AND operation print(land) # Output: [ True False False False] lor = np.logical_or(a, b) # Elementwise logical OR operation print(lor) # Output: [ True True True False] lnot = np.logical_not(a) # Elementwise logical NOT operation on a print(lnot) # Output: [False True False True]
# np.where() and Conditional Selections
import numpy as np

arr = np.array([10, 20, 30, 40, 50])  # Input array

# np.where(condition, value_if_true, value_if_false):
# keep elements greater than 25 and replace the rest with 0.
result = np.where(arr > 25, arr, 0)
print(result)  # Output: [ 0  0 30 40 50]

# The two value arguments can also be scalars, e.g. labels.
result2 = np.where(arr > 25, 'big', 'small')
print(result2)  # Output: ['small' 'small' 'big' 'big' 'big']
np.count_nonzero()
import numpy as np arr = np.array([1, 2, 3, 4, 5, 6]) # Input array # Count how many elements are greater than 3 count = np.count_nonzero(arr > 3) print(count) # Output: 3 (elements 4,5,6)
import numpy as np arr = np.array([10, 20, 30, 40, 50, 60]) # Input array # Filter elements greater than 20 and less than 55 filtered = arr[(arr > 20) & (arr < 55)] print(filtered) # Output: [30 40 50] # Filter elements that are less than 15 or greater than 45 filtered_or = arr[(arr < 15) | (arr > 45)] print(filtered_or) # Output: [10 50 60]
# Broadcasting allows NumPy to perform arithmetic operations on arrays of different shapes # without explicitly replicating data, saving memory and improving performance. import numpy as np a = np.array([1, 2, 3]) # 1D array with shape (3,) b = 2 # Scalar value result = a + b # Scalar broadcasted to match shape of a print(result) # Output: [3 4 5]
# Broadcasting rules to align shapes: # 1. If arrays have different numbers of dimensions, prepend 1s to the smaller shape. # 2. Dimensions are compatible when they are equal or one of them is 1. # 3. If dimensions are not compatible, broadcasting raises an error. import numpy as np x = np.ones((3, 1)) # Shape (3,1) y = np.ones((1, 4)) # Shape (1,4) # Broadcast shapes (3,1) and (1,4) to (3,4) z = x + y print(z) # Output: # [[2. 2. 2. 2.] # [2. 2. 2. 2.] # [2. 2. 2. 2.]]
import numpy as np # Simulate a grayscale image of shape (3,3) image = np.array([[100, 150, 200], [50, 75, 125], [0, 25, 50]]) brightness_increase = 10 # Scalar to increase brightness # Broadcasting adds brightness_increase to every pixel brighter_image = image + brightness_increase print(brighter_image) # Output: # [[110 160 210] # [ 60 85 135] # [ 10 35 60]]
np.newaxis
import numpy as np a = np.array([1, 2, 3]) # Shape (3,) # Add a new axis to convert it to a column vector (shape (3,1)) a_col = a[:, np.newaxis] print(a_col) # Output: # [[1] # [2] # [3]] # Now you can broadcast with arrays of shape (1, n) b = np.array([10, 20, 30, 40]) # Shape (4,) result = a_col + b # Broadcast shapes (3,1) + (4,) -> (3,4) print(result) # Output: # [[11 21 31 41] # [12 22 32 42] # [13 23 33 43]]
import numpy as np a = np.array([1, 2, 3]) # Shape (3,) b = np.array([1, 2]) # Shape (2,) try: c = a + b # Attempt to add incompatible shapes (3,) and (2,) except ValueError as e: print("Error:", e) # Output: operands could not be broadcast together with shapes (3,) (2,)
import numpy as np # Import NumPy library arr = np.array([3, 1, 4, 1, 5, 9, 2]) # Define an unsorted array sorted_arr = np.sort(arr) # np.sort returns a sorted copy of the array print(sorted_arr) # Output: [1 1 2 3 4 5 9] arr.sort() # .sort() sorts the array in-place, modifying original array print(arr) # Output: [1 1 2 3 4 5 9]
import numpy as np arr = np.array([50, 10, 30, 20]) # Original array indices = np.argsort(arr) # Get indices that would sort the array print(indices) # Output: [1 3 2 0] # Reorder array using these indices sorted_arr = arr[indices] print(sorted_arr) # Output: [10 20 30 50]
import numpy as np arr = np.array([10, 20, 30, 40, 50]) # np.where returns indices where condition is True indices_where = np.where(arr > 25) print(indices_where) # Output: (array([2, 3, 4]),) # np.searchsorted finds insertion position to maintain sorted order sorted_arr = np.sort(arr) pos = np.searchsorted(sorted_arr, 35) print(pos) # Output: 3 (35 should be inserted at index 3) # np.nonzero returns indices of non-zero elements arr2 = np.array([0, 1, 0, 3, 0]) nz = np.nonzero(arr2) print(nz) # Output: (array([1, 3]),)
import numpy as np arr = np.array([1, 2, 2, 3, 3, 3, 4]) # Count number of elements equal to 2 count_2 = np.count_nonzero(arr == 2) print(count_2) # Output: 2 # Count number of unique elements and their counts unique, counts = np.unique(arr, return_counts=True) print(unique) # Output: [1 2 3 4] print(counts) # Output: [1 2 3 1]
import numpy as np # Import NumPy library arr = np.array([1, 2, 2, 3, 4, 4, 5]) # Define an array with duplicate elements unique_elements = np.unique(arr) # np.unique returns sorted unique elements print(unique_elements) # Output: [1 2 3 4 5]
import numpy as np a = np.array([1, 2, 3, 4]) # First array b = np.array([3, 4, 5, 6]) # Second array # Intersection: elements common to both arrays intersection = np.intersect1d(a, b) print(intersection) # Output: [3 4] # Union: all unique elements from both arrays union = np.union1d(a, b) print(union) # Output: [1 2 3 4 5 6] # Difference: elements in a but not in b difference = np.setdiff1d(a, b) print(difference) # Output: [1 2] # Symmetric difference (XOR): elements in either a or b but not both xor = np.setxor1d(a, b) print(xor) # Output: [1 2 5 6]
import numpy as np

a = np.array([1, 2, 3, 4])  # Array to test membership against
b = np.array([2, 4, 6])  # Values to check

# np.isin(element, test_elements) reports, for each element of b, whether it
# occurs in a. It replaces np.in1d, which is deprecated as of NumPy 2.0.
membership = np.isin(b, a)
print(membership)  # Output: [ True  True False]
# This tells us that 2 and 4 are in a, but 6 is not.
import numpy as np

a = np.array([1, 2, 3])
b = np.array([3, 2, 1])
c = np.array([1, 2, 4])

# Set equality: compare the sorted unique elements, so that neither element
# order nor duplicate values affect the result.
equal_ab = np.array_equal(np.unique(a), np.unique(b))
print(equal_ab)  # Output: True

equal_ac = np.array_equal(np.unique(a), np.unique(c))
print(equal_ac)  # Output: False

# Check if all elements of a are in b.
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
all_in = np.all(np.isin(a, b))
print(all_in)  # Output: True

# Check if all elements of c are in a.
all_in_c = np.all(np.isin(c, a))
print(all_in_c)  # Output: False
import numpy as np  # Import NumPy library for random number generation

# np.random bundles samplers for many probability distributions together
# with helpers for shuffling data and for making random output reproducible.

# Uniform distribution over the half-open interval [0, 1)
uniform_samples = np.random.rand(5)  # five draws
print(uniform_samples)  # Output: e.g. [0.123, 0.456, 0.789, 0.101, 0.112]

# Standard normal distribution (mean=0, std=1)
normal_samples = np.random.randn(5)  # five draws
print(normal_samples)  # Output: e.g. [ 0.5, -1.2, 0.7, 1.1, -0.3]

# Random integers; low is inclusive, high is exclusive
rand_ints = np.random.randint(low=10, high=20, size=5)  # values from 10 to 19
print(rand_ints)  # Output: e.g. [12, 15, 17, 11, 19]

# shuffle reorders its argument in place
arr = np.array([1, 2, 3, 4, 5])
np.random.shuffle(arr)
print(arr)  # Output: e.g. [3 1 5 2 4]

# permutation leaves its argument alone and returns a shuffled copy
arr2 = np.array([10, 20, 30, 40])
shuffled = np.random.permutation(arr2)
print(shuffled)  # Output: e.g. [20 40 10 30]
print(arr2)  # Original array unchanged: [10 20 30 40]

# Seeding pins the generator state, so the same numbers come out every run
np.random.seed(42)
print(np.random.rand(3))
# Output: [0.37454012 0.95071431 0.73199394]

np.random.seed(42)  # Re-seeding replays the same sequence
print(np.random.rand(3))  # Output matches above

# Beta distribution: values in (0, 1), handy for modelling probabilities
beta_samples = np.random.beta(a=2.0, b=5.0, size=5)  # a, b are shape parameters
print(beta_samples)  # Output: e.g. [0.1, 0.3, 0.2, 0.15, 0.05]

# Binomial distribution: number of successes in n trials
binomial_samples = np.random.binomial(n=10, p=0.5, size=5)  # p = success probability
print(binomial_samples)  # Output: e.g. [4, 6, 5, 7, 3]

# Poisson distribution: number of events in a fixed interval
poisson_samples = np.random.poisson(lam=3.0, size=5)  # lam = expected event count
print(poisson_samples)  # Output: e.g. [2, 4, 3, 1, 5]

# rand() takes the output shape as separate positional arguments
random_matrix = np.random.rand(3, 4)  # 3x4 matrix of uniform floats
print(random_matrix)  # Output: 3 rows and 4 columns of random floats

# randint takes the output shape through size=
random_int_matrix = np.random.randint(0, 100, size=(2, 3))  # 2x3 ints from 0 to 99
print(random_int_matrix)
import numpy as np  # Import NumPy library
import numpy.ma as ma  # Masked-array support

# np.nan stands in for the missing third measurement
arr = np.array([1, 2, np.nan, 4, 5])
print(arr)
# Output:
# [ 1.  2. nan  4.  5.]

# Hide the NaN entry behind a mask instead of removing it
masked_arr = ma.masked_array(arr, mask=np.isnan(arr))
print(masked_arr)
# Output:
# [1.0 2.0 -- 4.0 5.0]

# np.isnan flags the NaN positions
nan_mask = np.isnan(arr)
print(nan_mask)
# Output:
# [False False  True False False]

# np.isfinite flags everything that is neither NaN nor +/-Inf
finite_mask = np.isfinite(arr)
print(finite_mask)
# Output:
# [ True  True False  True  True]

# Boolean indexing keeps only the finite entries
filtered_arr = arr[np.isfinite(arr)]
print(filtered_arr)
# Output:
# [1. 2. 4. 5.]

# np.nan_to_num swaps each NaN for the given replacement value
replaced_arr = np.nan_to_num(arr, nan=0)
print(replaced_arr)
# Output:
# [1. 2. 0. 4. 5.]

# A single NaN poisons an ordinary sum ...
arr2 = np.array([10, np.nan, 30])
total = np.sum(arr2)
print(total)
# Output:
# nan

# ... whereas np.nansum skips NaN entries
total_ignore_nan = np.nansum(arr2)
print(total_ignore_nan)
# Output:
# 40.0

# Mean imputation: fill each gap with the mean of the observed values
mean_val = np.nanmean(arr)
print("Mean value ignoring NaNs:", mean_val)
# Output:
# Mean value ignoring NaNs: 3.0

imputed_arr = arr.copy()  # Work on a copy so arr keeps its NaN
gap_positions = np.isnan(imputed_arr)
imputed_arr[gap_positions] = mean_val
print(imputed_arr)
# Output:
# [1. 2. 3. 4. 5.]
import numpy as np  # Import NumPy library

arr = np.array([1, 2, 3, 4, 5])  # Create a sample array
np.save('array_file.npy', arr)  # Write the array to a binary .npy file

# Create two arrays
arr1 = np.array([10, 20, 30])
arr2 = np.array([40, 50, 60])

# Bundle several arrays into one .npz archive under the keys 'first' and
# 'second'. np.savez stores them uncompressed; use np.savez_compressed when
# a compressed archive is wanted.
np.savez('multiple_arrays.npz', first=arr1, second=arr2)

# Load the single array saved earlier
loaded_arr = np.load('array_file.npy')
print(loaded_arr)
# Output:
# [1 2 3 4 5]

# np.load on an .npz returns a lazy NpzFile that keeps the archive open;
# the with-block closes it once the arrays have been read.
with np.load('multiple_arrays.npz') as loaded_npz:
    print(loaded_npz.files)  # Keys stored in the archive
    # Output:
    # ['first', 'second']
    print(loaded_npz['first'])
    # Output:
    # [10 20 30]
    print(loaded_npz['second'])
    # Output:
    # [40 50 60]

# np.savetxt writes one array row per text line, so a 1-D array would end up
# with one value per line. Reshape to a single row to get "1,2,3,4,5".
np.savetxt('array_text.csv', arr.reshape(1, -1), delimiter=',', fmt='%d')
# The file 'array_text.csv' is created and contains:
# 1,2,3,4,5

# Load the array back from the text file (values come back as floats)
loaded_text_arr = np.loadtxt('array_text.csv', delimiter=',')
print(loaded_text_arr)
# Output:
# [1. 2. 3. 4. 5.]
# A structured array mixes field types: int id, float value, text label
record_fields = [('id', 'i4'), ('value', 'f4'), ('text', 'U10')]
structured_arr = np.array(
    [(1, 2.5, 'Hello'), (2, 3.6, 'World')],
    dtype=record_fields,
)

# Write one record per line; the multi-column fmt string supplies the commas
np.savetxt('structured.csv', structured_arr, fmt='%d,%.2f,%s', delimiter=',')

# Read the records back. dtype=None lets genfromtxt infer each column's type,
# and names= supplies the field names because the file has no header row.
loaded_structured = np.genfromtxt(
    'structured.csv',
    delimiter=',',
    dtype=None,
    encoding='utf-8',
    names=['id', 'value', 'text'],
)
print(loaded_structured)
# Output:
# [(1, 2.5, 'Hello') (2, 3.6, 'World')]
import pandas as pd  # Import pandas library

# Wrap the NumPy array in a one-column DataFrame
df = pd.DataFrame(arr, columns=['Numbers'])
print(df)
# Output:
#    Numbers
# 0        1
# 1        2
# 2        3
# 3        4
# 4        5

# Round-trip through a CSV file (index=False leaves the row labels out)
df.to_csv('dataframe.csv', index=False)
df_loaded = pd.read_csv('dataframe.csv')
print(df_loaded)
# Output:
#    Numbers
# 0        1
# 1        2
# 2        3
# 3        4
# 4        5

# Back to NumPy: a one-column frame becomes an (n, 1) array
array_from_df = df_loaded.to_numpy()
print(array_from_df)
# Output:
# [[1]
#  [2]
#  [3]
#  [4]
#  [5]]
import numpy as np  # Import NumPy library

# Structured dtype with named fields: id (int), age (int), name (string)
dtype = np.dtype([('id', 'i4'), ('age', 'i4'), ('name', 'U10')])

# Each tuple becomes one record of the structured array
structured_arr = np.array(
    [(1, 25, 'Alice'), (2, 30, 'Bob'), (3, 22, 'Cathy')],
    dtype=dtype,
)
print(structured_arr)
# Output:
# [(1, 25, 'Alice') (2, 30, 'Bob') (3, 22, 'Cathy')]

# Indexing by field name returns that column for all records
ages = structured_arr['age']
print(ages)
# Output:
# [25 30 22]

# Modify the 'age' of the second record (index 1)
structured_arr['age'][1] = 31
print(structured_arr[1])
# Output:
# (2, 31, 'Bob')

# A record-array view exposes the fields as attributes. It is a view, so it
# shares memory with structured_arr.
record_arr = structured_arr.view(np.recarray)
print(record_arr.age)
# Output:
# [25 31 22]

# This write goes through the view and therefore changes structured_arr too
record_arr.name[0] = 'Alicia'
print(record_arr[0])
# Output:
# (1, 25, 'Alicia')

# Sort records by the 'age' field (np.sort returns a sorted copy)
sorted_arr = np.sort(structured_arr, order='age')
print(sorted_arr)
# Output (the first record was renamed to 'Alicia' above):
# [(3, 22, 'Cathy') (1, 25, 'Alicia') (2, 31, 'Bob')]

# Keep only the records whose age exceeds 23
filtered_arr = structured_arr[structured_arr['age'] > 23]
print(filtered_arr)
# Output:
# [(1, 25, 'Alicia') (2, 31, 'Bob')]
# Example: reading structured records from a CSV file with np.genfromtxt()
# Assume 'data.csv' contains:
# id,age,name
# 1,25,Alice
# 2,31,Bob
# 3,22,Cathy

# Column types for the three fields
dtype = [('id', 'i4'), ('age', 'i4'), ('name', 'U10')]

# names=True tells genfromtxt to treat the first line as the header row
structured_from_file = np.genfromtxt(
    'data.csv',
    delimiter=',',
    dtype=dtype,
    names=True,
    encoding='utf-8',
)
print(structured_from_file)
# Output:
# [(1, 25, 'Alice') (2, 31, 'Bob') (3, 22, 'Cathy')]
import numpy as np  # Import numpy library for array operations

rows = [[1, 2, 3], [4, 5, 6]]

# order='C' (the default): row-major, each row is stored contiguously
arr_c = np.array(rows, order='C')

# order='F': column-major (Fortran-style), each column is stored contiguously
arr_f = np.array(rows, order='F')

print("C-order array:\n", arr_c)
print("Fortran-order array:\n", arr_f)

# The flags object reports C_CONTIGUOUS / F_CONTIGUOUS among other properties
print("C-order flags:", arr_c.flags)
print("Fortran-order flags:", arr_f.flags)

# Individual flags can be looked up by name, e.g. before choosing a
# layout-sensitive code path
if arr_c.flags['C_CONTIGUOUS']:
    print("arr_c is stored contiguously in C order (row-major)")

if arr_f.flags['F_CONTIGUOUS']:
    print("arr_f is stored contiguously in Fortran order (column-major)")
# Two large operands for the comparison
a = np.arange(1_000_000)
b = np.arange(1_000_000)

# Slow method: explicit loop (not recommended)
# result = np.empty_like(a)
# for i in range(len(a)):
#     result[i] = a[i] + b[i]

# Fast method: one vectorized call; the element loop runs in compiled code
result = np.add(a, b)
print(result[:5])  # Print first 5 results for verification
# Example: a slice is a view (shared memory); .copy() makes it independent
original = np.arange(10)

# Basic slicing returns a view: no data is copied
view_arr = original[2:5]
view_arr[0] = 100  # Writes through to original[2]
print("Original after modifying view:", original)

# .copy() allocates separate storage
copy_arr = original[2:5].copy()
copy_arr[0] = 200  # original is untouched
print("Original after modifying copy:", original)
import numpy as np  # Import numpy for array operations

arr = np.array([1, 2, 3])  # 1D array with 3 elements
print("Original shape:", arr.shape)
# Output: (3,) --> 1D array with length 3

# A new leading axis turns the 1D array into a 2D row vector
row_vec = arr[np.newaxis, :]
print("Row vector shape:", row_vec.shape)
# Output: (1, 3) --> 1 row, 3 columns

# A new trailing axis turns it into a 2D column vector
col_vec = arr[:, np.newaxis]
print("Column vector shape:", col_vec.shape)
# Output: (3, 1) --> 3 rows, 1 column
# A column of shape (3, 1) and a flat array of shape (3,)
a = np.array([[1], [2], [3]])
b = np.array([10, 20, 30])
print("a shape:", a.shape)  # (3,1)
print("b shape:", b.shape)  # (3,)

# Broadcasting stretches both operands to the common shape (3, 3):
# a is repeated across columns, b across rows
result = a + b
print("Broadcasted result:\n", result)
# Output:
# [[11 21 31]
#  [12 22 32]
#  [13 23 33]]
# Start from a flat array
x = np.arange(3)  # [0, 1, 2], shape (3,)

# Turn x into a (3, 1) column vector
x_reshaped = x.reshape(3, 1)
print("x reshaped shape:", x_reshaped.shape)  # (3,1)

# A second flat array with two elements
y = np.array([10, 20])  # shape (2,)

# (3, 1) + (2,) broadcasts both operands to (3, 2)
result = x_reshaped + y
print("Result shape:", result.shape)  # (3, 2)
print("Result:\n", result)
# Output:
# [[10 20]
#  [11 21]
#  [12 22]]
# Example: broadcasting a per-channel offset over a batch of colour images
# The 4D array simulates a batch of 2 images, each with 3 rows,
# 4 columns and 3 colour channels (RGB)
batch_shape = (2, 3, 4, 3)
images = np.random.randint(0, 256, batch_shape)
print("Images shape:", images.shape)  # (2, 3, 4, 3)

# One brightness offset per channel
brightness_offset = np.array([10, 20, 30])  # Shape (3,) for RGB channels

# The (3,) offset lines up with the last axis and is broadcast over
# every image and every pixel
brightened_images = images + brightness_offset
print("Brightened images shape:", brightened_images.shape)  # (2, 3, 4, 3)
import numpy as np  # Import numpy for handling date/time arrays

# datetime64 represents a point in time (here with day precision)
date = np.datetime64('2025-06-07')
print("Date:", date)
# Output: 2025-06-07

# timedelta64 represents a duration (here 5 days)
duration = np.timedelta64(5, 'D')
print("Duration:", duration)
# Output: 5 days

# np.arange with a datetime64 dtype generates consecutive days;
# the stop date is excluded
dates = np.arange('2025-06-01', '2025-06-08', dtype='datetime64[D]')
print("Dates array:", dates)
# Output: ['2025-06-01' '2025-06-02' '2025-06-03' '2025-06-04' '2025-06-05' '2025-06-06' '2025-06-07']

# Adding a timedelta shifts every element
two_days = np.timedelta64(2, 'D')
new_dates = dates + two_days
print("New dates after adding 2 days:", new_dates)
# Output: ['2025-06-03' '2025-06-04' '2025-06-05' '2025-06-06' '2025-06-07' '2025-06-08' '2025-06-09']

# Subtracting two datetime64 values yields a timedelta64
diff = np.datetime64('2025-06-10') - np.datetime64('2025-06-07')
print("Difference:", diff)
# Output: 3 days

# Elementwise comparison gives a boolean mask usable for filtering
mask = dates < np.datetime64('2025-06-05')
print("Dates before 2025-06-05:", dates[mask])
# Output: ['2025-06-01' '2025-06-02' '2025-06-03' '2025-06-04']

# Note: NumPy datetime64 does not support time zones directly,
# but you can work with UTC times or use pandas for full time zone support.

# Converting to a string lets other tools take over the time-zone handling
date_str = np.datetime64('2025-06-07').astype('str')
print("Date as string:", date_str)
# Output: 2025-06-07
import numpy as np  # Import numpy library for polynomial operations

# Polynomial: 2x^3 + 3x^2 + 0x + 5
# Coefficients are listed from the highest power of x down to the constant
coefficients = np.array([2, 3, 0, 5])
print("Polynomial coefficients:", coefficients)
# Output: [2 3 0 5]

# Evaluate polynomial at x = 2 using np.polyval
x = 2
value = np.polyval(coefficients, x)
print("Polynomial evaluated at x=2:", value)
# Calculation: 2*(2^3) + 3*(2^2) + 0*(2) + 5 = 16 + 12 + 0 + 5 = 33
# Output: 33

# Roots of 2x^3 + 3x^2 + 0x + 5 = 0 (may be real or complex)
roots = np.roots(coefficients)
print("Polynomial roots:", roots)

# Define another polynomial: x^2 + 2x + 1
coeffs2 = np.array([1, 2, 1])

# Polynomial addition. np.polyadd aligns operands of different degree on its
# own, so the shorter coefficient array does not need manual zero-padding.
sum_poly = np.polyadd(coefficients, coeffs2)
print("Sum of polynomials coefficients:", sum_poly)
# Output: [2 4 2 6]  -> 2x^3 + 4x^2 + 2x + 6

# Polynomial multiplication
prod_poly = np.polymul(coefficients, coeffs2)
print("Product of polynomials coefficients:", prod_poly)
# Output: [ 2  7  8  8 10  5]

# Polynomial differentiation (degree drops by one)
diff_poly = np.polyder(coefficients)
print("Derivative coefficients:", diff_poly)
# Output: [6 6 0]  -> 6x^2 + 6x
# Sample points to fit
x_points = np.array([0, 1, 2, 3, 4])
y_points = np.array([1, 3, 7, 13, 21])

# Least-squares fit of a degree-2 polynomial; coefficients come back
# highest power first
degree = 2
fit_coeffs = np.polyfit(x_points, y_points, degree)
print("Fitted polynomial coefficients:", fit_coeffs)

# Evaluate the fitted polynomial at x = 2.5
y_fit = np.polyval(fit_coeffs, 2.5)
print("Fitted polynomial value at x=2.5:", y_fit)
import numpy as np  # Import numpy library for numerical computations including FFT

# Time-domain signal: a 3 Hz sine wave sampled at 8 points over one second
t = np.linspace(0, 1, 8, endpoint=False)  # 8 evenly spaced samples, endpoint excluded
signal = np.sin(2 * np.pi * 3 * t)
print("Time-domain signal:", signal)

# Forward FFT: time domain -> complex frequency-domain coefficients
fft_result = np.fft.fft(signal)
print("FFT result:", fft_result)

# Inverse FFT: back to the time domain. The result is complex, with
# imaginary parts that are only numerical noise.
recovered_signal = np.fft.ifft(fft_result)
print("Recovered signal (after inverse FFT):", recovered_signal)

# rfft exploits the symmetry of real input and returns only the
# non-negative frequency terms
real_fft = np.fft.rfft(signal)
print("Real FFT result:", real_fft)

# Pass the original length so odd-length signals also round-trip correctly
recovered_real_signal = np.fft.irfft(real_fft, n=signal.size)
print("Recovered signal from real FFT:", recovered_real_signal)

# Example: low-pass filter by zeroing out high-frequency components.
# The full FFT stores negative frequencies in the upper half of the array,
# so the mask must be built from |frequency|, not from the array index;
# zeroing by index would break conjugate symmetry and produce a complex
# "filtered" signal.
freqs = np.fft.fftfreq(signal.size, d=t[1] - t[0])  # Frequency (Hz) of each bin
cutoff_hz = 2.0  # Keep components with |f| <= 2 Hz
filtered_fft = fft_result.copy()
filtered_fft[np.abs(freqs) > cutoff_hz] = 0

# The spectrum is still conjugate-symmetric, so the inverse is real up to
# rounding error. The input here is a pure 3 Hz tone, so it is removed entirely.
filtered_signal = np.fft.ifft(filtered_fft).real
print("Filtered signal:", filtered_signal)

# Convolution using FFT: multiplication in the frequency domain
signal1 = np.array([1, 2, 3, 4])
signal2 = np.array([0, 1, 0.5, 0])

# Zero-pad both to len(signal1) + len(signal2) - 1 so the circular
# convolution computed by the FFT equals the linear convolution
conv_len = len(signal1) + len(signal2) - 1
fft1 = np.fft.fft(signal1, n=conv_len)
fft2 = np.fft.fft(signal2, n=conv_len)

# Elementwise product in the frequency domain (convolution theorem)
convolved_fft = fft1 * fft2

# Back to the time domain; keep the real part, the imaginary part is noise
convolved_signal = np.fft.ifft(convolved_fft)
print("Convolved signal:", convolved_signal.real)
NumPy arrays are the core data structure used by most scientific Python libraries for efficient data processing and numerical computing.
import numpy as np  # Import NumPy for array operations
from scipy import integrate  # SciPy's numerical integration routines


def f(x):
    """Integrand: the sine of x."""
    return np.sin(x)


# quad returns the value of the definite integral together with an
# estimate of the absolute error
lower, upper = 0, np.pi
result, error = integrate.quad(f, lower, upper)

print("Integral of sin(x) from 0 to pi:", result)
# Output: should be close to 2, the exact integral value
import matplotlib.pyplot as plt  # Import Matplotlib for plotting
import numpy as np  # NumPy for data arrays

# Sample data: 100 points between 0 and 10 and their sines
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Draw the curve, then decorate the axes
plt.plot(x, y)
plt.title("Sine Wave")
plt.xlabel("x")
plt.ylabel("sin(x)")
plt.grid(True)  # Show grid on plot

plt.show()  # Display the plot window
import numpy as np  # Import NumPy for array creation
import pandas as pd  # Import Pandas for DataFrame handling

# NumPy array with random data: 5 rows, 3 columns of floats in [0, 1)
data = np.random.rand(5, 3)

# Wrap the array in a DataFrame with labelled columns
df = pd.DataFrame(data, columns=["A", "B", "C"])
print("Pandas DataFrame from NumPy array:")
print(df)

# Convert back to NumPy. DataFrame.to_numpy() is the method pandas
# recommends over the older .values attribute.
array_back = df.to_numpy()
print("NumPy array from DataFrame:")
print(array_back)
import numpy as np  # NumPy for data handling
from sklearn.linear_model import LinearRegression  # Linear Regression from scikit-learn

# Example dataset: 5 samples with 1 feature each; the target follows y = 2*X
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

# Create the estimator and fit it to the data
model = LinearRegression()
model.fit(X, y)

# Predict the targets for two unseen inputs, 6 and 7
new_inputs = np.array([[6], [7]])
predictions = model.predict(new_inputs)
print("Predictions for inputs 6 and 7:", predictions)
# Output: predicted values close to 12 and 14
import numpy as np  # Import NumPy library for array operations


def f(i, j):
    """Return the sum of the row index i and the column index j."""
    return i + j


# np.fromfunction passes the coordinate grids for the requested shape to f,
# so element (i, j) of the result holds f(i, j)
grid_shape = (4, 4)
arr = np.fromfunction(f, grid_shape, dtype=int)

print("Array created using np.fromfunction():")
print(arr)  # Each element equals i + j
import numpy as np  # Import NumPy

# One-dimensional coordinate vectors
x = np.array([0, 1, 2])
y = np.array([10, 20, 30])

# meshgrid expands the two vectors into 2D coordinate matrices:
# every row of X is a copy of x, every column of Y is a copy of y
X, Y = np.meshgrid(x, y)

print("X coordinate matrix:")
print(X)

print("Y coordinate matrix:")
print(Y)
import numpy as np  # Import NumPy

# Values destined for the main diagonal
a = np.array([1, 2, 3, 4])

# Given a 1D input, np.diag builds a square matrix with it on the diagonal
diag_matrix = np.diag(a)
print("Diagonal matrix using np.diag():")
print(diag_matrix)

# A small 2D matrix
b = np.array([[1, 2], [3, 4]])

# np.diagflat flattens its input first, then puts it on the diagonal
diagflat_matrix = np.diagflat(b)
print("Diagonal matrix using np.diagflat():")
print(diagflat_matrix)

# np.triu keeps the main diagonal and everything above it; the rest becomes 0
upper_tri = np.triu(b)
print("Upper triangular matrix using np.triu():")
print(upper_tri)

# np.tril keeps the main diagonal and everything below it; the rest becomes 0
lower_tri = np.tril(b)
print("Lower triangular matrix using np.tril():")
print(lower_tri)
import numpy as np  # Import NumPy

# The numbers 0..8 laid out as a 3x3 grid
arr = np.arange(9).reshape(3, 3)
print("Original array:")
print(arr)

# Rows and columns to pick
row_indices = [0, 2]  # 1st and 3rd rows
col_indices = [1, 2]  # 2nd and 3rd columns

# np.ix_ builds an open mesh from the two lists, so indexing with it selects
# every (row, column) combination rather than pairing the lists element by element
mesh = np.ix_(row_indices, col_indices)
subset = arr[mesh]
print("Subset selected with np.ix_():")
print(subset)
import numpy as np  # Import NumPy for array operations
from PIL import Image  # Import PIL for image loading and saving

# Load 'example.jpg' and convert it to grayscale mode ("L").
# The with-block closes the underlying file once the pixels are loaded.
with Image.open("example.jpg") as source:
    img = source.convert("L")

arr = np.array(img)  # Grayscale pixels as a uint8 array
print("Original Image Shape:", arr.shape)

# Flip the image vertically (upside down) and save it
flipped = np.flipud(arr)
Image.fromarray(flipped).save("flipped.jpg")

# Increase brightness by 50, saturating at 255.
# The addition must happen in a wider integer type: uint8 arithmetic wraps
# around (e.g. 230 + 50 -> 24), so clipping after a uint8 add comes too late
# and bright pixels would turn dark.
brighter = np.clip(arr.astype(np.int16) + 50, 0, 255).astype(np.uint8)
Image.fromarray(brighter).save("brighter.jpg")
import numpy as np  # Import NumPy

# Random exam scores: 100 students (rows) x 5 subjects (columns)
scores = np.random.randint(50, 100, size=(100, 5))

# axis=1 collapses the subject axis -> one average per student
student_avg = np.mean(scores, axis=1)

# axis=0 collapses the student axis -> one average per subject
subject_avg = np.mean(scores, axis=0)

print("Average score per student:")
print(student_avg)

print("Average score per subject:")
print(subject_avg)
import numpy as np  # Import NumPy

# 100 time points covering 0 to 10 seconds
t = np.linspace(0, 10, 100)

# Initial conditions
v0 = 5   # Initial velocity (m/s)
a = 9.8  # Acceleration due to gravity (m/s²)

# Constant-acceleration kinematics, s = v0*t + 0.5*a*t^2,
# evaluated for the whole time array at once
position = v0 * t + 0.5 * a * t**2

print("Simulated positions:")
print(position)
import numpy as np

# BAD: writing to a slice that is still a view of the source array
a = np.arange(10)
b = a[2:6]   # b shares memory with a
b[0] = 999   # ... so this also overwrites a[2]
print("Modified original array (unexpected side-effect):")
print(a)  # a[2] is now 999

# GOOD: take an explicit copy before modifying
a = np.arange(10)
b = a[2:6].copy()  # b owns its data
b[0] = 999         # a is not affected
print("Original array (unchanged after safe copy):")
print(a)  # a still holds 0..9
UFuncs (Universal Functions) in NumPy are vectorized wrappers for simple functions that operate element-by-element on arrays and support broadcasting, type casting, and multiple output capabilities.
np.add, np.subtract, np.multiply, np.divide, np.remainder, np.floor_divide
np.sin, np.cos, np.tan, np.arcsin
np.exp, np.log, np.log10, np.log2
np.greater, np.less_equal, np.equal, np.not_equal
np.bitwise_and, np.bitwise_or, np.bitwise_xor, np.invert
np.floor, np.ceil, np.rint, np.round
np.add.reduce, np.add.accumulate, np.add.outer, np.add.at
import numpy as np  # Import the NumPy library

# reduce: fold the array with the ufunc -> 1 + 2 + 3 + 4
a = np.array([1, 2, 3, 4])
result = np.add.reduce(a)
print("Sum using reduce:", result)  # Output: 10

# accumulate: keep every intermediate result of the fold
a = np.array([1, 2, 3, 4])
result = np.multiply.accumulate(a)  # [1, 1*2, 1*2*3, 1*2*3*4]
print("Accumulated product:", result)  # Output: [1 2 6 24]

# outer: apply the ufunc to every pair (a[i], b[j])
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])
result = np.add.outer(a, b)
print("Outer addition:\n", result)  # Output: 3x3 matrix

# at: unbuffered in-place update, so a repeated index (1 here)
# receives every one of its contributions
a = np.array([0, 0, 0, 0, 0])
indices = np.array([0, 1, 1, 3])
values = np.array([5, 1, 2, 3])
np.add.at(a, indices, values)
print("Updated array:", a)  # Output: [5 3 0 3 0]
angles = np.array([0, np.pi / 2, np.pi])  # Angles in radians
sine_values = np.sin(angles)  # Sine of each angle

# np.pi is only a floating-point approximation of pi, so sin(np.pi) comes out
# as about 1.22e-16 rather than exactly 0. Round for display so the printed
# values match the mathematical ones.
sine_display = np.round(sine_values, 10)
print("Sine values:", sine_display)  # Output: [0. 1. 0.]
# Comparison ufunc: elementwise x > y
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
result = np.greater(x, y)
print("x > y:", result)  # Output: [False False  True]

# out=: write the result into a preallocated array instead of a new one
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
out = np.empty(3)  # Float buffer that receives the sums
np.add(a, b, out=out)
print("Sum with out=:", out)  # Output: [5. 7. 9.]


def square_plus_one(x):
    """Return x squared plus one."""
    return x**2 + 1


# np.vectorize wraps a scalar Python function so it can be called on arrays
vectorized_func = np.vectorize(square_plus_one)
input_array = np.array([1, 2, 3])
result = vectorized_func(input_array)  # Applied element by element
print("Custom UFunc result:", result)  # Output: [2 5 10]
import numpy as np  # Import NumPy

# reduce collapses the array by applying the ufunc cumulatively -> 1+2+3+4
a = np.array([1, 2, 3, 4])
result = np.add.reduce(a)
print("Sum using reduce:", result)  # Output: 10

# accumulate returns each running product: [1, 1×2, 1×2×3, 1×2×3×4]
a = np.array([1, 2, 3, 4])
result = np.multiply.accumulate(a)
print("Product using accumulate:", result)  # Output: [1 2 6 24]

# outer builds the matrix of a[i] + b[j] for every pair of elements
a = np.array([1, 2, 3])
b = np.array([10, 20, 30])
outer_sum = np.add.outer(a, b)
print("Outer sum:")
print(outer_sum)  # Output is a 3x3 matrix

# at adds each value at its index; the repeated index 1 gets both 1 and 2
a = np.array([0, 0, 0, 0, 0])
indices = np.array([0, 1, 1, 3])
values = np.array([5, 1, 2, 3])
np.add.at(a, indices, values)
print("Array after np.add.at:")
print(a)  # Output: [5 3 0 3 0]

# Running total with add.accumulate: [1, 1+2, 1+2+3, ...]
data = np.arange(1, 6)  # [1, 2, 3, 4, 5]
running_total = np.add.accumulate(data)
print("Running total:")
print(running_total)  # Output: [1 3 6 10 15]

# Outer product of two vectors with multiply.outer
x = np.array([1, 2, 3])
y = np.array([10, 20, 30])
outer_product = np.multiply.outer(x, y)
print("Outer product:")
print(outer_product)  # Output is a 3x3 matrix of x[i]*y[j]
import time

arr = np.arange(1_000_000)  # Large array for the comparison

# time.perf_counter() is the clock intended for measuring short intervals:
# it is monotonic and has the highest available resolution, whereas
# time.time() follows the wall clock and can jump if the clock is adjusted.

# Timing an explicit Python loop
start = time.perf_counter()
total = 0
for val in arr:
    total += val
end = time.perf_counter()
print("Loop sum:", total, "Time:", end - start)

# Timing the vectorized NumPy reduction
start = time.perf_counter()
total_np = np.sum(arr)
end = time.perf_counter()
print("NumPy sum:", total_np, "Time:", end - start)
import numpy as np  # Import NumPy library

# The 'scores' field is itself a structure with two integer sub-fields
score_fields = [('math', 'i4'), ('science', 'i4')]
dt = np.dtype([
    ('name', 'U10'),           # Unicode string, up to 10 characters
    ('scores', score_fields),  # Nested structure
])

# Each record is (name, (math, science))
students = np.array(
    [
        ('Alice', (85, 90)),
        ('Bob', (78, 88)),
    ],
    dtype=dt,
)

# Nested fields are reached by chaining the field names
print("Math score of Alice:", students[0]['scores']['math'])
# Flat structured dtype. '?' is the type code for a boolean field;
# 'b' would declare a signed 8-bit integer instead.
dt2 = [('id', 'i4'), ('salary', 'f4'), ('active', '?')]

# Create structured array using list of tuples
employees = np.array([
    (1, 55000.5, True),
    (2, 62000.0, False),
], dtype=dt2)

print("Salary of employee 2:", employees[1]['salary'])

# Save structured array to a binary .npy file
np.save('employees.npy', employees)

# Load it back. A structured dtype made of plain numeric/boolean fields is
# stored natively, so pickling stays disabled (allow_pickle defaults to False;
# enabling it on files from untrusted sources can execute arbitrary code).
loaded_emp = np.load('employees.npy')
print("Loaded active flag:", loaded_emp[0]['active'])

# Structured dtype for the CSV example
dt3 = [('name', 'U10'), ('age', 'i4'), ('score', 'f4')]

# Build a structured array first: passing the raw list of mixed tuples to
# np.savetxt would coerce every value to a string, and the %d / %.1f
# conversions would then fail with a TypeError.
students_table = np.array([('Alice', 23, 85.6), ('Bob', 25, 91.2)], dtype=dt3)

# Save to CSV with a header line; comments='' drops the default '# ' prefix
np.savetxt('students.csv', students_table,
           fmt='%s,%d,%.1f', header='Name,Age,Score', comments='')

# Load from CSV, skipping the header and applying the structured dtype
loaded_csv = np.genfromtxt('students.csv', delimiter=',', skip_header=1,
                           dtype=dt3, encoding='utf-8')
print("Loaded score:", loaded_csv[1]['score'])  # Second student's score

# Sort employees by salary
sorted_emp = np.sort(employees, order='salary')
print("Employee with lowest salary:", sorted_emp[0])

# The boolean 'active' field can be used directly as a mask
active_emp = employees[employees['active']]
print("Number of active employees:", len(active_emp))
np.memmap is a subclass of ndarray that provides memory-mapped file I/O. It is useful for reading and writing **large binary arrays** directly on disk without loading the entire data set into RAM.
import numpy as np  # NumPy provides np.memmap

filename = 'large_array.dat'  # backing file for the memory-mapped array

# Map the file as a 10000 x 1000 float32 array; 'w+' creates or overwrites it.
fp = np.memmap(filename, mode='w+', dtype='float32', shape=(10000, 1000))

first_row = np.arange(1000)
fp[0] = first_row  # store 0..999 in row 0
print("First row written:", fp[0][:10])  # first 10 values of row 0
fp.flush()  # push pending changes out to the file

# Map the same file again, this time read-only, and read the row back.
fp_read = np.memmap(filename, mode='r', dtype='float32', shape=(10000, 1000))
print("Read from file:", fp_read[0][:10])
- `filename`: Name of the file to read/write binary data
- `dtype`: Data type of elements (e.g., float32, int16)
- `mode`: File access mode:
  - `'r'`: Read-only
  - `'r+'`: Read and write
  - `'w+'`: Create or overwrite and allow reading/writing
  - `'c'`: Copy-on-write
- `shape`
# : Shape of the array (e.g., 2D, 3D, etc.)
# (The line above is the tail of the memmap parameter list that precedes this
# snippet; the code below continues the memmap example and reuses `fp`.)

# Fill each of the first 10 rows with its own row number.
for row_index in range(10):
    fp[row_index] = row_index  # the scalar is broadcast across the whole row

fp.flush()  # save all changes to disk

row_five = fp[5]
print("Row 5 contents:", row_five[:10])  # first 10 values from row 5
import ctypes  # foreign-function interface for calling into shared libraries

import numpy as np  # array handling

# The buffer handed to C: three float64 values.
arr = np.array([1.5, 2.5, 3.5], dtype=np.float64)

# Load the shared library (assumes ./mylib.so has already been compiled).
c_lib = ctypes.CDLL('./mylib.so')

# Declare the C signature once: (double *, int) -> void.
double_ptr = ctypes.POINTER(ctypes.c_double)
process_array = c_lib.process_array
process_array.argtypes = [double_ptr, ctypes.c_int]
process_array.restype = None  # the function returns nothing

# Expose the array's data buffer as a double* and pass it with the length.
ptr = arr.ctypes.data_as(double_ptr)
process_array(ptr, len(arr))
# File: fastmath.pyx (Cython code must be compiled using a setup.py)
# Compile with: python setup.py build_ext --inplace

def dot_product(double[:] a, double[:] b):
    """Return the dot product of two 1-D buffers of C doubles.

    Parameters
    ----------
    a, b : double[:]
        Typed memoryviews over 1-D double-precision data.

    Returns
    -------
    float
        Sum of ``a[i] * b[i]`` over all indices.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` do not have the same length.
    """
    # Py_ssize_t is the index type of memoryview shapes; a C int could
    # overflow on very large buffers.
    cdef Py_ssize_t i
    cdef Py_ssize_t n = a.shape[0]
    cdef double result = 0.0

    # Fail loudly instead of silently ignoring the tail of the longer input.
    if b.shape[0] != n:
        raise ValueError("a and b must have the same length")

    for i in range(n):
        result += a[i] * b[i]
    return result
import cProfile  # Python's built-in deterministic profiler


def slow_function():
    """Return the sum of ``i * 2`` for every ``i`` in ``range(1000000)``.

    The explicit Python-level loop is intentional: it gives the profiler a
    visibly slow function to measure.
    """
    total = 0
    for i in range(1000000):  # deliberately inefficient loop
        total += i * 2
    return total


# cProfile.run() evaluates its command string inside the __main__ namespace,
# so it raises NameError whenever this code is imported instead of being run
# as the main script. runctx() with this module's own namespace works in
# both situations and prints the same report.
cProfile.runctx('slow_function()', globals(), locals())
import numpy as np
from numba import njit  # Numba's just-in-time compilation decorator


@njit  # compile the function with Numba's JIT
def fast_sum(arr):
    """Add up the elements of ``arr`` one index at a time and return the total."""
    running = 0.0
    for idx in range(len(arr)):
        running += arr[idx]  # plain indexed loop, compiled by Numba
    return running


a = np.random.rand(1_000_000)  # one million uniform random samples
print("Sum:", fast_sum(a))  # call the JIT-compiled function
# Two ways of visiting every element of the same 2-D array.
a = np.random.rand(1000, 1000)

# Bad cache pattern: the inner loop varies the FIRST index, i.e. it walks
# down one column at a time (column-wise access: cache miss).
total = 0
for col in range(1000):
    for row in range(1000):
        total += a[row, col]

# Better cache pattern: the inner loop varies the LAST index, i.e. it walks
# along one row at a time (row-wise access: cache hit).
total = 0
for row in range(1000):
    for col in range(1000):
        total += a[row, col]
a = np.random.rand(1000)

# Plain assignment only binds a second name to the same array object.
b = a
b[0] = 100  # visible through both a and b

# An explicit copy owns a separate block of memory.
c = np.copy(a)
c[0] = 200  # a is left unchanged
- Use `vectorized` operations instead of loops when possible
- Use `numba` for performance-critical loops
- Avoid unnecessary `.copy()`
# to save memory
# (The line above is the tail of the best-practice list that precedes this
# snippet; the code below is a separate example.)

import numpy as np  # Import NumPy library

# NOTE: numpy.lib.financial was deprecated in NumPy 1.18 and removed in 1.20,
# so importing it fails on current NumPy. The helpers below implement the
# same formulas and sign conventions (payments at the end of each period)
# with plain NumPy.


def _future_value(rate, nper, pmt, pv):
    """Return the future value after ``nper`` periods at ``rate`` per period."""
    if rate == 0:
        return -(pv + pmt * nper)
    growth = (1 + rate) ** nper
    return -(pv * growth + pmt * (growth - 1) / rate)


def _present_value(rate, nper, pmt, fv):
    """Return the present value that grows to ``fv`` after ``nper`` periods."""
    if rate == 0:
        return -(fv + pmt * nper)
    growth = (1 + rate) ** nper
    return -(fv + pmt * (growth - 1) / rate) / growth


def _net_present_value(rate, values):
    """Return the NPV of ``values``; the first cash flow is at time 0."""
    values = np.asarray(values, dtype=float)
    periods = np.arange(values.size)
    return float((values / (1 + rate) ** periods).sum())


def _internal_rate_of_return(values):
    """Return the rate at which the NPV of ``values`` is zero (NaN if none)."""
    # With x = 1 / (1 + rate), NPV = 0 is a polynomial in x whose
    # coefficients are the cash flows; np.roots expects highest degree first.
    candidates = np.roots(np.asarray(values, dtype=float)[::-1])
    usable = (candidates.imag == 0) & (candidates.real > 0)
    if not usable.any():
        return float('nan')
    rates = 1.0 / candidates[usable].real - 1.0
    # Several real solutions may exist; keep the one closest to zero.
    return rates.item(np.argmin(np.abs(rates)))


def _payment(rate, nper, pv, fv=0):
    """Return the fixed per-period payment that pays off ``pv`` in ``nper`` periods."""
    if rate == 0:
        return -(fv + pv) / nper
    growth = (1 + rate) ** nper
    return -(fv + pv * growth) * rate / (growth - 1)


# Future value of an investment: invest $1000 for 10 years at 5%.
fv = _future_value(rate=0.05, nper=10, pmt=0, pv=-1000)
print("Future Value:", fv)

# Present value of a future cash flow: $2000 received in 10 years.
pv = _present_value(rate=0.05, nper=10, pmt=0, fv=2000)
print("Present Value:", pv)

# Net Present Value of a series of cash flows at a 10% discount rate.
cash_flows = [-1000, 300, 400, 500, 600]
npv = _net_present_value(0.1, cash_flows)
print("Net Present Value:", npv)

# Internal Rate of Return: the rate that makes the NPV equal to 0.
irr = _internal_rate_of_return(cash_flows)
print("Internal Rate of Return:", irr)

# Equal monthly payments for a $10k loan over 5 years at 5% APR.
monthly_payment = _payment(rate=0.05/12, nper=12*5, pv=-10000)
print("Monthly Payment:", monthly_payment)
import matplotlib.pyplot as plt  # plotting
import numpy as np

N = 50  # window length in samples

# Build the four common window shapes.
bartlett = np.bartlett(N)  # Bartlett (triangular) window
hamming = np.hamming(N)    # Hamming window
blackman = np.blackman(N)  # Blackman window
hann = np.hanning(N)       # Hann window

# Draw all windows on one set of axes for comparison.
labelled_windows = (
    ('Bartlett', bartlett),
    ('Hamming', hamming),
    ('Blackman', blackman),
    ('Hann', hann),
)
for label, window in labelled_windows:
    plt.plot(window, label=label)
plt.legend()
plt.title("Window Functions")
plt.xlabel("Sample Index")
plt.ylabel("Amplitude")
plt.grid(True)
plt.show()

# Example: apply a window to a signal before taking its FFT.
t = np.linspace(0, 1, N)
signal = np.sin(2 * np.pi * t)             # basic sine wave over N samples
windowed_signal = signal * hann            # element-wise Hann weighting
fft_output = np.fft.fft(windowed_signal)   # FFT of the windowed signal
fft_magnitude = np.abs(fft_output)         # magnitude for spectral analysis

plt.plot(fft_magnitude)
plt.title("FFT of Windowed Signal")
plt.xlabel("Frequency Bin")
plt.ylabel("Magnitude")
plt.grid(True)
plt.show()
import numpy as np  # Import NumPy

# Create a real-valued signal: one sine period sampled at 100 points.
x = np.linspace(0, 2*np.pi, 100)
signal = np.sin(x)

# rfft: FFT for real-valued input; returns only the non-negative
# frequency terms (n // 2 + 1 complex values).
rfft_result = np.fft.rfft(signal)
print("Real FFT Output:", rfft_result)

# irfft inverts rfft. Passing n explicitly restores the original length;
# without it an odd-length signal would come back one sample short.
reconstructed = np.fft.irfft(rfft_result, n=signal.size)
print("Reconstructed Signal:", reconstructed)

# hfft treats its input as the non-negative half of a Hermitian-symmetric
# sequence and returns a real-valued transform. The half-spectrum from
# rfft has exactly that layout, so it serves as example input.
h_input = np.fft.rfft(signal)
hfft_result = np.fft.hfft(h_input, n=signal.size)
print("Hermitian FFT Output:", hfft_result)

# ihfft is the inverse of hfft: it takes the REAL output of hfft and
# returns the Hermitian half it came from (here, h_input). Feeding it a
# complex array such as np.fft.fft(signal) is not a valid round trip.
ihfft_result = np.fft.ihfft(hfft_result)
print("Reconstructed from Hermitian:", ihfft_result)
import matplotlib.pyplot as plt  # visualization
import numpy as np               # polynomial helpers

# P(x) = 2x^3 + 3x^2 - x + 5, coefficients listed from highest degree down.
coeffs = [2, 3, -1, 5]

# Calculus on the coefficient list.
deriv = np.polyder(coeffs)       # derivative coefficients
print("Derivative of Polynomial:", deriv)

integrated = np.polyint(coeffs)  # coefficients of the indefinite integral
print("Integral of Polynomial:", integrated)

# Polynomial division: (x^2 - 3x + 2) / (x - 1).
dividend = [1, -3, 2]
divisor = [1, -1]
division = np.polydiv(dividend, divisor)
quotient, remainder = division
print("Quotient:", quotient)
print("Remainder:", remainder)

# Roots of the original cubic.
roots = np.roots(coeffs)
print("Roots:", roots)

# Build noisy samples of a known quadratic, then fit a 2nd-degree polynomial.
x = np.linspace(0, 10, 100)
noise = np.random.normal(0, 5, size=x.shape)
y = 3 * x**2 + 2 * x + 1 + noise

fit_coeffs = np.polyfit(x, y, deg=2)
print("Fitted Coefficients:", fit_coeffs)

# Evaluate the fitted polynomial at the sample points.
fitted_y = np.polyval(fit_coeffs, x)

# Noisy samples as grey dots, fitted curve as a red line.
plt.scatter(x, y, label='Noisy Data', color='gray')
plt.plot(x, fitted_y, label='Fitted Curve', color='red')
plt.legend()
plt.title("Polynomial Fit to Noisy Data")
plt.xlabel("x")
plt.ylabel("y")
plt.grid(True)
plt.show()
import numpy as np               # array manipulation
from PIL import Image            # Pillow for image loading
import matplotlib.pyplot as plt  # displaying images

# Load the image (the file must exist in the working directory).
# - The context manager closes the underlying file once the pixels are copied.
# - convert("RGB") guarantees a (height, width, 3) array, so the channel
#   indexing and mean(axis=2) below also work for grayscale, palette or
#   RGBA source files.
with Image.open("example.jpg") as image:
    image_array = np.array(image.convert("RGB"))
print("Original Image Shape:", image_array.shape)  # (height, width, 3)

# Crop: top-left 100x100 region (rows first, then columns).
cropped = image_array[0:100, 0:100]
print("Cropped Shape:", cropped.shape)

# Naive downsampling: keep every second pixel along both axes.
resized = image_array[::2, ::2]
print("Resized Shape:", resized.shape)

# Separate the colour channels (last axis is R, G, B).
red_channel = image_array[:, :, 0]
green_channel = image_array[:, :, 1]
blue_channel = image_array[:, :, 2]

# Grayscale: plain average of the three channels, stored as 8-bit.
gray = image_array.mean(axis=2).astype(np.uint8)
print("Grayscale Shape:", gray.shape)

# 3x3 filters applied by hand on a zero-padded copy of the grayscale image.
kernel = np.ones((3, 3)) / 9  # mean (box blur) kernel
# This kernel differentiates along the x axis, so it responds to vertical
# edges (left-to-right brightness changes).
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]])
padded = np.pad(gray, pad_width=1, mode='constant')  # 1-pixel zero border

blurred = np.zeros_like(gray)  # uint8 output: values are truncated on store
edges = np.zeros_like(gray)    # uint8 output: clamped to 0-255 before store
height, width = gray.shape
for i in range(height):
    for j in range(width):
        region = padded[i:i+3, j:j+3]  # 3x3 neighbourhood centred on (i, j)
        blurred[i, j] = np.sum(region * kernel)
        # Negative responses are clipped to 0, positive ones capped at 255.
        edges[i, j] = np.clip(np.sum(region * sobel_x), 0, 255)

# Thresholding: pixels brighter than the threshold become white (255).
threshold = 128
binary = (gray > threshold).astype(np.uint8) * 255

# Show the original next to every processed version in a 2x3 grid.
panels = [
    ("Original", image_array, None),
    ("Grayscale", gray, 'gray'),
    ("Blurred", blurred, 'gray'),
    ("Edge Detection", edges, 'gray'),
    ("Binary Threshold", binary, 'gray'),
]
plt.figure(figsize=(12, 6))
for position, (title, picture, cmap) in enumerate(panels, start=1):
    plt.subplot(2, 3, position)
    plt.title(title)
    plt.imshow(picture, cmap=cmap)
    plt.axis('off')
plt.tight_layout()
plt.show()
import numpy as np  # numerical array operations

# --- Broadcasting a row against a column -------------------------------------
a = np.array([1, 2, 3])                   # shape (3,)
b = np.array([10, 20, 30]).reshape(3, 1)  # shape (3, 1)

# (3,) and (3, 1) stretch to a common (3, 3) shape: every element of a is
# added to every element of b.
result = np.add(a, b)
print("Broadcasting result shape:", result.shape)  # (3, 3)
print(result)

# --- np.ix_() for cross-product indexing -------------------------------------
rows = np.array([0, 1])  # selected row indices
cols = np.array([1, 2])  # selected column indices
matrix = np.arange(16).reshape(4, 4)  # 4x4 matrix holding 0..15
print("Original matrix:\n", matrix)

# np.ix_ builds an open mesh from the two index arrays, so indexing with it
# picks every (row, col) combination.
row_sel, col_sel = np.ix_(rows, cols)
selected = matrix[row_sel, col_sel]
print("Selected elements with np.ix_:\n", selected)

# --- Broadcasting on a structured array field --------------------------------
dtype = [('x', int), ('y', float)]  # two named fields
data = np.zeros(3, dtype=dtype)     # three zero-initialised records
data['x'] = np.arange(3)            # 0, 1, 2
data['y'] = np.linspace(0, 1, 3)    # three evenly spaced values from 0 to 1
print("Structured array:\n", data)

# The scalar is broadcast over the whole 'y' column.
data['y'] = data['y'] + 2.0
print("After broadcasting addition on 'y' field:\n", data)
import numpy as np  # core numerical library for arrays
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# NumPy arrays are the common container for features and labels in ML
# workflows: one row per sample, one column per feature.
X = np.array([
    [5.1, 3.5, 1.4, 0.2],  # sample 1
    [4.9, 3.0, 1.4, 0.2],  # sample 2
    [6.2, 3.4, 5.4, 2.3],  # sample 3
])
y = np.array([0, 0, 1])  # class label of each sample

# Preprocessing: standardize every feature (remove the mean, scale to unit
# variance).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("Scaled features:\n", X_scaled)

# Train a logistic-regression classifier on the scaled features.
model = LogisticRegression()
model.fit(X_scaled, y)
print("Model coefficients:\n", model.coef_)

# TensorFlow and PyTorch usually consume the same data after a conversion,
# e.g. tf.convert_to_tensor(X_scaled) or torch.tensor(X_scaled).

# Feature extraction example: per-column mean of the raw features.
feature_means = X.mean(axis=0)
print("Feature means:", feature_means)

# NumPy's efficiency and interoperability make it the backbone of ML and
# data science pipelines.
import numpy as np        # dense arrays
from scipy import sparse  # SciPy's sparse matrix formats

# Sparse data is mostly zeros; storing it densely wastes memory, so SciPy
# offers formats that keep only the nonzero entries.

# A dense matrix with four nonzero entries.
dense_matrix = np.array([
    [0, 0, 3, 0],
    [22, 0, 0, 0],
    [0, 0, 0, 7],
    [0, 5, 0, 0],
])
print("Dense matrix:\n", dense_matrix)

# Convert to Compressed Sparse Row (CSR) format.
csr_matrix = sparse.csr_matrix(dense_matrix)
print("\nCSR sparse matrix representation:\n", csr_matrix)

# The three arrays that make up the CSR representation.
print("\nCSR data (nonzero values):", csr_matrix.data)
print("CSR indices (column indices of data):", csr_matrix.indices)
print("CSR indptr (row pointers):", csr_matrix.indptr)

# Sparse matrix product: the matrix times its own transpose.
transposed = csr_matrix.T
result = csr_matrix @ transposed
print("\nResult of sparse matrix multiplication:\n", result.toarray())

# Back to a dense NumPy array when one is needed.
dense_again = csr_matrix.toarray()
print("\nConverted back to dense NumPy array:\n", dense_again)

# Sparse formats keep large, mostly-zero datasets memory-efficient and fast,
# especially for linear algebra on huge datasets.
import numpy as np  # array creation

# A user-defined structured dtype groups several typed fields per record.
person_fields = [
    ('name', 'U10'),         # Unicode string, at most 10 characters
    ('age', np.int32),       # 32-bit integer
    ('weight', np.float64),  # 64-bit float
]
person_dtype = np.dtype(person_fields)

# One tuple per record, in field order.
records = [
    ('Alice', 25, 55.0),
    ('Bob', 30, 85.5),
    ('Charlie', 22, 68.2),
]
people = np.array(records, dtype=person_dtype)
print("Structured array of people:\n", people)

# Each field can be pulled out as a column by name.
print("\nNames:", people['name'])
print("Ages:", people['age'])

# NumPy's 'void' type holds raw bytes: 'V4' is a 4-byte opaque block.
void_dtype = [('bytes', 'V4')]
raw_data = np.array([(b'\x01\x02\x03\x04',)], dtype=void_dtype)
print("\nRaw binary data with void type:", raw_data['bytes'])

# Performance-critical operations can be moved into C extensions or Cython
# modules (fast loops, custom ufuncs - not shown here); the NumPy C API
# allows low-level manipulation of arrays from C.

# Summary:
# Custom dtypes provide flexibility for complex data,
# and extensions enable boosting speed beyond Python's capabilities.
import numpy as np  # Core numerical library

# Debugging common NumPy issues:

# 1. Watch out for shape mismatches during broadcasting
a = np.array([1, 2, 3])  # shape (3,)
b = np.array([1, 2])     # shape (2,): trailing sizes 3 and 2 cannot broadcast
# Note: a (2, 1) array would NOT fail here - (3,) and (2, 1) broadcast
# silently to (2, 3), which is often the harder bug to spot.

# Use print() and the shape attribute to diagnose array dimensions
print("Shape of a:", a.shape)
print("Shape of b:", b.shape)

# Incompatible shapes raise ValueError; catch it to inspect the message.
try:
    c = a + b
except ValueError as exc:
    print("Broadcasting failed:", exc)

# 2. Beware of unintended views vs copies leading to unexpected side effects
x = np.array([10, 20, 30])
y = x[1:3]  # basic slicing returns a view, not a copy
y[0] = 99   # writes through to x[1]
print("Original x after modifying y (view):", x)

# Use .copy() when you want an independent array
z = x[1:3].copy()
z[0] = 77   # x is unaffected
print("Original x after modifying z (copy):", x)

# Code organization tips:
# - Write small functions for repeated tasks
# - Avoid large monolithic scripts
# - Use meaningful variable names
# - Comment your code liberally, especially for complex operations

# Recommended learning resources:
# - Official NumPy documentation (https://numpy.org/doc/)
# - Books: "Python for Data Analysis" by Wes McKinney, "Numerical Python" by Robert Johansson
# - Communities: Stack Overflow, NumPy mailing list, GitHub discussions

# Future trends in numerical Python computing:
# - Increasing use of Just-In-Time compilers (Numba, Pythran)
# - Enhanced GPU support (CuPy, JAX)
# - Better integration with ML frameworks
# - Continued improvements in multi-core and distributed computing

print("Best practices help write efficient, maintainable, and bug-free NumPy code!")