from keras.models import Sequential
model = Sequential()

History and evolution
import keras
print(keras.__version__)

Installing Keras & dependencies
pip install tensorflow keras

Backend support (TensorFlow, Theano, CNTK)
from keras import backend as K
print(K.backend())

Keras vs other frameworks
# Keras uses Sequential or Functional APIs for ease of use
model = Sequential()

Keras ecosystem overview
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Hello World with Keras
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(10, input_shape=(5,), activation='relu'))

Project structure in Keras
/project
    /data
    /models
    train.py
    utils.py

Community & documentation
# Visit https://keras.io for API references and tutorials

Use cases of Keras
# Keras is used in domains like healthcare, finance, and robotics
# Each artificial neuron: output = activation(weighted_sum + bias)

Perceptron model
output = 1 if (w1*x1 + w2*x2 + b) > 0 else 0

Feedforward neural networks
model = Sequential([
    Dense(10, input_shape=(4,), activation='relu'),
    Dense(1, activation='sigmoid')
])

Activation functions
from keras.layers import Activation
model.add(Dense(64))
model.add(Activation('relu'))

Loss functions
model.compile(loss='binary_crossentropy', optimizer='adam')

Optimizers overview
model.compile(optimizer='adam')

Forward and backward propagation
# Automatically handled in model.fit()

Epochs and batches
model.fit(X, y, epochs=10, batch_size=32)

Overfitting and underfitting
from keras.layers import Dropout
model.add(Dropout(0.5))

Deep learning in real life
# Example: text sentiment classification or image detection
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(100,)))

Adding dense layers
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))

Configuring activation functions
model.add(Dense(1, activation='sigmoid'))

Compiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Model summary
model.summary()

Training the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

Evaluating model accuracy
loss, acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {acc}")

Making predictions
predictions = model.predict(X_new)

Saving and loading models
model.save('my_model.h5')
loaded_model = keras.models.load_model('my_model.h5')

Use case: Basic classifier
model = Sequential([
    Dense(128, activation='relu', input_shape=(784,)),
    Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
from tensorflow.keras.layers import Normalization
normalizer = Normalization()
normalizer.adapt(data)  # data is a NumPy array or tf.data dataset

Splitting train/test/validation
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

Encoding labels
from tensorflow.keras.utils import to_categorical
y_encoded = to_categorical(y)

Data generators
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(rescale=1./255)
train_gen = datagen.flow_from_directory('train/', target_size=(64, 64))

Image preprocessing
img = tf.keras.utils.load_img("cat.jpg", target_size=(64, 64))
img_array = tf.keras.utils.img_to_array(img) / 255.0

Text preprocessing
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

Sequence padding
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded = pad_sequences(sequences, padding='post', maxlen=100)

Handling missing values
import numpy as np
X = np.nan_to_num(X)  # replaces NaN with 0

Batch processing
model.fit(X_train, y_train, batch_size=32, epochs=10)

Data augmentation
datagen = ImageDataGenerator(rotation_range=20, horizontal_flip=True)
augmented = datagen.flow(X_train, y_train)
from tensorflow.keras.activations import sigmoid
output = sigmoid(x)

Tanh
from tensorflow.keras.activations import tanh
output = tanh(x)

ReLU
from tensorflow.keras.layers import Activation
model.add(Dense(64))
model.add(Activation('relu'))

Leaky ReLU
from tensorflow.keras.layers import LeakyReLU
model.add(LeakyReLU(alpha=0.01))

ELU
from tensorflow.keras.layers import ELU
model.add(ELU(alpha=1.0))

Softmax
model.add(Dense(3, activation='softmax'))

Swish
from tensorflow.keras.activations import swish
output = swish(x)

Choosing the right activation
// No code — selection depends on task
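As a rule of thumb, a sketch can still help: ReLU-family activations for hidden layers, sigmoid for binary outputs, softmax for multi-class outputs, linear for regression. The build_head helper and layer sizes below are illustrative only, not part of the Keras API.

# Illustrative sketch: pick the output activation by task type.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def build_head(task, units=32):
    model = Sequential([Dense(units, activation='relu', input_shape=(16,))])  # hidden layer: ReLU
    if task == 'binary':
        model.add(Dense(1, activation='sigmoid'))       # one probability for two classes
    elif task == 'multiclass':
        model.add(Dense(10, activation='softmax'))      # one probability per class
    else:  # regression
        model.add(Dense(1, activation='linear'))        # unbounded numeric output
    return model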
Custom activations
from tensorflow.keras.layers import Lambda
model.add(Lambda(lambda x: x**2))

Visualizing activations
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-10, 10, 100)
plt.plot(x, np.maximum(0, x))  # ReLU
plt.show()
model.compile(optimizer='adam', loss='mean_squared_error')

Binary Crossentropy
model.compile(optimizer='adam', loss='binary_crossentropy')

Categorical Crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy')

Sparse Categorical Crossentropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

Hinge loss
model.compile(optimizer='adam', loss='hinge')

Kullback-Leibler divergence
model.compile(optimizer='adam', loss='kullback_leibler_divergence')

Custom loss functions
def custom_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred) + 0.1)
model.compile(optimizer='adam', loss=custom_loss)

Choosing loss by task
// Decision logic based on task type
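A hedged sketch of the usual mapping from task type to a built-in Keras loss; the pick_loss helper below is purely illustrative.

# Illustrative decision helper: task type -> built-in Keras loss name.
def pick_loss(task, one_hot_labels=True):
    if task == 'regression':
        return 'mean_squared_error'
    if task == 'binary':
        return 'binary_crossentropy'
    # multi-class: choice depends on how labels are encoded
    return 'categorical_crossentropy' if one_hot_labels else 'sparse_categorical_crossentropy'

model.compile(optimizer='adam', loss=pick_loss('multiclass', one_hot_labels=False))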
Regularization penalties
from tensorflow.keras import regularizers
model.add(Dense(64, kernel_regularizer=regularizers.l2(0.01)))

Loss function behavior
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-1, 1, 100)
plt.plot(x, x**2)  # MSE shape
plt.show()
# Basic Gradient Descent
theta = theta - learning_rate * gradient

Stochastic Gradient Descent (SGD)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

Momentum
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

Nesterov Accelerated Gradient
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)

Adam
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

RMSprop
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

Adagrad
optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.01)

Nadam
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.002)

Choosing the right optimizer
# Example: Try different optimizers to compare performance
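One way to run that comparison, sketched under the assumption that a create_model() helper and arrays X, y are defined elsewhere (both are placeholders, not part of the original):

# Sketch: train the same architecture with different optimizers and compare
# the best validation accuracy of each run.
results = {}
for name in ['sgd', 'rmsprop', 'adam']:
    m = create_model()  # placeholder: returns a fresh, uncompiled model
    m.compile(optimizer=name, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    history = m.fit(X, y, validation_split=0.2, epochs=5, verbose=0)
    results[name] = max(history.history['val_accuracy'])
print(results)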
Learning rate scheduling
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=10000, decay_rate=0.9)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
accuracy = (TP + TN) / (TP + TN + FP + FN)

Precision & Recall
precision = TP / (TP + FP)
recall = TP / (TP + FN)

F1 Score
f1_score = 2 * (precision * recall) / (precision + recall)

Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred)

ROC-AUC
from sklearn.metrics import roc_auc_score
auc = roc_auc_score(y_true, y_scores)

Mean Absolute Error
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_true, y_pred)

R-squared
from sklearn.metrics import r2_score
r2 = r2_score(y_true, y_pred)

Custom metrics
def custom_metric(y_true, y_pred):
    return tf.reduce_mean(tf.abs(y_true - y_pred))

Visualizing metrics
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'])

Metric callbacks
EarlyStopping(monitor='val_accuracy', patience=3)
model.save('model.h5')  # HDF5 format
model.save('my_model/')  # TensorFlow SavedModel format

Loading saved models
model = tf.keras.models.load_model('model.h5')

Model checkpoints
ModelCheckpoint(filepath='best_model.h5', save_best_only=True)

TensorFlow Lite for mobile
converter = tf.lite.TFLiteConverter.from_saved_model('my_model')
tflite_model = converter.convert()

TensorFlow.js for web
tensorflowjs_converter --input_format=tf_saved_model my_model/ web_model/

Exporting to ONNX
# Use tf2onnx
python -m tf2onnx.convert --saved-model my_model --output model.onnx

Versioning models
# Save versioned directories: model/v1/, model/v2/
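A minimal sketch of that directory-based versioning; the version counter and paths are placeholders.

import os

version = 1  # bump for each new release (placeholder)
export_path = os.path.join('model', f'v{version}')
model.save(export_path)  # e.g. model/v1/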
Using Pickle
import pickle
pickle.dump(model, open('model.pkl', 'wb'))

Inference API
@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    prediction = model.predict(data)
    return jsonify(prediction.tolist())

Deployment examples
# Docker example
FROM tensorflow/tensorflow:latest
COPY model/ /app/model/
from tensorflow.keras.callbacks import Callback
# Example of using callbacks in model training
model.fit(X, y, epochs=10, callbacks=[callback1, callback2])

EarlyStopping
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=3)
model.fit(X, y, validation_data=(X_val, y_val), callbacks=[early_stop])

ModelCheckpoint
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint('model.h5', save_best_only=True)
model.fit(X, y, validation_split=0.2, callbacks=[checkpoint])

LearningRateScheduler
from tensorflow.keras.callbacks import LearningRateScheduler
def lr_schedule(epoch):
    return 0.01 * (0.1 ** (epoch // 10))
lr_sched = LearningRateScheduler(lr_schedule)
model.fit(X, y, callbacks=[lr_sched])

ReduceLROnPlateau
from tensorflow.keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=2)
model.fit(X, y, validation_data=(X_val, y_val), callbacks=[reduce_lr])

TensorBoard callback
from tensorflow.keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir='./logs')
model.fit(X, y, callbacks=[tensorboard])

CSVLogger
from tensorflow.keras.callbacks import CSVLogger
csv_logger = CSVLogger('training.log')
model.fit(X, y, callbacks=[csv_logger])

Custom callbacks
class MyCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        print(f"Epoch {epoch} ended. Loss: {logs['loss']}")
model.fit(X, y, callbacks=[MyCallback()])

Callback chaining
callbacks = [early_stop, checkpoint, tensorboard]
model.fit(X, y, callbacks=callbacks)

Monitoring and logging
# All logs can be accessed via the `logs` dictionary in custom callbacks
def on_epoch_end(self, epoch, logs=None):
    print(logs['val_accuracy'])
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))

Convolution layers
model.add(Conv2D(64, (3, 3), activation='relu'))

Pooling layers
from tensorflow.keras.layers import MaxPooling2D
model.add(MaxPooling2D(pool_size=(2, 2)))

Flattening
from tensorflow.keras.layers import Flatten
model.add(Flatten())

Feature maps
# Feature maps are automatically generated by Conv2D
# Inspect via an intermediate model or visualize with matplotlib
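One way that inspection might look, sketched under the assumption that model is the CNN above and img is a (64, 64, 3) array scaled to [0, 1]:

# Sketch: extract and plot the feature maps of the first Conv2D layer for one image.
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model

activation_model = Model(inputs=model.input, outputs=model.layers[0].output)
feature_maps = activation_model.predict(np.expand_dims(img, axis=0))  # (1, H, W, channels)
plt.imshow(feature_maps[0, :, :, 0], cmap='viridis')  # first channel only
plt.show()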
Dropout in CNNs
from tensorflow.keras.layers import Dropout
model.add(Dropout(0.5))

Image classification with CNNs
model.add(Dense(10, activation='softmax'))  # for 10 classes

Transfer learning basics
from tensorflow.keras.applications import VGG16
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(64, 64, 3))

Regularization in CNNs
from tensorflow.keras.regularizers import l2
Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.01))

Real-world CNN use case
# Example: MRI tumor detection using CNN
# Architecture same, but trained on an MRI image dataset
from tensorflow.keras.preprocessing.image import load_img
img = load_img('cat.jpg', target_size=(64, 64))

Rescaling pixels
from tensorflow.keras.preprocessing.image import img_to_array
img_array = img_to_array(img) / 255.0

ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(rescale=1./255)
train_gen = datagen.flow_from_directory('train/', target_size=(64, 64), class_mode='categorical')

Data augmentation
aug_datagen = ImageDataGenerator(rotation_range=20, zoom_range=0.2, horizontal_flip=True)

Building CNN for CIFAR-10
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(10, activation='softmax')
])

Fine-tuning pre-trained models
base_model.trainable = True  # Unfreeze layers for fine-tuning
model.compile(optimizer='adam', loss='categorical_crossentropy')

Multi-class classification
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Batch normalization
from tensorflow.keras.layers import BatchNormalization
model.add(BatchNormalization())

Visualizing filters
# Use an intermediate layer model to get filter outputs
from tensorflow.keras.models import Model
intermediate_model = Model(inputs=model.input, outputs=model.layers[1].output)

Model evaluation
model.evaluate(X_test, y_test)
from tensorflow.keras.applications import VGG16
base_model = VGG16(weights='imagenet', include_top=False)

VGG16 usage
model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

ResNet integration
from tensorflow.keras.applications import ResNet50
base_model = ResNet50(weights='imagenet', include_top=False)

InceptionNet and Xception
from tensorflow.keras.applications import InceptionV3
base_model = InceptionV3(weights='imagenet', include_top=False)

Freezing and unfreezing layers
for layer in base_model.layers:
    layer.trainable = False

Feature extraction
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(10, activation='softmax')(x)

Fine-tuning strategy
for layer in base_model.layers[-4:]:
    layer.trainable = True

Using pre-trained weights
model = VGG16(weights="imagenet", include_top=False)

Transfer learning on custom data
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_data, epochs=10)

Model export
model.save("custom_transfer_model.h5")
from tensorflow.keras.layers import SimpleRNN
model.add(SimpleRNN(64, input_shape=(timesteps, features)))

Use cases
# Text classification, stock prediction, etc.

RNN layers in Keras
from tensorflow.keras.layers import SimpleRNN
rnn_layer = SimpleRNN(32)

Sequence modeling
model = Sequential()
model.add(SimpleRNN(64, return_sequences=True))

Vanishing gradient problem
# LSTM/GRU address this with gating mechanisms

Bidirectional RNNs
from tensorflow.keras.layers import Bidirectional
model.add(Bidirectional(SimpleRNN(64)))

Masking and padding
from tensorflow.keras.layers import Masking
model.add(Masking(mask_value=0.0, input_shape=(timesteps, features)))

Simple RNN model
model = Sequential([
    SimpleRNN(64),
    Dense(1, activation='sigmoid')
])

Forecasting with RNNs
# Input shape: (batch_size, timesteps, features)
model.fit(X_train, y_train, epochs=20)

Real-world applications
# Google Translate and Alexa use RNN-based models
from tensorflow.keras.layers import LSTM
model.add(LSTM(64))

GRU vs LSTM
from tensorflow.keras.layers import GRU
model.add(GRU(64))

Memory cells
# Automatically handled in the LSTM layer, no manual memory cell needed

Time steps
model.add(LSTM(64, input_shape=(10, 8)))  # 10 time steps, 8 features

Text classification
model = Sequential([
    Embedding(input_dim=5000, output_dim=128),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

Time series prediction
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=10)

Stacked LSTM
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(32))

Dropout in LSTM
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))

Bidirectional LSTM
from tensorflow.keras.layers import Bidirectional
model.add(Bidirectional(LSTM(64)))

Combining LSTM with CNN
model = Sequential([
    Conv1D(64, 3, activation='relu'),
    LSTM(64),
    Dense(1, activation='sigmoid')
])
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(["This is a sentence"])
print(tokenizer.word_index)

Text vectorization
from tensorflow.keras.layers import TextVectorization
vectorizer = TextVectorization(max_tokens=1000)
vectorizer.adapt(["This is a sentence"])
print(vectorizer(["This is a sentence"]))

Word embeddings
from tensorflow.keras.layers import Embedding
embedding_layer = Embedding(input_dim=1000, output_dim=64)

Word2Vec integration
from gensim.models import Word2Vec
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1)
model.wv["word"]

Using pre-trained embeddings
embedding_matrix = ...  # Load GloVe vectors into matrix
model.add(Embedding(input_dim=vocab_size, output_dim=100, weights=[embedding_matrix], trainable=False))

LSTM for text generation
from tensorflow.keras.layers import LSTM
model.add(LSTM(128))

Sentiment analysis
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)

Attention mechanisms
# See Transformer section for self-attention code

Transformer basics
from transformers import TFAutoModel
transformer = TFAutoModel.from_pretrained("bert-base-uncased")

Text summarization
from transformers import pipeline
summarizer = pipeline("summarization")
summary = summarizer("Long article text here...")
# Example: similar words have similar vectors
# embedding['king'] - embedding['man'] + embedding['woman'] ≈ embedding['queen']

One-hot vs embeddings
# One-hot example: [0, 0, 1, 0] → word3 (but no meaning or similarity info)

Keras Embedding layer
model.add(Embedding(input_dim=10000, output_dim=64, input_length=100))

Pre-trained GloVe usage
import numpy as np
embeddings_index = {}
with open("glove.6B.100d.txt") as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

Fine-tuning embeddings
model.add(Embedding(input_dim=10000, output_dim=100, weights=[embedding_matrix], trainable=True))

Visualizing embeddings
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
tsne = TSNE(n_components=2)
reduced = tsne.fit_transform(embedding_matrix)
plt.scatter(reduced[:, 0], reduced[:, 1])

Embedding matrix
embedding_matrix = np.zeros((vocab_size, 100))
embedding_matrix[word_index['hello']] = embeddings_index['hello']

Handling unknown tokens
tokenizer = Tokenizer(oov_token="<OOV>")  # "<OOV>" is a common placeholder for out-of-vocabulary words

Padding sequences
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded = pad_sequences(sequences, padding='post', maxlen=100)

Use case: sentiment model
model = Sequential([
    Embedding(vocab_size, 100, input_length=100),
    LSTM(64),
    Dense(1, activation='sigmoid')
])
# Attention formula: Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) * V

Self-attention
# Used in Transformer encoders to weigh all tokens against each other

Encoder-Decoder structure
# Example in Transformer models: BERT (encoder), GPT (decoder)

Scaled Dot-Product Attention
def scaled_dot_attention(Q, K, V):
    d_k = tf.cast(tf.shape(K)[-1], tf.float32)
    scores = tf.matmul(Q, K, transpose_b=True) / tf.math.sqrt(d_k)
    weights = tf.nn.softmax(scores)
    return tf.matmul(weights, V)

Positional encoding
# Sinusoidal or learned positional encodings are added to input embeddings
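A sketch of the sinusoidal variant from the original Transformer paper; the result is simply added to the token embeddings so the model can use word order.

import numpy as np

def positional_encoding(max_len, d_model):
    pos = np.arange(max_len)[:, None]                        # (max_len, 1)
    i = np.arange(d_model)[None, :]                          # (1, d_model)
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / d_model)
    angles = pos * angle_rates
    angles[:, 0::2] = np.sin(angles[:, 0::2])                # even dimensions: sin
    angles[:, 1::2] = np.cos(angles[:, 1::2])                # odd dimensions: cos
    return angles                                            # (max_len, d_model)

pe = positional_encoding(max_len=100, d_model=64)
# embeddings = token_embeddings + pe[:seq_len]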
BERT with Keras
from transformers import TFBertModel
bert = TFBertModel.from_pretrained("bert-base-uncased")

GPT architecture
from transformers import GPT2LMHeadModel
model = GPT2LMHeadModel.from_pretrained("gpt2")

Hugging Face Transformers
from transformers import pipeline
qa = pipeline("question-answering")
qa({"question": "Who is the CEO of OpenAI?", "context": "Sam Altman is the CEO."})

Transformer training
from transformers import Trainer, TrainingArguments
args = TrainingArguments(output_dir="./model", per_device_train_batch_size=16)
trainer = Trainer(model=model, args=args, train_dataset=train_ds)

Applications of transformers
# Examples: BERT for classification, GPT for text gen, T5 for summarization
# A generative model tries to model P(data)
# E.g., generate images like handwritten digits from MNIST

Autoencoders
from keras.models import Model
from keras.layers import Input, Dense
input_img = Input(shape=(784,))
encoded = Dense(64, activation='relu')(input_img)
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)

Variational Autoencoders (VAE)
# VAE requires defining a custom loss with KL divergence + reconstruction loss
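A hedged sketch of what that custom loss can look like: a reconstruction term plus the KL term that pulls the latent distribution toward a standard normal prior (the 784 factor assumes flattened MNIST-style inputs).

import tensorflow as tf

def vae_loss(x, x_decoded, z_mean, z_log_var):
    # Reconstruction: mean binary cross-entropy per pixel, scaled by pixel count (assumes 784-dim inputs)
    reconstruction = tf.reduce_mean(tf.keras.losses.binary_crossentropy(x, x_decoded)) * 784
    # KL divergence between N(z_mean, exp(z_log_var)) and the N(0, I) prior
    kl = -0.5 * tf.reduce_mean(
        tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1))
    return reconstruction + kl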
GANs
# GAN: train generator to fool discriminator, discriminator to detect fakes

Building simple GAN
# generator = make_generator()
# discriminator = make_discriminator()
# Train them in alternating loops

Conditional GAN
# Input = [noise + label] for generator

Text generation
from keras.preprocessing.sequence import pad_sequences
# Train LSTM on character or word sequences to predict next word

Image generation
# StyleGAN, DCGAN are popular for this task

DeepFakes
# Typically use encoder-decoder to encode face features and reconstruct on another face

Ethical considerations
# Important: use responsibly and be aware of consequences
from keras.models import Model
from keras.layers import Input, Dense
inputs = Input(shape=(784,))
x = Dense(64, activation='relu')(inputs)
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)

Inputs and outputs
print(model.inputs)
print(model.outputs)

Multi-input models
from keras.layers import concatenate
input1 = Input(shape=(32,))
input2 = Input(shape=(64,))
merged = concatenate([input1, input2])

Multi-output models
output1 = Dense(1, name='output1')(merged)
output2 = Dense(1, name='output2')(merged)
model = Model(inputs=[input1, input2], outputs=[output1, output2])

Shared layers
shared_dense = Dense(64)
output1 = shared_dense(input1)
output2 = shared_dense(input2)

Residual connections
from keras.layers import Add
residual = Add()([input_tensor, x])

Model visualization
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True)

Model summary
model.summary()

Custom models
class MyModel(keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = Dense(10)

    def call(self, inputs):
        return self.dense(inputs)

Real-world case
# Complex models with branches are easier using the Functional API
# Use when standard layers aren't sufficient for your logic

Building custom Layer class
class MyLayer(keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def call(self, inputs):
        return inputs * 2

Using custom functions
class MultiplyByTen(keras.layers.Layer):
    def call(self, inputs):
        return inputs * 10

Custom activation
from keras.layers import Activation, Lambda
def custom_relu(x):
    return tf.maximum(0.1 * x, x)
model.add(Lambda(custom_relu))

Custom loss function
def custom_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_pred - y_true))
model.compile(loss=custom_loss, optimizer='adam')

Custom metric
def custom_accuracy(y_true, y_pred):
    return tf.reduce_mean(tf.cast(tf.equal(y_true, tf.round(y_pred)), tf.float32))

Subclassing Model
class MyModel(keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = Dense(10)

    def call(self, inputs):
        return self.dense(inputs)

Custom training loop
with tf.GradientTape() as tape:
    y_pred = model(x)
    loss = custom_loss(y_true, y_pred)
grads = tape.gradient(loss, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))

Debugging custom models
tf.print("Shape:", tf.shape(x))

Examples
# Create a custom attention mechanism by subclassing Layer
encoded = encoder(input_data)
decoded = decoder(encoded)
autoencoder = Model(inputs=input_data, outputs=decoded)

Architecture overview
input_img = Input(shape=(784,))
encoded = Dense(32, activation='relu')(input_img)
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)

Encoder & decoder models
encoder = Model(input_img, encoded)
encoded_input = Input(shape=(32,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

Denoising autoencoder
x_train_noisy = x_train + noise  # noise: random array with the same shape as x_train
autoencoder.fit(x_train_noisy, x_train, epochs=50)  # targets are the clean images

Sparse autoencoder
encoded = Dense(64, activation='relu', activity_regularizer=regularizers.l1(1e-5))(input_img)

Variational autoencoder
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
epsilon = tf.random.normal(shape=tf.shape(z_mean))  # reparameterization trick
z = z_mean + tf.exp(0.5 * z_log_var) * epsilon

Applications
# Apply encoder to extract compressed representation for clustering
encoded_imgs = encoder.predict(x_test)

Image compression
autoencoder.fit(x_train, x_train, epochs=50, batch_size=256)

Anomaly detection
if reconstruction_error > threshold:
    print("Anomaly detected")
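One way to obtain reconstruction_error and a threshold, sketched with a simple percentile rule; flattened 2-D inputs and the 95th-percentile cut-off are assumptions.

import numpy as np

reconstructions = autoencoder.predict(x_test)
reconstruction_error = np.mean(np.square(x_test - reconstructions), axis=1)  # per-sample MSE
threshold = np.percentile(reconstruction_error, 95)  # assumption: flag the top 5%
anomalies = reconstruction_error > threshold
print(f"{anomalies.sum()} potential anomalies detected")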
Visualization
encoded_imgs = encoder.predict(x_test)
plt.scatter(encoded_imgs[:, 0], encoded_imgs[:, 1])
# Example time series data
series = [112, 118, 132, 129, 121, 135, ...]

Sliding window approach
X = [series[i:i+window] for i in range(len(series)-window)]
y = [series[i+window] for i in range(len(series)-window)]

Data reshaping
X = np.reshape(X, (X.shape[0], X.shape[1], 1))

LSTM for forecasting
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(window, 1)))
model.add(Dense(1))

Multi-step predictions
model.add(Dense(3))  # Predict next 3 time steps

Normalization
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_series = scaler.fit_transform(series.reshape(-1, 1))

Evaluation metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

Visualizing predictions
plt.plot(y_true, label='Actual')
plt.plot(y_pred, label='Predicted')

Combining with CNN
model = Sequential()
model.add(Conv1D(64, 3, activation='relu', input_shape=(window, 1)))
model.add(LSTM(50))

Real-world project
# Predict future stock prices using LSTM with sliding window
# Output: [class, x_min, y_min, x_max, y_max]

CNN backbone
base_model = tf.keras.applications.ResNet50(include_top=False, input_shape=(224, 224, 3))

YOLO with Keras
# YOLO uses custom loss for bounding boxes + classification

SSD overview
# SSD uses anchor boxes at multiple feature map levels

Bounding boxes
box = [x_min, y_min, x_max, y_max]
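A small helper for Intersection over Union (IoU) between two boxes in this format; it underlies both anchor matching and mAP evaluation.

def iou(box_a, box_b):
    # overlap rectangle
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-7)

print(iou([0, 0, 10, 10], [5, 5, 15, 15]))  # ~0.14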
Anchor boxes
# Anchors = reference boxes for detection layers

Label encoding
# Label = [class, x_center, y_center, width, height]

Transfer learning for detection
model = tf.keras.Model(inputs=base_model.input, outputs=detection_head)

Evaluating mAP
# mAP is calculated using the precision-recall curve for each class

Real-world example
# Detect faces in webcam feed with YOLOv5 or Haar cascades
# Each pixel labeled with a class (e.g., 0=background, 1=object)
segmentation_mask = model.predict(image)

U-Net architecture
# Load U-Net from the segmentation_models library
import segmentation_models as sm
model = sm.Unet('resnet34', input_shape=(128, 128, 3), classes=1, activation='sigmoid')

Data preparation
# Normalize images, resize masks
image = image / 255.0
mask = tf.image.resize(mask, (128, 128))

Mask generation
# Convert RGB mask to one-hot encoded class mask
mask = tf.cast(mask == class_id, tf.float32)

Dice loss
def dice_loss(y_true, y_pred):
    intersection = tf.reduce_sum(y_true * y_pred)
    return 1 - (2. * intersection + 1) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + 1)

IoU metric
iou = tf.keras.metrics.MeanIoU(num_classes=2)
iou.update_state(y_true, y_pred)

Augmentation techniques
# Albumentations is popular for segmentation
import albumentations as A
A.HorizontalFlip(p=0.5)

Post-processing
# Convert logits to binary mask
mask = (model.predict(image) > 0.5).astype("uint8")

Applications
# Example: identify tumors in medical images

Keras example
model.compile(optimizer='adam', loss=dice_loss, metrics=['accuracy'])
model.fit(train_dataset, validation_data=val_dataset, epochs=10)
from transformers import pipeline
classifier = pipeline("sentiment-analysis")
classifier("Hugging Face is awesome!")

Tokenizers
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
tokens = tokenizer("Hello!", return_tensors="pt")

Importing models
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

Text classification
from transformers import Trainer
trainer = Trainer(model=model, train_dataset=train_ds, eval_dataset=val_ds)
trainer.train()

Question answering
qa = pipeline("question-answering")
qa(question="What is Hugging Face?", context="Hugging Face is an AI company.")

Text generation
gen = pipeline("text-generation", model="gpt2")
gen("Once upon a time", max_length=50)

Fine-tuning BERT
trainer.train()  # After loading model, tokenizer, and datasets

Datasets module
from datasets import load_dataset
dataset = load_dataset("imdb")

Saving & exporting models
model.save_pretrained("my_bert_model")
tokenizer.save_pretrained("my_bert_model")

Use case demo
summarizer = pipeline("summarization")
summarizer("Your long document text...")
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

Weight initialization
Dense(64, kernel_initializer='he_uniform')

Gradient clipping
optimizer = tf.keras.optimizers.Adam(clipvalue=1.0)

Mixed precision training
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

XLA compilation
@tf.function(jit_compile=True)
def train_step(inputs):
    ...

Multi-GPU training
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = create_model()

TPU support
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)

Quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

Pruning
import tensorflow_model_optimization as tfmot
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(model)

Model distillation
# Train student on soft labels from the teacher:
# student.predict(x_train) ≈ teacher.predict(x_train)
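A hedged sketch of a distillation loss: the student is trained on a mix of hard labels and the teacher's temperature-softened probabilities. The temperature and alpha values are illustrative.

import tensorflow as tf

def distillation_loss(y_true, student_logits, teacher_logits, temperature=3.0, alpha=0.1):
    # hard-label term: normal cross-entropy against the ground truth
    hard = tf.keras.losses.sparse_categorical_crossentropy(
        y_true, tf.nn.softmax(student_logits))
    # soft-label term: KL between softened teacher and student distributions
    soft = tf.keras.losses.kl_divergence(
        tf.nn.softmax(teacher_logits / temperature),
        tf.nn.softmax(student_logits / temperature)) * (temperature ** 2)
    return alpha * hard + (1 - alpha) * soft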
# Conceptual example: explain model output importance
# Actual implementation uses libraries like SHAP or LIME

SHAP
import shap
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)

LIME
from lime import lime_tabular
explainer = lime_tabular.LimeTabularExplainer(X_train)
exp = explainer.explain_instance(X_test[0], model.predict)
exp.show_in_notebook()

Grad-CAM
# Keras Grad-CAM example available in TensorFlow tutorials

Feature importance
import matplotlib.pyplot as plt
plt.bar(feature_names, model.feature_importances_)
plt.show()

Saliency maps
# Use guided backpropagation or integrated gradients for saliency maps
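A minimal gradient-based saliency sketch with tf.GradientTape; model and a single image array are assumed to exist, and the max over colour channels is one common reduction choice.

import numpy as np
import tensorflow as tf

img = tf.convert_to_tensor(np.expand_dims(image, axis=0), dtype=tf.float32)
with tf.GradientTape() as tape:
    tape.watch(img)
    preds = model(img)
    top_class = tf.gather(preds[0], tf.argmax(preds[0]))  # score of the predicted class
grads = tape.gradient(top_class, img)                     # d(score) / d(pixels)
saliency = tf.reduce_max(tf.abs(grads), axis=-1)[0]       # max over colour channels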
Keras callbacks for visualization
from tensorflow.keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir='./logs')
model.fit(X_train, y_train, callbacks=[tensorboard])

Explainable AI (XAI) tools
# pip install shap lime eli5 interpret

Interpretability for stakeholders
# Use dashboards and visual reports to communicate insights

Tools comparison
# Choose tool based on model type: tree, deep learning, tabular
model.save("model_saved_model") # or model.save("model.h5")REST API with Flask
from flask import Flask, request, jsonify app = Flask(__name__) @app.route('/predict', methods=['POST']) def predict(): data = request.json prediction = model.predict(data['input']) return jsonify({'prediction': prediction.tolist()})TensorFlow Serving
# Run TF Serving docker container docker run -p 8501:8501 --mount type=bind,\ source=/models/model_name/,target=/models/model_name/ \ -e MODEL_NAME=model_name tensorflow/servingTensorFlow Lite
import tensorflow as tf converter = tf.lite.TFLiteConverter.from_saved_model("model_saved_model") tflite_model = converter.convert()TensorFlow.js
# Convert model for TF.js tensorflowjs_converter --input_format=tf_saved_model model_saved_model/ web_model/AWS deployment
# Example: Deploy using SageMaker SDK (Python) import sagemaker # Configure and deploy modelDockerizing Keras model
# Sample Dockerfile snippet FROM python:3.8 COPY model.h5 /app/Monitoring deployments
# Use Prometheus, Grafana for monitoring endpointsScaling inference
# Kubernetes autoscaling example kubectl autoscale deployment model-server --min=2 --max=10 --cpu-percent=80CI/CD for ML
# Use GitHub Actions, Jenkins for ML workflow automation
# Example: Tokenization with NLTK
import nltk
tokens = nltk.word_tokenize("Hello, how can I help?")

Intent classification
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
# Train classifier to predict intents

Entity extraction
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Book a flight to New York tomorrow")
entities = [(ent.text, ent.label_) for ent in doc.ents]

Sequence-to-sequence
# Simple seq2seq architecture in Keras

Attention-based responses
# Attention layers in Transformer models

Context management
# Store user session data or dialogue states

Using transformers
from transformers import pipeline
chatbot = pipeline("conversational")

Chatbot deployment
# Deploy using Flask or serverless functions

Rasa integration
# Rasa example: training and running a chatbot
rasa train
rasa run

Evaluation
# Human-in-the-loop and automated testing methods
# Simple GAN architecture sketch
# Generator creates fake samples, discriminator classifies real vs fake

Generator architecture
from tensorflow.keras.layers import Dense, Reshape
generator = Sequential([
    Dense(128, activation='relu', input_shape=(100,)),
    Dense(784, activation='sigmoid'),
    Reshape((28, 28, 1))
])

Discriminator model
discriminator = Sequential([
    Flatten(input_shape=(28, 28, 1)),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

Training loop
# Pseudocode for GAN training
# for epoch in epochs:
#     train discriminator on real + fake
#     train generator via discriminator feedback
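A runnable sketch of one alternating training step with tf.GradientTape, assuming the generator and discriminator above and a 100-dimensional noise vector; the learning rates and batch size are illustrative.

import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy()
g_opt = tf.keras.optimizers.Adam(1e-4)
d_opt = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(real_images, batch_size=64):
    noise = tf.random.normal((batch_size, 100))
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake_images = generator(noise, training=True)
        real_out = discriminator(real_images, training=True)
        fake_out = discriminator(fake_images, training=True)
        # discriminator: real -> 1, fake -> 0
        d_loss = bce(tf.ones_like(real_out), real_out) + bce(tf.zeros_like(fake_out), fake_out)
        # generator: wants fakes to be classified as real
        g_loss = bce(tf.ones_like(fake_out), fake_out)
    d_opt.apply_gradients(zip(d_tape.gradient(d_loss, discriminator.trainable_variables),
                              discriminator.trainable_variables))
    g_opt.apply_gradients(zip(g_tape.gradient(g_loss, generator.trainable_variables),
                              generator.trainable_variables))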
Stability tips
# Example: use batch normalization in generator layers
from tensorflow.keras.layers import BatchNormalization
generator.add(BatchNormalization())

Conditional GANs
# Add label inputs concatenated with noise vector

DCGAN
# Use Conv2DTranspose in generator, Conv2D in discriminator

CycleGAN
# Used for style transfer like horses ↔ zebras

Pix2Pix
# Example: input edges → output photo

GAN applications
# Generate realistic images, augment data, create artworks
# Example: CNN for X-ray image classification

Fraud detection
# Use anomaly detection algorithms on transaction data

E-commerce recommendations
# Collaborative filtering or content-based recommendation systems

Stock prediction
# Time series LSTM model for price prediction

Social media sentiment
# Sentiment analysis with LSTM or transformers

Autonomous driving
# Object detection with YOLO or SSD networks

Language translation
# Transformer-based translation systems

Voice synthesis
# Tacotron or WaveNet models

Facial recognition
# FaceNet or similar deep learning architectures

AR/VR
# Real-time gesture recognition using CNNs
# Use pipelines to automate workflows

Experiment tracking
import mlflow
mlflow.start_run()
mlflow.log_param("lr", 0.01)
mlflow.log_metric("accuracy", 0.95)
mlflow.end_run()

Model versioning
# Save models with version IDs
model.save("model_v1.h5")

CI/CD pipelines
# Example: GitHub Actions or Jenkins pipeline scripts

MLflow usage
mlflow.start_run()
mlflow.log_param("batch_size", 64)
mlflow.sklearn.log_model(model, "model")
mlflow.end_run()

Monitoring models
# Use Prometheus or custom dashboards

Drift detection
# Statistical tests or dedicated libraries like Alibi Detect
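A simple statistical-test sketch using a two-sample Kolmogorov-Smirnov test from SciPy; reference and current are assumed to be 2-D NumPy arrays with the same feature layout. Dedicated libraries like Alibi Detect provide more robust detectors.

from scipy.stats import ks_2samp

def detect_drift(reference, current, alpha=0.05):
    drifted = []
    for col in range(reference.shape[1]):
        stat, p_value = ks_2samp(reference[:, col], current[:, col])
        if p_value < alpha:
            drifted.append(col)
    return drifted  # indices of features whose distribution shifted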
Feature stores
# Feast or similar feature store tools

Real-time inference
# TensorFlow Serving or FastAPI endpoints

Best practices
# Automate testing, versioning, and monitoring
# Example: Checking class balance in Python
from collections import Counter
print(Counter(y_train))

# Use AIF360 library to compute fairness metrics
from aif360.metrics import ClassificationMetric
# Document model decisions and data sources for auditability
import shap
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# TensorFlow Federated example to train without centralized data
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
# Automate model fairness and performance checks in CI/CD pipelines
# Maintain governance docs and approval workflows
# Use human review for flagged predictions before final action
# Include data processing agreements and consent management
# Monitor training logs for errors and warnings
# Apply gradient clipping in TensorFlow
optimizer = tf.keras.optimizers.Adam()

@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        loss = loss_fn(y, model(x))  # loss_fn: any Keras loss
    gradients = tape.gradient(loss, model.trainable_variables)
    clipped = [tf.clip_by_norm(g, 1.0) for g in gradients]
    optimizer.apply_gradients(zip(clipped, model.trainable_variables))
# Use ReLU activations to mitigate vanishing gradients
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# Visualize samples and labels to check alignment
# Use train_test_split with stratification and shuffle
import numpy as np
import tensorflow as tf
np.random.seed(42)
tf.random.set_seed(42)

import mlflow
mlflow.log_param("lr", 0.001)

# Use pdb in Python scripts
import pdb; pdb.set_trace()

# TensorBoard profiling example
tensorboard --logdir=logs/profile
// Explore roles on job boards like LinkedIn, Glassdoor
// Use LeetCode and interview prep platforms
// Create repositories with clear READMEs and demos
git init
git add .
git commit -m "Initial commit"
git push origin main
// Examples: Google AI, IBM Data Science Professional Certificate
// Tailor resumes for each job application
// Practice behavioral and technical questions regularly
// Join platforms like Upwork or Freelancer
// Start a blog or YouTube channel on AI topics
// Use arXiv, Twitter, and conferences like NeurIPS