from keras.models import Sequential
model = Sequential()
History and evolution
import keras
print(keras.__version__)
Installing Keras & dependencies
pip install tensorflow keras
Backend support (TensorFlow, Theano, CNTK)
from keras import backend as K
print(K.backend())
Keras vs other frameworks
# Keras uses Sequential or Functional APIs for ease of use
model = Sequential()
Keras ecosystem overview
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
Hello World with Keras
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(10, input_shape=(5,), activation='relu'))
Project structure in Keras
/project
/data
/models
train.py
utils.py
Community & documentation
# Visit https://keras.io for API references and tutorials
Use cases of Keras
# Keras is used in domains like healthcare, finance, and robotics
# Each artificial neuron: output = activation(weighted_sum + bias)
Perceptron model
output = 1 if (w1*x1 + w2*x2 + b) > 0 else 0
Feedforward neural networks
model = Sequential([
Dense(10, input_shape=(4,), activation='relu'),
Dense(1, activation='sigmoid')
])
Activation functions
from keras.layers import Activation
model.add(Dense(64))
model.add(Activation('relu'))
Loss functions
model.compile(loss='binary_crossentropy', optimizer='adam')
Optimizers overview
model.compile(optimizer='adam')
Forward and backward propagation
# Automatically handled in model.fit()
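A minimal sketch of what fit() does for one batch, using tf.GradientTape (model, x_batch, and y_batch are assumed to already exist):
import tensorflow as tf
optimizer = tf.keras.optimizers.Adam()
with tf.GradientTape() as tape:
    y_pred = model(x_batch, training=True)                       # forward pass
    loss = tf.keras.losses.binary_crossentropy(y_batch, y_pred)  # compute loss
grads = tape.gradient(loss, model.trainable_weights)             # backward pass
optimizer.apply_gradients(zip(grads, model.trainable_weights))   # update weights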
Epochs and batches
model.fit(X, y, epochs=10, batch_size=32)
Overfitting and underfitting
from keras.layers import Dropout
model.add(Dropout(0.5))
Deep learning in real life
# Example: text sentiment classification or image detection
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(100,)))
Adding dense layers
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))
Configuring activation functions
model.add(Dense(1, activation='sigmoid'))
Compiling the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
Model summary
model.summary()
Training the model
model.fit(X_train, y_train, epochs=10, batch_size=32)
Evaluating model accuracy
loss, acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {acc}")
Making predictions
predictions = model.predict(X_new)
Saving and loading models
model.save('my_model.h5')
loaded_model = keras.models.load_model('my_model.h5')
Use case: Basic classifier
model = Sequential([
Dense(128, activation='relu', input_shape=(784,)),
Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
from tensorflow.keras.layers import Normalization
normalizer = Normalization()
normalizer.adapt(data) # data is a NumPy array or tf.data dataset
Splitting train/test/validation
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
Encoding labels
from tensorflow.keras.utils import to_categorical
y_encoded = to_categorical(y)
Data generators
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(rescale=1./255)
train_gen = datagen.flow_from_directory('train/', target_size=(64, 64))
Image preprocessing
img = tf.keras.utils.load_img("cat.jpg", target_size=(64,64))
img_array = tf.keras.utils.img_to_array(img)/255.0
Text preprocessing
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
Sequence padding
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded = pad_sequences(sequences, padding='post', maxlen=100)
Handling missing values
import numpy as np
X = np.nan_to_num(X) # replaces NaN with 0
Batch processing
model.fit(X_train, y_train, batch_size=32, epochs=10)
Data augmentation
datagen = ImageDataGenerator(rotation_range=20, horizontal_flip=True)
augmented = datagen.flow(X_train, y_train)
from tensorflow.keras.activations import sigmoid
output = sigmoid(x)
Tanh
from tensorflow.keras.activations import tanh
output = tanh(x)
ReLU
from tensorflow.keras.layers import Activation
model.add(Dense(64))
model.add(Activation('relu'))
Leaky ReLU
from tensorflow.keras.layers import LeakyReLU
model.add(LeakyReLU(alpha=0.01))
ELU
from tensorflow.keras.layers import ELU
model.add(ELU(alpha=1.0))
Softmax
model.add(Dense(3, activation='softmax'))
Swish
from tensorflow.keras.activations import swish
output = swish(x)
Choosing the right activation
# No code: selection depends on the task
Custom activations
from tensorflow.keras.layers import Lambda
model.add(Lambda(lambda x: x**2))
Visualizing activations
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(-10, 10, 100)
plt.plot(x, np.maximum(0, x)) # ReLU
plt.show()
model.compile(optimizer='adam', loss='mean_squared_error')
Binary Crossentropy
model.compile(optimizer='adam', loss='binary_crossentropy')
Categorical Crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy')
Sparse Categorical Crossentropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
Hinge loss
model.compile(optimizer='adam', loss='hinge')
Kullback-Leibler divergence
model.compile(optimizer='adam', loss='kullback_leibler_divergence')
Custom loss functions
def custom_loss(y_true, y_pred):
return tf.reduce_mean(tf.square(y_true - y_pred) + 0.1)
model.compile(optimizer='adam', loss=custom_loss)
Choosing loss by task
# Decision logic based on task type
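A rough rule-of-thumb mapping (a sketch, not exhaustive):
# Binary classification        -> 'binary_crossentropy'
# Multi-class, one-hot labels  -> 'categorical_crossentropy'
# Multi-class, integer labels  -> 'sparse_categorical_crossentropy'
# Regression                   -> 'mean_squared_error' or 'mean_absolute_error'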
Regularization penalties
from tensorflow.keras import regularizers
model.add(Dense(64, kernel_regularizer=regularizers.l2(0.01)))
Loss function behavior
import matplotlib.pyplot as plt
x = np.linspace(-1, 1, 100)
plt.plot(x, x**2) # MSE shape
plt.show()
# Basic Gradient Descent: theta = theta - learning_rate * gradient
Stochastic Gradient Descent (SGD)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
Momentum
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
Nesterov Accelerated Gradient
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
Adam
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
RMSprop
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
Adagrad
optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.01)
Nadam
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.002)
Choosing the right optimizer
# Example: try different optimizers and compare performance
Learning rate scheduling
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate=0.01, decay_steps=10000, decay_rate=0.9)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
accuracy = (TP + TN) / (TP + TN + FP + FN)
Precision & Recall
precision = TP / (TP + FP)
recall = TP / (TP + FN)
F1 Score
f1_score = 2 * (precision * recall) / (precision + recall)
Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true, y_pred)
ROC-AUC
from sklearn.metrics import roc_auc_score
auc = roc_auc_score(y_true, y_scores)
Mean Absolute Error
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_true, y_pred)
R-squared
from sklearn.metrics import r2_score
r2 = r2_score(y_true, y_pred)
Custom metrics
def custom_metric(y_true, y_pred):
return tf.reduce_mean(tf.abs(y_true - y_pred))
Visualizing metrics
import matplotlib.pyplot as plt
plt.plot(history.history['accuracy'])
Metric callbacks
EarlyStopping(monitor='val_accuracy', patience=3)
model.save('model.h5') # HDF5 format
model.save('my_model/') # TensorFlow SavedModel format
Loading saved models
model = tf.keras.models.load_model('model.h5')
Model checkpoints
ModelCheckpoint(filepath='best_model.h5', save_best_only=True)
TensorFlow Lite for mobile
converter = tf.lite.TFLiteConverter.from_saved_model('my_model')
tflite_model = converter.convert()
TensorFlow.js for web
tensorflowjs_converter --input_format=tf_saved_model my_model/ web_model/
Exporting to ONNX
# Use tf2onnx:
python -m tf2onnx.convert --saved-model my_model --output model.onnx
Versioning models
# Save versioned directories: model/v1/, model/v2/
Using Pickle
import pickle
pickle.dump(model, open('model.pkl', 'wb'))
Inference API
@app.route('/predict', methods=['POST'])
def predict():
data = request.get_json()
prediction = model.predict(data)
return jsonify(prediction.tolist())
Deployment examples
# Docker example
FROM tensorflow/tensorflow:latest
COPY model/ /app/model/
from tensorflow.keras.callbacks import Callback
# Example of using callbacks in model training
model.fit(X, y, epochs=10, callbacks=[callback1, callback2])
EarlyStopping
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=3)
model.fit(X, y, validation_data=(X_val, y_val), callbacks=[early_stop])
ModelCheckpoint
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint('model.h5', save_best_only=True)
model.fit(X, y, validation_split=0.2, callbacks=[checkpoint])
LearningRateScheduler
from tensorflow.keras.callbacks import LearningRateScheduler
def lr_schedule(epoch): return 0.01 * (0.1 ** (epoch // 10))
lr_sched = LearningRateScheduler(lr_schedule)
model.fit(X, y, callbacks=[lr_sched])
ReduceLROnPlateau
from tensorflow.keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=2)
model.fit(X, y, validation_data=(X_val, y_val), callbacks=[reduce_lr])
TensorBoard callback
from tensorflow.keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir='./logs')
model.fit(X, y, callbacks=[tensorboard])
CSVLogger
from tensorflow.keras.callbacks import CSVLogger
csv_logger = CSVLogger('training.log')
model.fit(X, y, callbacks=[csv_logger])
Custom callbacks
class MyCallback(Callback):
def on_epoch_end(self, epoch, logs=None):
print(f"Epoch {epoch} ended. Loss: {logs['loss']}")
model.fit(X, y, callbacks=[MyCallback()])
Callback chaining
callbacks = [early_stop, checkpoint, tensorboard]
model.fit(X, y, callbacks=callbacks)
Monitoring and logging
# All logs can be accessed via `logs` dictionary in custom callbacks
def on_epoch_end(self, epoch, logs=None): print(logs['val_accuracy'])
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
Convolution layers
model.add(Conv2D(64, (3, 3), activation='relu'))
Pooling layers
from tensorflow.keras.layers import MaxPooling2D
model.add(MaxPooling2D(pool_size=(2, 2)))
Flattening
from tensorflow.keras.layers import Flatten
model.add(Flatten())
Feature maps
# Feature maps are automatically generated by Conv2D
# Inspect via intermediate model or visualize with matplotlib
Dropout in CNNs
from tensorflow.keras.layers import Dropout
model.add(Dropout(0.5))
Image classification with CNNs
model.add(Dense(10, activation='softmax')) # for 10 classes
Transfer learning basics
from tensorflow.keras.applications import VGG16
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(64,64,3))
Regularization in CNNs
from tensorflow.keras.regularizers import l2
Conv2D(32, (3,3), activation='relu', kernel_regularizer=l2(0.01))
Real-world CNN use case
# Example: MRI tumor detection using CNN
# Architecture same, but trained on MRI image dataset
from tensorflow.keras.preprocessing.image import load_img
img = load_img('cat.jpg', target_size=(64, 64))
Rescaling pixels
from tensorflow.keras.preprocessing.image import img_to_array
img_array = img_to_array(img) / 255.0
ImageDataGenerator
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(rescale=1./255)
train_gen = datagen.flow_from_directory('train/', target_size=(64,64), class_mode='categorical')
Data augmentation
aug_datagen = ImageDataGenerator(rotation_range=20, zoom_range=0.2, horizontal_flip=True)
Building CNN for CIFAR-10
model = Sequential([
Conv2D(32, (3,3), activation='relu', input_shape=(32,32,3)),
MaxPooling2D(2,2),
Flatten(),
Dense(10, activation='softmax')
])
Fine-tuning pre-trained models
base_model.trainable = True # Unfreeze layers for fine-tuning
model.compile(optimizer='adam', loss='categorical_crossentropy')
Multi-class classification
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
Batch normalization
from tensorflow.keras.layers import BatchNormalization
model.add(BatchNormalization())
Visualizing filters
# Use intermediate layer model to get filter outputs
from tensorflow.keras.models import Model
intermediate_model = Model(inputs=model.input, outputs=model.layers[1].output)
Model evaluation
model.evaluate(X_test, y_test)
from tensorflow.keras.applications import VGG16
base_model = VGG16(weights='imagenet', include_top=False)
VGG16 usage
model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
ResNet integration
from tensorflow.keras.applications import ResNet50
base_model = ResNet50(weights='imagenet', include_top=False)
InceptionNet and Xception
from tensorflow.keras.applications import InceptionV3
base_model = InceptionV3(weights='imagenet', include_top=False)
Freezing and unfreezing layers
for layer in base_model.layers:
layer.trainable = False
Feature extraction
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(10, activation='softmax')(x)
Fine-tuning strategy
for layer in base_model.layers[-4:]:
layer.trainable = True
Using pre-trained weights
model = VGG16(weights="imagenet", include_top=False)
Transfer learning on custom data
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_data, epochs=10)
Model export
model.save("custom_transfer_model.h5")
from tensorflow.keras.layers import SimpleRNN
model.add(SimpleRNN(64, input_shape=(timesteps, features)))
Use cases
# Text classification, stock prediction, etc.
RNN layers in Keras
from tensorflow.keras.layers import SimpleRNN
rnn_layer = SimpleRNN(32)
Sequence modeling
model = Sequential()
model.add(SimpleRNN(64, return_sequences=True))
Vanishing gradient problem
# LSTM/GRU address this with gating mechanisms
Bidirectional RNNs
from tensorflow.keras.layers import Bidirectional
model.add(Bidirectional(SimpleRNN(64)))
Masking and padding
from tensorflow.keras.layers import Masking
model.add(Masking(mask_value=0.0, input_shape=(timesteps, features)))
Simple RNN model
model = Sequential([
SimpleRNN(64),
Dense(1, activation='sigmoid')
])
Forecasting with RNNs
# Input shape: (batch_size, timesteps, features)
model.fit(X_train, y_train, epochs=20)
Real-world applications
# Google Translate and Alexa use RNN-based models
from tensorflow.keras.layers import LSTM
model.add(LSTM(64))
GRU vs LSTM
from tensorflow.keras.layers import GRU
model.add(GRU(64))
Memory cells
# Automatically handled in LSTM layer, no manual memory cell needed
Time steps
model.add(LSTM(64, input_shape=(10, 8))) # 10 time steps, 8 features
Text classification
model = Sequential([
Embedding(input_dim=5000, output_dim=128),
LSTM(64),
Dense(1, activation='sigmoid')
])
Time series prediction
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=10)
Stacked LSTM
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(32))
Dropout in LSTM
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
Bidirectional LSTM
from tensorflow.keras.layers import Bidirectional
model.add(Bidirectional(LSTM(64)))
Combining LSTM with CNN
model = Sequential([
Conv1D(64, 3, activation='relu'),
LSTM(64),
Dense(1, activation='sigmoid')
])
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(["This is a sentence"])
print(tokenizer.word_index)
Text vectorization
from tensorflow.keras.layers import TextVectorization
vectorizer = TextVectorization(max_tokens=1000)
vectorizer.adapt(["This is a sentence"])
print(vectorizer(["This is a sentence"]))
Word embeddings
from tensorflow.keras.layers import Embedding
embedding_layer = Embedding(input_dim=1000, output_dim=64)
Word2Vec integration
from gensim.models import Word2Vec
model = Word2Vec(sentences, vector_size=100, window=5, min_count=1)
model.wv["word"]
Using pre-trained embeddings
embedding_matrix = ... # Load GloVe vectors into matrix
model.add(Embedding(input_dim=vocab_size, output_dim=100, weights=[embedding_matrix], trainable=False))
LSTM for text generation
from tensorflow.keras.layers import LSTM
model.add(LSTM(128))
Sentiment analysis
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5)
Attention mechanisms
# See Transformer section for self-attention code
Transformer basics
from transformers import TFAutoModel
transformer = TFAutoModel.from_pretrained("bert-base-uncased")
Text summarization
from transformers import pipeline
summarizer = pipeline("summarization")
summary = summarizer("Long article text here...")
# Example: Similar words have similar vectors
# embedding['king'] - embedding['man'] + embedding['woman'] ≈ embedding['queen']
One-hot vs embeddings
# One-hot example:
# [0, 0, 1, 0] → word3 (but no meaning or similarity info)
Keras Embedding layer
model.add(Embedding(input_dim=10000, output_dim=64, input_length=100))
Pre-trained GloVe usage
embeddings_index = {}
with open("glove.6B.100d.txt") as f:
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
Fine-tuning embeddings
model.add(Embedding(input_dim=10000, output_dim=100, weights=[embedding_matrix], trainable=True))
Visualizing embeddings
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
tsne = TSNE(n_components=2)
reduced = tsne.fit_transform(embedding_matrix)
plt.scatter(reduced[:, 0], reduced[:, 1])
Embedding matrix
embedding_matrix = np.zeros((vocab_size, 100))
embedding_matrix[word_index['hello']] = embeddings_index['hello']
Handling unknown tokens
tokenizer = Tokenizer(oov_token="<OOV>")  # out-of-vocabulary placeholder token
Padding sequences
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded = pad_sequences(sequences, padding='post', maxlen=100)
Use case: sentiment model
model = Sequential([
Embedding(vocab_size, 100, input_length=100),
LSTM(64),
Dense(1, activation='sigmoid')
])
# Attention formula:
# Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) * V
Self-attention
# Used in Transformer encoders to weigh all tokens against each other
Encoder-Decoder structure
# Example in Transformer models: BERT (encoder), GPT (decoder)
Scaled Dot-Product Attention
def scaled_dot_attention(Q, K, V):
d_k = tf.cast(tf.shape(K)[-1], tf.float32)
scores = tf.matmul(Q, K, transpose_b=True) / tf.math.sqrt(d_k)
weights = tf.nn.softmax(scores)
return tf.matmul(weights, V)
Positional encoding
# Sinusoidal or learned positional encodings are added to input embeddings
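A minimal sinusoidal positional-encoding sketch (seq_len and d_model values are illustrative):
import numpy as np

def positional_encoding(seq_len, d_model):
    pos = np.arange(seq_len)[:, np.newaxis]              # (seq_len, 1)
    i = np.arange(d_model)[np.newaxis, :]                # (1, d_model)
    angles = pos / np.power(10000, (2 * (i // 2)) / d_model)
    angles[:, 0::2] = np.sin(angles[:, 0::2])            # sine on even indices
    angles[:, 1::2] = np.cos(angles[:, 1::2])            # cosine on odd indices
    return angles

pe = positional_encoding(100, 64)  # added element-wise to the input embeddings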
BERT with Keras
from transformers import TFBertModel
bert = TFBertModel.from_pretrained("bert-base-uncased")
GPT architecture
from transformers import GPT2LMHeadModel
model = GPT2LMHeadModel.from_pretrained("gpt2")
Hugging Face Transformers
from transformers import pipeline
qa = pipeline("question-answering")
qa({"question": "Who is the CEO of OpenAI?", "context": "Sam Altman is the CEO."})
Transformer training
from transformers import Trainer, TrainingArguments
args = TrainingArguments(output_dir="./model", per_device_train_batch_size=16)
trainer = Trainer(model=model, args=args, train_dataset=train_ds)
Applications of transformers
# Examples: BERT for classification, GPT for text gen, T5 for summarization
# A generative model tries to model P(data)
# E.g., generate images like handwritten digits from MNIST
Autoencoders
from keras.models import Model
from keras.layers import Input, Dense
input_img = Input(shape=(784,))
encoded = Dense(64, activation='relu')(input_img)
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)
Variational Autoencoders (VAE)
# VAE requires defining custom loss with KL divergence + reconstruction loss
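A hedged sketch of that combined loss (z_mean, z_log_var, and the reconstruction come from your own encoder/decoder):
import tensorflow as tf

def vae_loss(x, x_reconstructed, z_mean, z_log_var):
    reconstruction = tf.reduce_mean(
        tf.keras.losses.binary_crossentropy(x, x_reconstructed))
    kl = -0.5 * tf.reduce_mean(
        1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    return reconstruction + kl   # total loss = reconstruction + KL divergence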
GANs
# GAN: train generator to fool discriminator, discriminator to detect fakes
Building simple GAN
# generator = make_generator()
# discriminator = make_discriminator()
# Train them in alternating loops
Conditional GAN
# Input = [noise + label] for generator
Text generation
from keras.preprocessing.sequence import pad_sequences
# Train LSTM on character or word sequences to predict next word
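A minimal next-word model sketch (vocab_size and seq_len are placeholders for your corpus statistics):
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

vocab_size, seq_len = 5000, 20          # illustrative values
text_model = Sequential([
    Embedding(vocab_size, 64, input_length=seq_len),
    LSTM(128),
    Dense(vocab_size, activation='softmax'),   # probability of the next word
])
text_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')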
Image generation
# StyleGAN, DCGAN are popular for this task
DeepFakes
# Typically use encoder-decoder to encode face features and reconstruct on another face
Ethical considerations
# Important: use responsibly and be aware of consequences
from keras.models import Model
from keras.layers import Input, Dense
inputs = Input(shape=(784,))
x = Dense(64, activation='relu')(inputs)
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)
Inputs and outputs
print(model.inputs)
print(model.outputs)
Multi-input models
input1 = Input(shape=(32,))
input2 = Input(shape=(64,))
merged = concatenate([input1, input2])
Multi-output models
output1 = Dense(1, name='output1')(merged)
output2 = Dense(1, name='output2')(merged)
model = Model(inputs=[input1, input2], outputs=[output1, output2])
Shared layers
shared_dense = Dense(64)
output1 = shared_dense(input1)
output2 = shared_dense(input2)
Residual connections
from keras.layers import Add
residual = Add()([input_tensor, x])
Model visualization
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True)
Model summary
model.summary()
Custom models
class MyModel(keras.Model):
def __init__(self):
super().__init__()
self.dense = Dense(10)
def call(self, inputs):
return self.dense(inputs)
Real-world case
# Complex models with branches are easier using Functional API
# Use when standard layers aren’t sufficient for your logic
Building custom Layer class
class MyLayer(keras.layers.Layer):
def __init__(self):
super().__init__()
def call(self, inputs):
return inputs * 2
Using custom functions
class MultiplyByTen(keras.layers.Layer):
def call(self, inputs):
return inputs * 10
Custom activation
from keras.layers import Activation, Lambda
def custom_relu(x):
return tf.maximum(0.1 * x, x)
model.add(Lambda(custom_relu))
Custom loss function
def custom_loss(y_true, y_pred):
return tf.reduce_mean(tf.square(y_pred - y_true))
model.compile(loss=custom_loss, optimizer='adam')
Custom metric
def custom_accuracy(y_true, y_pred):
return tf.reduce_mean(tf.cast(tf.equal(y_true, tf.round(y_pred)), tf.float32))
Subclassing Model
class MyModel(keras.Model):
def __init__(self):
super().__init__()
self.dense = Dense(10)
def call(self, inputs):
return self.dense(inputs)
Custom training loop
with tf.GradientTape() as tape:
y_pred = model(x)
loss = custom_loss(y_true, y_pred)
grads = tape.gradient(loss, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))
Debugging custom models
tf.print("Shape:", tf.shape(x))
Examples
# Create a custom attention mechanism by subclassing Layer
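One possible sketch of such a layer (a simple learned-weights attention over time steps, not a specific published design):
import tensorflow as tf

class SimpleAttention(tf.keras.layers.Layer):
    def build(self, input_shape):
        # one score weight per feature dimension
        self.w = self.add_weight(shape=(input_shape[-1], 1),
                                 initializer='glorot_uniform', trainable=True)

    def call(self, inputs):                                   # (batch, timesteps, features)
        scores = tf.nn.softmax(tf.matmul(inputs, self.w), axis=1)
        return tf.reduce_sum(scores * inputs, axis=1)         # weighted sum over time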
encoded = encoder(input_data)
decoded = decoder(encoded)
autoencoder = Model(inputs=input_data, outputs=decoded)
Architecture overview
input_img = Input(shape=(784,))
encoded = Dense(32, activation='relu')(input_img)
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)
Encoder & decoder models
encoder = Model(input_img, encoded)
encoded_input = Input(shape=(32,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))
Denoising autoencoder
noisy_input = input_img + noise
autoencoder.fit(noisy_input, clean_img, epochs=50)
Sparse autoencoder
encoded = Dense(64, activation='relu', activity_regularizer=regularizers.l1(1e-5))(input_img)
Variational autoencoder
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
z = z_mean + tf.exp(0.5 * z_log_var) * epsilon
Applications
# Apply encoder to extract compressed representations for clustering
encoded_imgs = encoder.predict(x_test)
Image compression
autoencoder.fit(x_train, x_train, epochs=50, batch_size=256)
Anomaly detection
if reconstruction_error > threshold:
print("Anomaly detected")
Visualization
encoded_imgs = encoder.predict(x_test)
plt.scatter(encoded_imgs[:,0], encoded_imgs[:,1])
# Example time series data
series = [112, 118, 132, 129, 121, 135, ...]
Sliding window approach
X = [series[i:i+window] for i in range(len(series)-window)]
y = [series[i+window] for i in range(len(series)-window)]
Data reshaping
X = np.reshape(X, (X.shape[0], X.shape[1], 1))
LSTM for forecasting
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(window, 1)))
model.add(Dense(1))
Multi-step predictions
model.add(Dense(3))  # Predict the next 3 time steps
Normalization
scaler = MinMaxScaler()
scaled_series = scaler.fit_transform(series.reshape(-1,1))
Evaluation metrics
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
Visualizing predictions
plt.plot(y_true, label='Actual')
plt.plot(y_pred, label='Predicted')
Combining with CNN
model = Sequential()
model.add(Conv1D(64, 3, activation='relu', input_shape=(window, 1)))
model.add(LSTM(50))
Real-world project
# Predict future stock prices using LSTM with sliding window
# Output: [class, x_min, y_min, x_max, y_max]
CNN backbone
base_model = tf.keras.applications.ResNet50(include_top=False, input_shape=(224,224,3))
YOLO with Keras
# YOLO uses a custom loss combining bounding-box regression and classification
SSD overview
# SSD uses anchor boxes at multiple feature map levels
Bounding boxes
box = [x_min, y_min, x_max, y_max]
Anchor boxes
# Anchors = reference boxes for detection layers
Label encoding
# Label = [class, x_center, y_center, width, height]
Transfer learning for detection
model = tf.keras.Model(inputs=base_model.input, outputs=detection_head)
Evaluating mAP
# mAP is calculated from the precision-recall curve for each class
Real-world example
# Detect faces in webcam feed with YOLOv5 or Haar cascades
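Related to the mAP evaluation above, a minimal IoU helper (a sketch; boxes use the [x_min, y_min, x_max, y_max] format shown earlier):
def iou(box_a, box_b):
    # intersection rectangle
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

print(iou([0, 0, 10, 10], [5, 5, 15, 15]))  # 25 / 175 ≈ 0.14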
# Each pixel labeled with a class (e.g., 0=background, 1=object)
segmentation_mask = model.predict(image)
U-Net architecture
# Load U-Net from segmentation_models library
import segmentation_models as sm
model = sm.Unet('resnet34', input_shape=(128,128,3), classes=1, activation='sigmoid')
Data preparation
# Normalize images, resize masks
image = image / 255.0
mask = tf.image.resize(mask, (128, 128))
Mask generation
# Convert RGB mask to one-hot encoded class mask
mask = tf.cast(mask == class_id, tf.float32)
Dice loss
def dice_loss(y_true, y_pred):
intersection = tf.reduce_sum(y_true * y_pred)
return 1 - (2. * intersection + 1) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + 1)
IoU metric
iou = tf.keras.metrics.MeanIoU(num_classes=2)
iou.update_state(y_true, y_pred)
Augmentation techniques
# Albumentations is popular for segmentation
import albumentations as A
A.HorizontalFlip(p=0.5)
Post-processing
# Convert logits to binary mask
mask = (model.predict(image) > 0.5).astype("uint8")
Applications
# Example: identify tumors in medical images
Keras example
model.compile(optimizer='adam', loss=dice_loss, metrics=['accuracy'])
model.fit(train_dataset, validation_data=val_dataset, epochs=10)
from transformers import pipeline
classifier = pipeline("sentiment-analysis")
classifier("Hugging Face is awesome!")
Tokenizers
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
tokens = tokenizer("Hello!", return_tensors="pt")
Importing models
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
Text classification
from transformers import Trainer
trainer = Trainer(model=model, train_dataset=train_ds, eval_dataset=val_ds)
trainer.train()
Question answering
qa = pipeline("question-answering")
qa(question="What is Hugging Face?", context="Hugging Face is an AI company.")
Text generation
gen = pipeline("text-generation", model="gpt2")
gen("Once upon a time", max_length=50)
Fine-tuning BERT
trainer.train() # After loading model, tokenizer, and datasets
Datasets module
from datasets import load_dataset
dataset = load_dataset("imdb")
Saving & exporting models
model.save_pretrained("my_bert_model")
tokenizer.save_pretrained("my_bert_model")
Use case demo
summarizer = pipeline("summarization")
summarizer("Your long document text...")
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
Weight initialization
Dense(64, kernel_initializer='he_uniform')
Gradient clipping
optimizer = tf.keras.optimizers.Adam(clipvalue=1.0)
Mixed precision training
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')
XLA compilation
@tf.function(jit_compile=True)
def train_step(inputs): ...
Multi-GPU training
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
model = create_model()
TPU support
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(resolver)
Quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
Pruning
import tensorflow_model_optimization as tfmot
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(model)
Model distillation
# Train student on soft labels from teacher
# student.predict(x_train) ≈ teacher.predict(x_train)
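A hedged sketch of a distillation loss (the temperature value and the teacher/student logits are assumptions):
import tensorflow as tf

def distillation_loss(teacher_logits, student_logits, temperature=3.0):
    soft_teacher = tf.nn.softmax(teacher_logits / temperature)   # softened targets
    soft_student = tf.nn.softmax(student_logits / temperature)
    return tf.keras.losses.kullback_leibler_divergence(soft_teacher, soft_student)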
# Conceptual example: Explain model output importance
# Actual implementation uses libraries like SHAP or LIME
SHAP
import shap
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)
shap.summary_plot(shap_values, X_test)
LIME
from lime import lime_tabular
explainer = lime_tabular.LimeTabularExplainer(X_train)
exp = explainer.explain_instance(X_test[0], model.predict)
exp.show_in_notebook()
Grad-CAM
# Keras Grad-CAM example available in tensorflow tutorials
Feature importance
import matplotlib.pyplot as plt
plt.bar(feature_names, model.feature_importances_)
plt.show()
Saliency maps
# Use guided backpropagation or integrated gradients for saliency maps
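A minimal gradient-saliency sketch with GradientTape (model and a preprocessed img_array from earlier are assumed):
import tensorflow as tf

image = tf.convert_to_tensor(img_array[None, ...])     # add batch dimension
with tf.GradientTape() as tape:
    tape.watch(image)
    preds = model(image)
    class_idx = int(tf.argmax(preds[0]))               # top predicted class
    score = preds[:, class_idx]
grads = tape.gradient(score, image)
saliency = tf.reduce_max(tf.abs(grads), axis=-1)[0]    # per-pixel importance map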
Keras callbacks for visualization
from tensorflow.keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir='./logs')
model.fit(X_train, y_train, callbacks=[tensorboard])
Explainable AI (XAI) tools
# pip install shap lime eli5 interpret
Interpretability for stakeholders
# Use dashboards and visual reports to communicate insights
Tools comparison
# Choose tool based on model type: tree, deep learning, tabular
model.save("model_saved_model")
# or
model.save("model.h5")
REST API with Flask
from flask import Flask, request, jsonify
app = Flask(__name__)
@app.route('/predict', methods=['POST'])
def predict():
data = request.json
prediction = model.predict(data['input'])
return jsonify({'prediction': prediction.tolist()})
TensorFlow Serving
# Run TF Serving docker container
docker run -p 8501:8501 --mount type=bind,\
source=/models/model_name/,target=/models/model_name/ \
-e MODEL_NAME=model_name tensorflow/serving
TensorFlow Lite
import tensorflow as tf
converter = tf.lite.TFLiteConverter.from_saved_model("model_saved_model")
tflite_model = converter.convert()
TensorFlow.js
# Convert model for TF.js
tensorflowjs_converter --input_format=tf_saved_model model_saved_model/ web_model/
AWS deployment
# Example: Deploy using SageMaker SDK (Python)
import sagemaker
# Configure and deploy model
Dockerizing Keras model
# Sample Dockerfile snippet
FROM python:3.8
COPY model.h5 /app/
Monitoring deployments
# Use Prometheus, Grafana for monitoring endpoints
Scaling inference
# Kubernetes autoscaling example
kubectl autoscale deployment model-server --min=2 --max=10 --cpu-percent=80
CI/CD for ML
# Use GitHub Actions, Jenkins for ML workflow automation
# Example: Tokenization with NLTK
import nltk
tokens = nltk.word_tokenize("Hello, how can I help?")
Intent classification
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
# Train classifier to predict intents
Entity extraction
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Book a flight to New York tomorrow")
entities = [(ent.text, ent.label_) for ent in doc.ents]
Sequence-to-sequence
# Simple seq2seq architecture in Keras
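A minimal encoder-decoder sketch with the Functional API (vocabulary size and layer dimensions are illustrative):
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model

enc_in = Input(shape=(None,))
enc_emb = Embedding(5000, 64)(enc_in)
_, state_h, state_c = LSTM(64, return_state=True)(enc_emb)        # keep encoder states

dec_in = Input(shape=(None,))
dec_emb = Embedding(5000, 64)(dec_in)
dec_out = LSTM(64, return_sequences=True)(dec_emb, initial_state=[state_h, state_c])
outputs = Dense(5000, activation='softmax')(dec_out)               # next-token distribution
seq2seq = Model([enc_in, dec_in], outputs)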
Attention-based responses
# Attention layers in Transformer models
Context management
# Store user session data or dialogue states
Using transformers
from transformers import pipeline
chatbot = pipeline("conversational")
Chatbot deployment
# Deploy using Flask or serverless functions
Rasa integration
# Rasa example: training and running a chatbot
rasa train
rasa run
Evaluation
# Human-in-the-loop and automated testing methods
# Simple GAN architecture sketch
# Generator creates fake samples, discriminator classifies real vs fake
Generator architecture
from tensorflow.keras.layers import Dense, Reshape
generator = Sequential([
Dense(128, activation='relu', input_shape=(100,)),
Dense(784, activation='sigmoid'),
Reshape((28,28,1))
])
Discriminator model
discriminator = Sequential([
Flatten(input_shape=(28,28,1)),
Dense(128, activation='relu'),
Dense(1, activation='sigmoid')
])
Training loop
# Pseudocode for GAN training
# for each epoch:
#     train the discriminator on real and fake samples
#     train the generator via the discriminator's feedback
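A hedged sketch of one such alternating step with GradientTape (the loss function, learning rates, and noise_dim are assumptions; generator and discriminator are the models defined above):
import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy()
g_opt = tf.keras.optimizers.Adam(1e-4)
d_opt = tf.keras.optimizers.Adam(1e-4)

def train_step(real_images, noise_dim=100):
    noise = tf.random.normal([tf.shape(real_images)[0], noise_dim])
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake_images = generator(noise, training=True)
        real_out = discriminator(real_images, training=True)
        fake_out = discriminator(fake_images, training=True)
        d_loss = bce(tf.ones_like(real_out), real_out) + bce(tf.zeros_like(fake_out), fake_out)
        g_loss = bce(tf.ones_like(fake_out), fake_out)   # generator wants fakes judged real
    d_opt.apply_gradients(zip(d_tape.gradient(d_loss, discriminator.trainable_variables),
                              discriminator.trainable_variables))
    g_opt.apply_gradients(zip(g_tape.gradient(g_loss, generator.trainable_variables),
                              generator.trainable_variables))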
Stability tips
# Example: use batch normalization in generator layers
from tensorflow.keras.layers import BatchNormalization
generator.add(BatchNormalization())
Conditional GANs
# Add label inputs concatenated with noise vector
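A sketch of the combined generator input (class count and embedding size are illustrative):
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense

noise_in = Input(shape=(100,))
label_in = Input(shape=(1,))
label_emb = Flatten()(Embedding(10, 50)(label_in))     # 10 classes, 50-dim embedding
merged = Concatenate()([noise_in, label_emb])          # [noise + label] as one vector
x = Dense(128, activation='relu')(merged)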
DCGAN
# Use Conv2DTranspose in generator, Conv2D in discriminator
CycleGAN
# Used for style transfer like horses ↔ zebras
Pix2Pix
# Example: input edges → output photo
GAN applications
# Generate realistic images, augment data, create artworks
# Example: CNN for X-ray image classification
Fraud detection
# Use anomaly detection algorithms on transaction data
E-commerce recommendations
# Collaborative filtering or content-based recommendation systems
Stock prediction
# Time series LSTM model for price prediction
Social media sentiment
# Sentiment analysis with LSTM or transformers
Autonomous driving
# Object detection with YOLO or SSD networks
Language translation
# Transformer-based translation systems
Voice synthesis
# Tacotron or WaveNet models
Facial recognition
# FaceNet or similar deep learning architectures
AR/VR
# Real-time gesture recognition using CNNs
# Use pipelines to automate workflows
Experiment tracking
import mlflow
mlflow.start_run()
mlflow.log_param("lr", 0.01)
mlflow.log_metric("accuracy", 0.95)
mlflow.end_run()
Model versioning
# Save models with version IDs
model.save("model_v1.h5")
CI/CD pipelines
# Example: GitHub Actions or Jenkins pipeline scripts
MLflow usage
mlflow.start_run()
mlflow.log_param("batch_size", 64)
mlflow.sklearn.log_model(model, "model")
mlflow.end_run()
Monitoring models
# Use Prometheus or custom dashboards
Drift detection
# Statistical tests or dedicated libraries like Alibi Detect
Feature stores
# Feast or similar feature store tools
Real-time inference
# TensorFlow Serving or FastAPI endpoints
Best practices
# Automate testing, versioning, and monitoring
# Example: Checking class balance in Python
from collections import Counter
print(Counter(y_train))
# Use AIF360 library to compute fairness metrics
from aif360.metrics import ClassificationMetric
# Document model decisions and data sources for auditability
import shap
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# TensorFlow Federated example to train without centralized data
# Differentially private training with TensorFlow Privacy's DP optimizer
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
# Automate model fairness and performance checks in CI/CD pipelines
# Maintain governance docs and approval workflows
# Use human review for flagged predictions before final action
# Include data processing agreements and consent management
# Monitor training logs for errors and warnings
# Apply gradient clipping in TensorFlow
optimizer = tf.keras.optimizers.Adam()
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        loss = tf.keras.losses.binary_crossentropy(y, model(x, training=True))
    gradients = tape.gradient(loss, model.trainable_variables)
    clipped = [tf.clip_by_norm(g, 1.0) for g in gradients]  # cap each gradient's norm at 1.0
    optimizer.apply_gradients(zip(clipped, model.trainable_variables))
# Use ReLU activations to mitigate vanishing gradients
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# Visualize samples and labels to check alignment
# Use train_test_split with stratification and shuffle
import numpy as np
import tensorflow as tf
np.random.seed(42)
tf.random.set_seed(42)
import mlflow
mlflow.log_param("lr", 0.001)
# Use pdb in Python scripts
import pdb; pdb.set_trace()
# TensorBoard profiling example
tensorboard --logdir=logs/profile
# Explore roles on job boards like LinkedIn, Glassdoor
# Use LeetCode and interview prep platforms
# Create repositories with clear READMEs and demos
git init
git add .
git commit -m "Initial commit"
git push origin main
# Examples: Google AI, IBM Data Science Professional Certificate
# Tailor resumes for each job application
# Practice behavioral and technical questions regularly
# Join platforms like Upwork or Freelancer
# Start a blog or YouTube channel on AI topics
# Use arXiv, Twitter, and conferences like NeurIPS