#======================================================
# Use of numpy
import numpy as np
def relu(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: A number or array-like of numbers.

    Returns:
        The element-wise maximum of 0 and ``x`` as computed by
        ``np.maximum`` (negative entries become 0, others are unchanged).
    """
    return np.maximum(0, x)
# 1. Setup Data with Teacher Data
# Problem sizes: N samples, D_in input features, hidden widths H1 and H2,
# and D_out target values per sample.
N = 500
D_in, D_out = 10, 1
H1, H2 = 100, 50

# Seed NumPy's global generator so the same samples are drawn on every run.
np.random.seed(42)
x_all = np.random.randn(N, D_in)

# "Teacher" targets: the sum of squared features of each row plus Gaussian
# noise scaled by 0.1.  keepdims=True keeps the targets as an (N, 1) column.
y_all = (x_all ** 2).sum(axis=1, keepdims=True) + 0.1 * np.random.randn(N, 1)

# The first 80% of the rows are used for training, the rest for testing.
split_idx = int(N * 0.8)
x_train, y_train = x_all[:split_idx], y_all[:split_idx]
x_test, y_test = x_all[split_idx:], y_all[split_idx:]
print(f"Data Shapes: Train x={x_train.shape}, y={y_train.shape} | Test x={x_test.shape}, y={y_test.shape}")
# 2. Initialize Weights
# He (Kaiming) initialization: each matrix is scaled by sqrt(2 / fan_in) so
# the ReLU activations keep a usable magnitude through all three layers.
# A fixed 0.01 scale shrinks the signal at every layer, which makes the
# gradients so small that plain gradient descent barely moves the loss.
w1 = np.random.randn(D_in, H1) * np.sqrt(2.0 / D_in)
w2 = np.random.randn(H1, H2) * np.sqrt(2.0 / H1)
w3 = np.random.randn(H2, D_out) * np.sqrt(2.0 / H2)
# Step size for full-batch gradient descent on the mean-squared-error loss.
learning_rate = 1e-3
# Number of gradient-descent updates to apply.
num_steps = 1000
print(f"Training NumPy 3-Layer NN for {num_steps} steps...")
# The loop runs num_steps + 1 forward passes: the last one (t == num_steps)
# only reports the loss of the final weights and performs no update.
for t in range(num_steps + 1):
    # --- Forward Pass (Training) ---
    # Layer 1
    h1 = x_train.dot(w1)
    h1_relu = relu(h1)
    # Layer 2
    h2 = h1_relu.dot(w2)
    h2_relu = relu(h2)
    # Layer 3 (Output, no activation)
    y_pred = h2_relu.dot(w3)

    # Compute Loss (MSE over every element of the prediction)
    loss = np.mean(np.square(y_pred - y_train))
    if t % 100 == 0:
        print(f"Step {t}: Train Loss = {loss:.4f}")
    if t == num_steps:
        break

    # --- Backward Pass (Manual Gradients) ---
    # np.mean averages over every element of y_pred, so the derivative
    # divides by y_pred.size (equal to the row count when D_out is 1).
    grad_y_pred = 2.0 * (y_pred - y_train) / y_pred.size

    # Backprop through Layer 3
    grad_w3 = h2_relu.T.dot(grad_y_pred)
    grad_h2_relu = grad_y_pred.dot(w3.T)
    # Backprop through ReLU 2: gradient flows only where the pre-activation
    # was positive (the subgradient at exactly 0 is taken as 0).
    grad_h2 = grad_h2_relu * (h2 > 0)

    # Backprop through Layer 2
    grad_w2 = h1_relu.T.dot(grad_h2)
    grad_h1_relu = grad_h2.dot(w2.T)
    # Backprop through ReLU 1
    grad_h1 = grad_h1_relu * (h1 > 0)

    # Backprop through Layer 1
    grad_w1 = x_train.T.dot(grad_h1)

    # --- Update Weights (in place) ---
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
    w3 -= learning_rate * grad_w3
print("Training Complete.")
# 3. Model Evaluation
print("\n--- Model Evaluation ---")
# Forward pass on the held-out test data using the trained weights.
h1_test = x_test.dot(w1)
h1_relu_test = relu(h1_test)
h2_test = h1_relu_test.dot(w2)
h2_relu_test = relu(h2_test)
y_pred_test = h2_relu_test.dot(w3)
# Compute Test Loss (same MSE definition as the training loss)
test_loss = np.mean(np.square(y_pred_test - y_test))
print(f"Test Loss: {test_loss:.4f}")
# Compare the first predictions with the ground truth.  Slicing instead of
# indexing 0..4 avoids an IndexError when the test split has fewer than
# five rows; column 0 is the first output value of each row.
print("\nFirst 5 Predictions vs Ground Truth:")
for pred, true in zip(y_pred_test[:5, 0], y_test[:5, 0]):
    print(f"Pred: {pred:.4f} | True: {true:.4f}")
#=================================================================
# Use of torch
import torch
import time
# Start the wall-clock timer for the PyTorch section; the elapsed time is
# printed as "Total Execution Time" at the end of the script.  `import torch`
# above has already run, so it is not part of the measurement.
start_time = time.time()
import torch.nn as nn
import torch.optim as optim
# 1. Setup Data with Teacher Data
# Problem sizes: N samples, D_in input features, hidden widths H1 and H2,
# and D_out target values per sample.
N = 500
D_in, D_out = 10, 1
H1, H2 = 100, 50

# Seed PyTorch's global generator so the same samples are drawn on every run.
torch.manual_seed(42)
x_all = torch.randn(N, D_in)

# "Teacher" targets: the sum of squared features of each row plus Gaussian
# noise scaled by 0.1.  keepdim=True keeps the targets as an (N, 1) column.
y_all = (x_all ** 2).sum(dim=1, keepdim=True) + 0.1 * torch.randn(N, 1)

# The first 80% of the rows are used for training, the rest for testing.
split_idx = int(N * 0.8)
x_train, y_train = x_all[:split_idx], y_all[:split_idx]
x_test, y_test = x_all[split_idx:], y_all[split_idx:]
print(f"Data Shapes: Train x={x_train.shape}, y={y_train.shape} | Test x={x_test.shape}, y={y_test.shape}")
# 2. Define Model
# Three fully connected layers; each hidden layer is followed by a ReLU and
# the output layer is left linear.  The layers are created in forward order.
layers = [
    nn.Linear(D_in, H1),
    nn.ReLU(),
    nn.Linear(H1, H2),
    nn.ReLU(),
    nn.Linear(H2, D_out),
]
model = nn.Sequential(*layers)

# Mean-squared-error objective, optimized with Adam over all model parameters.
loss_fn = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Number of optimizer updates to apply.
num_steps = 1000
print(f"Training PyTorch 3-Layer NN for {num_steps} steps...")
# The loop runs num_steps + 1 forward passes: the last one (t == num_steps)
# only reports the loss of the final parameters and performs no update.
for t in range(num_steps + 1):
    # --- Forward Pass ---
    y_pred = model(x_train)
    # --- Compute Loss ---
    loss = loss_fn(y_pred, y_train)
    if t % 100 == 0:
        print(f"Step {t}: Train Loss = {loss.item():.4f}")
    if t == num_steps:
        break
    # --- Backward Pass ---
    # Clear the gradients left over from the previous step before
    # back-propagating, then let the optimizer update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
print("Training Complete.")
# 3. Model Evaluation
print("\n--- Model Evaluation ---")
model.eval()  # Set model to evaluation mode
with torch.no_grad():  # Disable gradient calculation
    y_pred_test = model(x_test)
    test_loss = loss_fn(y_pred_test, y_test)
print(f"Test Loss: {test_loss.item():.4f}")
# Compare the first predictions with the ground truth.  Slicing instead of
# indexing 0..4 avoids an IndexError when the test split has fewer than
# five rows; .item() reads the single value held by each row.
print("\nFirst 5 Predictions vs Ground Truth:")
for pred, true in zip(y_pred_test[:5], y_test[:5]):
    print(f"Pred: {pred.item():.4f} | True: {true.item():.4f}")
# Report the wall-clock time elapsed since start_time was recorded.
end_time = time.time()
print(f"\nTotal Execution Time: {end_time - start_time:.4f} seconds")