# learn-neural-networks/src/streamlit_app.py
# Author: schoginitoys — commit 4db86c2 (verified)
import streamlit as st
import asyncio
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Streamlit script threads may lack an asyncio event loop; build one and
# install it so libraries that call asyncio.get_event_loop() keep working.
_loop = asyncio.new_event_loop()
asyncio.set_event_loop(_loop)
# Helper: visualize network architecture
def plot_network(gate, hidden_units, activation):
    """Draw the network architecture for *gate* as a matplotlib figure.

    One column of circles per layer. If a model is present in
    ``st.session_state``, its ``nn.Linear`` weights are annotated on the
    connections (red, rotated along the line) and biases next to each node
    (blue).

    Parameters
    ----------
    gate : str
        "AND" / "OR" (single neuron) or "XOR" (one hidden layer).
    hidden_units : int
        Hidden-layer width; only used for XOR.
    activation : str
        Hidden-layer activation name shown in the layer label.

    Returns
    -------
    matplotlib.figure.Figure
    """
    # Close stale figures so repeated Streamlit reruns don't accumulate
    # open figures (the original plt.clf() only cleared the previously
    # current figure and never released it).
    plt.close('all')
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.axis('off')

    # Collect the trainable layers so weights/biases can be annotated.
    linear_layers = []
    if 'model' in st.session_state and st.session_state.model is not None:
        linear_layers = [layer for layer in st.session_state.model.children()
                         if isinstance(layer, nn.Linear)]

    # Network structure: AND/OR are linearly separable (single perceptron);
    # XOR needs one hidden layer.
    if gate in ["AND", "OR"]:
        layers = [2, 1]
        activations = ["Sigmoid"]
    else:
        layers = [2, hidden_units, 1]
        activations = [activation, "Sigmoid"]

    # Drawing parameters.
    layer_spacing = 4
    node_radius = 0.4
    vertical_spacing = 2.0
    connection_index = 0  # index of the nn.Linear feeding the current layer

    for layer_idx, num_nodes in enumerate(layers):
        x = layer_idx * layer_spacing
        y_base = -(num_nodes - 1) * vertical_spacing / 2

        # Draw this layer's nodes (plus input labels on layer 0).
        for node in range(num_nodes):
            y = y_base + node * vertical_spacing
            circle = plt.Circle((x, y), node_radius,
                                fc='skyblue', ec='black', lw=1.5)
            ax.add_patch(circle)
            if layer_idx == 0:
                ax.text(x, y + node_radius + 0.3, f'Input {node+1}',
                        ha='center', va='bottom', fontsize=8)

        # Draw connections from the previous layer, annotated with weights.
        if layer_idx > 0:
            prev_idx = layer_idx - 1
            prev_nodes = layers[prev_idx]
            x_prev = prev_idx * layer_spacing
            y_prev_base = -(prev_nodes - 1) * vertical_spacing / 2

            weights = None
            biases = None
            if linear_layers and connection_index < len(linear_layers):
                weights = linear_layers[connection_index].weight.detach().numpy()
                biases = linear_layers[connection_index].bias.detach().numpy()

            for prev_node in range(prev_nodes):
                y_prev = y_prev_base + prev_node * vertical_spacing
                for curr_node in range(num_nodes):
                    y_curr = y_base + curr_node * vertical_spacing
                    # Connection line.
                    ax.plot([x_prev + node_radius, x - node_radius],
                            [y_prev, y_curr],
                            color='gray', lw=1, alpha=0.4)
                    # Weight annotation, rotated to follow the line.
                    if weights is not None and curr_node < weights.shape[0]:
                        weight = weights[curr_node, prev_node]
                        mid_x = (x_prev + x) / 2
                        mid_y = (y_prev + y_curr) / 2
                        angle = np.arctan2(y_curr - y_prev, x - x_prev) * 180 / np.pi
                        ax.text(mid_x, mid_y, f'{weight:.2f}',
                                fontsize=7, color='red',
                                rotation=angle, ha='center', va='center',
                                rotation_mode='anchor')

            # Bias annotations beside each node of the current layer.
            if biases is not None:
                for curr_node in range(num_nodes):
                    if curr_node < len(biases):
                        y = y_base + curr_node * vertical_spacing
                        ax.text(x + node_radius + 0.5, y,
                                f'b={biases[curr_node]:.2f}',
                                fontsize=7, color='blue', ha='left', va='center')
            connection_index += 1

    # Layer labels, placed below the tallest column of nodes.
    label_y = -(max(layers) - 1) * vertical_spacing / 2 - 2.0
    for layer_idx in range(len(layers)):
        x = layer_idx * layer_spacing
        if layer_idx == 0:
            label = "Input Layer\n(Layer 0)"
        elif layer_idx == len(layers) - 1:
            label = f"Output Layer\n(Layer {layer_idx})\n{activations[-1]}"
        else:
            label = f"Hidden Layer\n(Layer {layer_idx})\n{activations[layer_idx-1]}"
        ax.text(x, label_y, label, ha='center', va='top',
                fontsize=10, weight='bold',
                bbox=dict(facecolor='white', alpha=0.8))

    ax.set_xlim(-2, (len(layers) - 1) * layer_spacing + 2)
    ax.set_ylim(label_y - 1, max(layers) * vertical_spacing)
    return fig
# Page header: title plus a short usage blurb.
st.title("Logic Gate Learning with Neural Networks")

_INTRO = """
Adjust the sidebar parameters to configure and train simple neural networks on AND, OR, or XOR gates.
Use **Train** to fit the model and watch loss convergence, then **Predict** to view outputs.
"""
st.markdown(_INTRO)
# --- Sidebar controls -------------------------------------------------------
# These globals drive data generation, model construction, and training.
gate = st.sidebar.selectbox("Select Gate", options=["AND", "OR", "XOR"])
activation = st.sidebar.selectbox("Activation", options=["Tanh", "ReLU"])
optimizer_name = st.sidebar.selectbox("Optimizer", options=["SGD", "Adam"])
learning_rate = st.sidebar.slider(
    "Learning Rate", min_value=0.001, max_value=1.0, value=0.1, step=0.001)
epochs = st.sidebar.slider(
    "Epochs", min_value=10, max_value=2000, value=500, step=10)
hidden_units = st.sidebar.slider(
    "Hidden Units (XOR)", min_value=2, max_value=16, value=2, step=1)
# Prepare data
def get_data(gate_name=None):
    """Return the 4-row truth table (inputs, targets) for a logic gate.

    Parameters
    ----------
    gate_name : str, optional
        "AND", "OR" or "XOR". Defaults to the gate selected in the sidebar
        (module-level ``gate``), preserving the original no-argument call.

    Returns
    -------
    tuple[torch.Tensor, torch.Tensor]
        ``X`` of shape (4, 2) and targets of shape (4, 1), both float.

    Raises
    ------
    KeyError
        If *gate_name* is not a supported gate.
    """
    if gate_name is None:
        gate_name = gate  # fall back to the sidebar selection
    X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float)
    targets_map = {
        "AND": torch.tensor([[0], [0], [0], [1]], dtype=torch.float),
        "OR": torch.tensor([[0], [1], [1], [1]], dtype=torch.float),
        "XOR": torch.tensor([[0], [1], [1], [0]], dtype=torch.float),
    }
    return X, targets_map[gate_name]
# Build model factory
def build_model(gate_name=None, n_hidden=None, activation_name=None):
    """Construct a fresh (untrained) network for the requested gate.

    AND/OR are linearly separable, so a single Linear->Sigmoid neuron
    suffices; XOR gets one hidden layer with a non-linear activation.

    Parameters
    ----------
    gate_name : str, optional
        "AND", "OR" or "XOR"; defaults to the sidebar ``gate``.
    n_hidden : int, optional
        Hidden-layer width for XOR; defaults to the sidebar ``hidden_units``.
    activation_name : str, optional
        ``torch.nn`` activation class name ("Tanh" / "ReLU"); defaults to
        the sidebar ``activation``.

    Returns
    -------
    nn.Sequential
    """
    if gate_name is None:
        gate_name = gate
    if n_hidden is None:
        n_hidden = hidden_units
    if activation_name is None:
        activation_name = activation
    if gate_name in ["AND", "OR"]:
        layers = [nn.Linear(2, 1), nn.Sigmoid()]
    else:
        layers = [
            nn.Linear(2, n_hidden),
            getattr(nn, activation_name)(),  # e.g. nn.Tanh() or nn.ReLU()
            nn.Linear(n_hidden, 1),
            nn.Sigmoid(),
        ]
    return nn.Sequential(*layers)
# First run of the script for this session: seed the session state with an
# untrained model and an empty loss history.
if 'model' not in st.session_state:
    st.session_state['model'] = build_model()
    st.session_state['loss_history'] = []
# Sidebar action buttons: Train on the left, Predict on the right.
col1, col2 = st.sidebar.columns(2)

if col1.button("Train"):
    # Rebuild the model from scratch so each Train click starts fresh.
    X, targets = get_data()
    st.session_state.model = build_model()
    model = st.session_state.model
    if optimizer_name == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    else:
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()
    st.session_state.loss_history = []
    progress = st.progress(0)
    status = st.empty()
    report_every = max(1, epochs // 10)  # ~10 status updates per run
    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        loss = criterion(model(X), targets)
        loss.backward()
        optimizer.step()
        st.session_state.loss_history.append(loss.item())
        progress.progress(epoch / epochs)
        if epoch in (1, epochs) or epoch % report_every == 0:
            status.text(f"Epoch {epoch}/{epochs} – Loss: {loss.item():.4f}")
    st.success("Training complete!")
    # Show the loss curve for the run that just finished.
    st.subheader("Training Loss over Epochs")
    fig, ax = plt.subplots()
    ax.plot(st.session_state.loss_history)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Binary Cross-Entropy Loss")
    st.pyplot(fig)
if col2.button("Predict"):
    if not st.session_state.loss_history:
        # No training has happened yet this session.
        st.warning("Train the model first.")
    else:
        X, targets = get_data()
        model = st.session_state.model
        with torch.no_grad():
            # Round sigmoid outputs to hard 0/1 predictions.
            preds = model(X).round().numpy().astype(int)
        truth_table = {
            'I1': X[:, 0].numpy().astype(int),
            'I2': X[:, 1].numpy().astype(int),
            'Prediction': preds.flatten(),
            'Target': targets.numpy().flatten().astype(int),
        }
        st.subheader("Predictions vs Targets")
        st.table(pd.DataFrame(truth_table))
# Always-visible architecture diagram for the currently selected gate.
st.subheader(f"Network Architecture Diagram: {gate} Gate")
arch_fig = plot_network(gate, hidden_units, activation)
st.pyplot(arch_fig)
# Trained-parameters table and the neuron formula (only after training).
if st.session_state.loss_history:
    st.subheader("Trained Parameters")
    param_rows = [
        {
            "Parameter": name,
            "Values": np.array2string(param.detach().numpy(),
                                      precision=4, separator=', '),
        }
        for name, param in st.session_state.model.named_parameters()
    ]
    st.table(pd.DataFrame(param_rows))
    # Note for students
    st.markdown(
        "> **Note:** Activation layers (e.g., ReLU, Sigmoid) do not have learnable parameters, so you won’t see entries like `1.weight` or `1.bias` in this table."
    )
    st.subheader("Neuron Computation Formula")
    st.latex(r"\hat{y} = \sigma(\mathbf{W} \cdot \mathbf{x} + b)")

# Educational notes, always shown.
if gate in ['AND', 'OR']:
    arch_desc = 'Linear→Sigmoid'
else:
    arch_desc = f"Linear(2→{hidden_units}) + {activation} + Linear({hidden_units}→1) + Sigmoid"
st.markdown(f"""
**Model Architecture**
- **{gate}**: {arch_desc}
**Activation Functions**
- **Tanh** vs **ReLU** influences non-linear decision capability and gradient flow.
**Optimizers**
- **SGD**: fixed-step gradient descent.
- **Adam**: adaptive, per-parameter learning rates.
Use **Train** to see loss converge over epochs, then **Predict** to view the truth table.
Check the **Trained Parameters** table to inspect learned weights & biases, and the formula explains how the neuron computes outputs.
""")