"""
Docker Neural Memory - Production Demo

REAL neural memory implementation using Titans architecture.
Demonstrates Docker-native AI memory with MCP server integration.

Deploy to: https://huggingface.co/spaces
"""

import os
import re
import sys
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Tuple

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
from huggingface_hub import InferenceClient
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Select the non-interactive Agg backend so figures are rendered off-screen.
matplotlib.use("Agg")

# =============================================================================
# CUSTOM CSS FOR POLISHED UI
# =============================================================================

# Stylesheet text for the UI: web-font import, colour variables, and rules for
# tabs, buttons, labels, inputs, cards, plots and markdown.
# NOTE(review): presumably handed to Gradio as custom CSS; the consumer is
# outside this section of the file.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600&family=Outfit:wght@300;400;500;600;700&display=swap');

:root {
    --neural-cyan: #00d4ff;
    --neural-cyan-glow: rgba(0, 212, 255, 0.3);
    --rag-orange: #ff8c42;
    --purple-accent: #a855f7;
    --bg-deep: #0a0a1a;
    --bg-card: #12122a;
    --bg-card-hover: #1a1a3a;
    --text-primary: #f8fafc;
    --text-secondary: #94a3b8;
    --border-subtle: rgba(148, 163, 184, 0.1);
    --success-green: #22c55e;
}

/* Global font settings */
.gradio-container {
    font-family: 'Outfit', system-ui, -apple-system, sans-serif !important;
    background: linear-gradient(180deg, var(--bg-deep) 0%, #0f0f23 100%) !important;
}

/* Headings */
.gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container h4 {
    font-family: 'Outfit', sans-serif !important;
    font-weight: 600 !important;
    letter-spacing: -0.02em !important;
}

/* Code and monospace */
.gradio-container code, .gradio-container pre {
    font-family: 'JetBrains Mono', monospace !important;
}

/* Tab styling */
.tabs > .tab-nav > button {
    font-family: 'Outfit', sans-serif !important;
    font-weight: 500 !important;
    padding: 12px 24px !important;
    border-radius: 8px 8px 0 0 !important;
    transition: all 0.3s ease !important;
}

.tabs > .tab-nav > button.selected {
    background: linear-gradient(135deg, var(--neural-cyan) 0%, var(--purple-accent) 100%) !important;
    color: white !important;
}

/* Button styling */
.gr-button {
    font-family: 'Outfit', sans-serif !important;
    font-weight: 500 !important;
    border-radius: 8px !important;
    transition: all 0.3s ease !important;
}

.gr-button-primary {
    background: linear-gradient(135deg, var(--neural-cyan) 0%, var(--purple-accent) 100%) !important;
    border: none !important;
}

.gr-button-primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px var(--neural-cyan-glow) !important;
}

.gr-button-secondary {
    background: transparent !important;
    border: 1px solid var(--text-secondary) !important;
    color: var(--text-secondary) !important;
}

.gr-button-secondary:hover {
    border-color: var(--neural-cyan) !important;
    color: var(--neural-cyan) !important;
}

/* FIX: Labels should NOT look like buttons */
.gr-textbox label, .gr-plot label, .gr-dropdown label, .gr-checkbox label,
label.svelte-1gfkn6j, .label-wrap, span.svelte-1gfkn6j {
    background: transparent !important;
    border: none !important;
    padding: 0 !important;
    box-shadow: none !important;
    font-weight: 500 !important;
    color: var(--text-secondary) !important;
    cursor: default !important;
}

/* Ensure label containers don't have button styling */
.gr-form > label, .gr-box > label, div[data-testid="textbox"] > label {
    background: none !important;
    border: none !important;
    box-shadow: none !important;
}

/* Input styling */
.gr-textbox textarea, .gr-textbox input {
    font-family: 'Outfit', sans-serif !important;
    background: var(--bg-card) !important;
    border: 1px solid var(--border-subtle) !important;
    border-radius: 8px !important;
    transition: all 0.3s ease !important;
}

.gr-textbox textarea:focus, .gr-textbox input:focus {
    border-color: var(--neural-cyan) !important;
    box-shadow: 0 0 0 3px var(--neural-cyan-glow) !important;
}

/* Card/box styling */
.gr-box, .gr-panel {
    background: var(--bg-card) !important;
    border: 1px solid var(--border-subtle) !important;
    border-radius: 12px !important;
}

/* Plot styling */
.gr-plot {
    background: var(--bg-card) !important;
    border-radius: 12px !important;
    border: 1px solid var(--border-subtle) !important;
}

/* Markdown styling */
.prose {
    color: var(--text-primary) !important;
}

.prose h3, .prose h4 {
    color: var(--neural-cyan) !important;
}

/* Smooth animations */
* {
    transition: background-color 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease;
}
"""

# Inline-styled HTML banner: gradient background, logo/title/tagline, and four
# feature badges (PyTorch TTT, Docker Native, MCP Server, Titans Architecture).
# NOTE(review): where this markup is rendered is outside this section.
HEADER_HTML = '''
<div style="
    font-family: 'Outfit', system-ui, sans-serif;
    background: linear-gradient(135deg, #0a0a1a 0%, #1a1a3a 50%, #0a0a1a 100%);
    padding: 40px 30px;
    border-radius: 20px;
    margin-bottom: 20px;
    border: 1px solid rgba(0, 212, 255, 0.2);
    position: relative;
    overflow: hidden;
">
    <!-- Gradient glow effect -->
    <div style="
        position: absolute;
        top: -50%;
        left: -50%;
        width: 200%;
        height: 200%;
        background: radial-gradient(circle at 30% 30%, rgba(0, 212, 255, 0.1) 0%, transparent 50%),
                    radial-gradient(circle at 70% 70%, rgba(168, 85, 247, 0.1) 0%, transparent 50%);
        pointer-events: none;
    "></div>

    <div style="position: relative; z-index: 1;">
        <!-- Logo and title -->
        <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;">
            <div style="
                font-size: 48px;
                background: linear-gradient(135deg, #00d4ff 0%, #a855f7 100%);
                -webkit-background-clip: text;
                -webkit-text-fill-color: transparent;
                background-clip: text;
            ">🧠</div>
            <div>
                <h1 style="
                    font-size: 2.5em;
                    font-weight: 700;
                    margin: 0;
                    background: linear-gradient(135deg, #00d4ff 0%, #a855f7 50%, #00d4ff 100%);
                    -webkit-background-clip: text;
                    -webkit-text-fill-color: transparent;
                    background-clip: text;
                    letter-spacing: -0.02em;
                ">Docker Neural Memory</h1>
                <p style="
                    color: #94a3b8;
                    margin: 5px 0 0 0;
                    font-size: 1.1em;
                    font-weight: 300;
                ">Test-Time Training: Evolving LLMs from data hoarders to knowledge creators</p>
            </div>
        </div>

        <!-- Feature badges -->
        <div style="display: flex; gap: 12px; flex-wrap: wrap; margin-top: 20px;">
            <span style="
                background: linear-gradient(135deg, rgba(0, 212, 255, 0.2) 0%, rgba(0, 212, 255, 0.1) 100%);
                border: 1px solid rgba(0, 212, 255, 0.3);
                color: #00d4ff;
                padding: 8px 16px;
                border-radius: 20px;
                font-size: 0.85em;
                font-weight: 500;
            ">⚡ PyTorch TTT</span>
            <span style="
                background: linear-gradient(135deg, rgba(168, 85, 247, 0.2) 0%, rgba(168, 85, 247, 0.1) 100%);
                border: 1px solid rgba(168, 85, 247, 0.3);
                color: #a855f7;
                padding: 8px 16px;
                border-radius: 20px;
                font-size: 0.85em;
                font-weight: 500;
            ">🐳 Docker Native</span>
            <span style="
                background: linear-gradient(135deg, rgba(34, 197, 94, 0.2) 0%, rgba(34, 197, 94, 0.1) 100%);
                border: 1px solid rgba(34, 197, 94, 0.3);
                color: #22c55e;
                padding: 8px 16px;
                border-radius: 20px;
                font-size: 0.85em;
                font-weight: 500;
            ">🔌 MCP Server</span>
            <span style="
                background: linear-gradient(135deg, rgba(255, 140, 66, 0.2) 0%, rgba(255, 140, 66, 0.1) 100%);
                border: 1px solid rgba(255, 140, 66, 0.3);
                color: #ff8c42;
                padding: 8px 16px;
                border-radius: 20px;
                font-size: 0.85em;
                font-weight: 500;
            ">📊 Titans Architecture</span>
        </div>
    </div>
</div>
'''

# Inline-styled HTML footer: author credit on the left, six external profile
# links (each opening in a new tab with inline hover handlers) on the right,
# and a one-line project tagline underneath.
# NOTE(review): where this markup is rendered is outside this section.
FOOTER_HTML = '''
<div style="
    font-family: 'Outfit', system-ui, sans-serif;
    background: linear-gradient(135deg, #0a0a1a 0%, #12122a 100%);
    padding: 30px;
    border-radius: 16px;
    margin-top: 30px;
    border: 1px solid rgba(148, 163, 184, 0.1);
">
    <div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap; gap: 20px;">
        <!-- Left side: Built by -->
        <div>
            <p style="color: #94a3b8; margin: 0 0 8px 0; font-size: 0.9em;">Built by</p>
            <p style="color: #f8fafc; margin: 0; font-size: 1.2em; font-weight: 600;">Carlos Crespo Macaya</p>
            <p style="color: #64748b; margin: 5px 0 0 0; font-size: 0.85em;">AI Engineer — GenAI Systems & Applied MLOps</p>
        </div>

        <!-- Right side: Social links -->
        <div style="display: flex; gap: 12px; flex-wrap: wrap;">
            <a href="https://github.com/macayaven/docker-neural-memory" target="_blank" style="
                display: flex; align-items: center; gap: 8px;
                background: rgba(255,255,255,0.05);
                padding: 10px 16px;
                border-radius: 8px;
                text-decoration: none;
                color: #f8fafc;
                font-size: 0.9em;
                transition: all 0.3s ease;
                border: 1px solid transparent;
            " onmouseover="this.style.borderColor='#f8fafc'; this.style.background='rgba(255,255,255,0.1)';"
               onmouseout="this.style.borderColor='transparent'; this.style.background='rgba(255,255,255,0.05)';">
                <span style="font-size: 1.2em;">🐙</span> GitHub
            </a>
            <a href="https://www.linkedin.com/in/carlos-crespo-macaya/" target="_blank" style="
                display: flex; align-items: center; gap: 8px;
                background: rgba(255,255,255,0.05);
                padding: 10px 16px;
                border-radius: 8px;
                text-decoration: none;
                color: #f8fafc;
                font-size: 0.9em;
                transition: all 0.3s ease;
                border: 1px solid transparent;
            " onmouseover="this.style.borderColor='#0077b5'; this.style.color='#0077b5';"
               onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';">
                <span style="font-size: 1.2em;">💼</span> LinkedIn
            </a>
            <a href="https://www.kaggle.com/macayaven" target="_blank" style="
                display: flex; align-items: center; gap: 8px;
                background: rgba(255,255,255,0.05);
                padding: 10px 16px;
                border-radius: 8px;
                text-decoration: none;
                color: #f8fafc;
                font-size: 0.9em;
                transition: all 0.3s ease;
                border: 1px solid transparent;
            " onmouseover="this.style.borderColor='#20beff'; this.style.color='#20beff';"
               onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';">
                <span style="font-size: 1.2em;">📊</span> Kaggle <span style="background: linear-gradient(135deg, #ffd700, #ffb700); color: #000; padding: 2px 6px; border-radius: 4px; font-size: 0.75em; font-weight: 600;">2×🥇</span>
            </a>
            <a href="https://huggingface.co/macayaven" target="_blank" style="
                display: flex; align-items: center; gap: 8px;
                background: rgba(255,255,255,0.05);
                padding: 10px 16px;
                border-radius: 8px;
                text-decoration: none;
                color: #f8fafc;
                font-size: 0.9em;
                transition: all 0.3s ease;
                border: 1px solid transparent;
            " onmouseover="this.style.borderColor='#ff9d00'; this.style.color='#ff9d00';"
               onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';">
                <span style="font-size: 1.2em;">🤗</span> HuggingFace
            </a>
            <a href="https://scholar.google.com/citations?user=hwvDud0AAAAJ" target="_blank" style="
                display: flex; align-items: center; gap: 8px;
                background: rgba(255,255,255,0.05);
                padding: 10px 16px;
                border-radius: 8px;
                text-decoration: none;
                color: #f8fafc;
                font-size: 0.9em;
                transition: all 0.3s ease;
                border: 1px solid transparent;
            " onmouseover="this.style.borderColor='#4285f4'; this.style.color='#4285f4';"
               onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';">
                <span style="font-size: 1.2em;">🎓</span> Scholar
            </a>
            <a href="https://carlos-crespo.com" target="_blank" style="
                display: flex; align-items: center; gap: 8px;
                background: rgba(255,255,255,0.05);
                padding: 10px 16px;
                border-radius: 8px;
                text-decoration: none;
                color: #f8fafc;
                font-size: 0.9em;
                transition: all 0.3s ease;
                border: 1px solid transparent;
            " onmouseover="this.style.borderColor='#00d4ff'; this.style.color='#00d4ff';"
               onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';">
                <span style="font-size: 1.2em;">🌐</span> Website
            </a>
        </div>
    </div>

    <div style="margin-top: 20px; padding-top: 20px; border-top: 1px solid rgba(148, 163, 184, 0.1); text-align: center;">
        <p style="color: #64748b; margin: 0; font-size: 0.85em;">
            Docker Neural Memory — Containerized AI memory with real test-time training
        </p>
    </div>
</div>
'''

# =============================================================================
# HUGGINGFACE INFERENCE CLIENT
# =============================================================================

# Model id used for chat completions; override it with the HF_MODEL env var.
# The default is meant to be one served on the HF Serverless Inference free tier:
# https://huggingface.co/models?inference_provider=hf-inference&pipeline_tag=text-generation
HF_MODEL = os.environ.get("HF_MODEL", "HuggingFaceTB/SmolLM3-3B")
# Access token is optional; this is None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Start in the "no LLM" state and only flip the flag once the client exists,
# so the rest of the module can rely on LLM_AVAILABLE / hf_client being defined.
hf_client = None
LLM_AVAILABLE = False
try:
    hf_client = InferenceClient(model=HF_MODEL, token=HF_TOKEN)
except Exception as e:
    print(f"Warning: Could not initialize HF client: {e}")
else:
    LLM_AVAILABLE = True

# Add src to path for real implementation
# When deployed to HF Spaces, src/ is copied to the same directory as app.py
# The directory holding this file is put first on sys.path so `src.*` resolves from it.
sys.path.insert(0, str(Path(__file__).parent))

# These imports have to follow the sys.path change above, hence the E402 suppressions.
from src.config import MemoryConfig  # noqa: E402
from src.memory.neural_memory import NeuralMemory  # noqa: E402

# =============================================================================
# REAL NEURAL MEMORY INSTANCE
# =============================================================================

# Initialize the REAL neural memory - this is actual PyTorch, not a simulation
# One module-level instance (dim=256, learning_rate=0.02) is shared by every
# function in this file that observes, embeds or inspects memory.
memory = NeuralMemory(MemoryConfig(dim=256, learning_rate=0.02))

# Track history for visualization
# NOTE(review): nothing in the visible part of this file reads or appends to
# this list — confirm it is still used further down.
observation_history: List[Dict] = []

# =============================================================================
# COMPARISON METRICS & KNOWLEDGE BASE
# =============================================================================


@dataclass
class ComparisonMetrics:
    """Running counters for the side-by-side LLM comparison.

    The ``nm_*`` fields describe the neural-memory path. The ``vanilla_*``
    fields describe the baseline path; in the visible part of
    ``compare_responses`` the baseline is the keyword-retrieval RAG call.
    All fields default to zero / empty so a fresh instance starts clean.
    """

    # With Neural Memory
    nm_queries: int = 0  # incremented once per neural-memory query
    nm_correct: int = 0  # incremented when the answer has no refusal phrase and facts exist
    nm_hallucinations: int = 0  # NOTE(review): not updated in the visible part of this file
    nm_response_times: List[float] = field(default_factory=list)  # elapsed seconds per query

    # Vanilla (no memory)
    vanilla_queries: int = 0  # incremented once per baseline (RAG) query
    vanilla_correct: int = 0  # NOTE(review): not updated in the visible part of this file
    vanilla_hallucinations: int = 0  # incremented when the baseline answer contains a "no answer" phrase
    vanilla_response_times: List[float] = field(default_factory=list)  # elapsed seconds per query


# Module-level metrics accumulator, mutated in place by compare_responses.
metrics = ComparisonMetrics()

# Knowledge base - facts the user teaches
# Entries are appended by add_to_knowledge_base as {"fact": ..., "timestamp": ...}.
# NOTE(review): the timestamp stored there is a float from time.time(), which
# does not match the Dict[str, str] annotation — consider widening it.
knowledge_base: List[Dict[str, str]] = []

# Store embeddings for t-SNE visualization
# Entries are appended by add_to_knowledge_base with the keys
# "label", "embedding", "surprise" and "timestamp".
embeddings_store: List[Dict] = []


def get_embedding(text: str) -> np.ndarray:
    """Return a fixed-length vector for ``text`` produced by the memory network.

    The text is encoded with the shared ``memory`` instance and passed through
    ``memory.memory_net`` with gradient tracking disabled. The flattened
    output is then zero-padded on the right, or cut off, so the result always
    holds exactly 256 values.
    """
    fixed_len = 256

    with torch.no_grad():
        encoded = memory._encode_text(text)
        activations = memory.memory_net(encoded)
        vector = activations.cpu().numpy().flatten()

    # Positive: too short, pad with zeros. Negative: too long, keep the head.
    shortfall = fixed_len - len(vector)
    if shortfall > 0:
        return np.pad(vector, (0, shortfall), mode='constant')
    if shortfall < 0:
        return vector[:fixed_len]
    return vector


def create_knowledge_base_visualization() -> plt.Figure:
    """Render the newest knowledge-base facts as a stack of numbered cards.

    Reads the module-level ``knowledge_base`` list. At most the 10 most recent
    facts are drawn; each card shows the fact's 1-based position in the full
    list, and fact text longer than 60 characters is cut to 57 characters plus
    ``...``. With an empty knowledge base a placeholder message is drawn.

    Returns:
        The Matplotlib figure. It is removed from pyplot's registry of open
        figures before it is handed back (also when drawing fails), so
        repeated calls in a long-running server do not pile up open figures.
        The returned object can still be rendered with ``savefig``.
    """
    max_cards = 10
    # Work on a snapshot so the count, the numbering and the cards always agree.
    facts = list(knowledge_base)
    n_facts = len(facts)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis("off")

        if not facts:
            ax.text(
                0.5, 0.5,
                "No facts in knowledge base yet.\nAdd facts to see them here.",
                ha="center", va="center", fontsize=14, color="gray",
            )
            ax.set_title("Knowledge Base (RAG Store)", fontsize=14, fontweight="bold")
            return fig

        ax.set_title(
            f"Knowledge Base (RAG Store) - {n_facts} Facts",
            fontsize=14, fontweight="bold",
        )

        visible = facts[-max_cards:]
        # Number cards by their position in the full list, not in the visible slice.
        first_number = n_facts - len(visible) + 1
        y_positions = np.linspace(0.9, 0.1, len(visible))

        for number, (y_pos, item) in enumerate(zip(y_positions, visible), start=first_number):
            fact_text = item["fact"]
            if len(fact_text) > 60:
                fact_text = fact_text[:57] + "..."

            # Card background (a plain rectangle centred on y_pos).
            card = plt.Rectangle(
                (0.02, y_pos - 0.035), 0.96, 0.07,
                facecolor="#e8f4f8", edgecolor="#3498db",
                linewidth=2, alpha=0.8, zorder=1,
            )
            ax.add_patch(card)

            ax.text(0.05, y_pos, f"#{number}",
                    fontsize=10, fontweight="bold", color="#2980b9", va="center")
            ax.text(0.12, y_pos, fact_text, fontsize=10, va="center", color="#2c3e50")

        if n_facts > max_cards:
            ax.text(0.5, 0.02, f"... and {n_facts - max_cards} more facts",
                    ha="center", fontsize=9, color="gray", style="italic")

        # Lay out this figure explicitly instead of pyplot's implicit "current" one.
        fig.tight_layout()
        return fig
    finally:
        plt.close(fig)


def create_neural_memory_state_visualization() -> plt.Figure:
    """Draw a three-panel summary of the shared neural memory.

    Panels, left to right:
      1. Histogram of every parameter value in ``memory.memory_net``.
      2. Heatmap of the matrix returned by ``get_weight_sample()`` (defined
         elsewhere in this file; the panel title labels it as 16x16).
      3. Text box with values from ``memory.get_stats()`` plus mean, std, min
         and max of the collected weights.

    Returns:
        The Matplotlib figure. It is removed from pyplot's registry of open
        figures before it is handed back (also when drawing fails), so
        repeated calls in a long-running server do not pile up open figures.
        The returned object can still be rendered with ``savefig``.
    """
    # Gather all data first so no figure is created if any of this fails.
    with torch.no_grad():
        # One concatenate call instead of growing a Python list scalar by scalar.
        all_weights = np.concatenate(
            [param.detach().cpu().numpy().ravel() for param in memory.memory_net.parameters()]
        )
    weights = get_weight_sample()
    stats = memory.get_stats()

    stats_text = f"""
    Neural Memory State
    ───────────────────
    Parameters: {stats['weight_parameters']:,}
    Dimension: {stats['dimension']}
    Learning Rate: {stats['learning_rate']:.4f}

    Observations: {stats['total_observations']}
    Avg Surprise: {stats['avg_surprise']:.4f}

    Weight Stats:
    • Mean: {np.mean(all_weights):.4f}
    • Std: {np.std(all_weights):.4f}
    • Min: {np.min(all_weights):.4f}
    • Max: {np.max(all_weights):.4f}
    """

    fig, (ax_hist, ax_heat, ax_stats) = plt.subplots(1, 3, figsize=(14, 4))
    try:
        # 1. Weight distribution histogram, with a marker line at zero.
        ax_hist.hist(all_weights, bins=50, color="#3498db", alpha=0.7, edgecolor="white")
        ax_hist.axvline(x=0, color="red", linestyle="--", alpha=0.5)
        ax_hist.set_title("Weight Distribution", fontsize=11, fontweight="bold")
        ax_hist.set_xlabel("Weight Value")
        ax_hist.set_ylabel("Count")
        ax_hist.grid(True, alpha=0.3)

        # 2. Weight heatmap (sample); the colour scale is fixed to [-0.5, 0.5].
        im = ax_heat.imshow(weights, cmap="RdBu_r", aspect="auto", vmin=-0.5, vmax=0.5)
        ax_heat.set_title("Weight Matrix Sample (16x16)", fontsize=11, fontweight="bold")
        ax_heat.axis("off")
        # Bind the colorbar to this figure rather than pyplot's implicit current one.
        fig.colorbar(im, ax=ax_heat, label="Value")

        # 3. Memory stats as monospace text.
        ax_stats.axis("off")
        ax_stats.text(0.1, 0.5, stats_text, fontsize=10, family="monospace",
                      va="center", transform=ax_stats.transAxes,
                      bbox={"boxstyle": "round,pad=0.5", "facecolor": "#f0f0f0", "alpha": 0.8})
        ax_stats.set_title("Memory Statistics", fontsize=11, fontweight="bold")

        fig.tight_layout()
        return fig
    finally:
        plt.close(fig)


def create_tsne_visualization() -> plt.Figure:
    """Plot the stored embeddings in two dimensions, coloured by surprise.

    Reads the module-level ``embeddings_store``. With fewer than two entries a
    placeholder message is drawn. With two to four entries the projection uses
    PCA; from five entries on it uses t-SNE with ``random_state=42`` and a
    perplexity of ``min(30, n_samples - 1)``. Point labels longer than 30
    characters are shortened to 30 characters plus ``...``.

    Returns:
        The Matplotlib figure. It is removed from pyplot's registry of open
        figures before it is handed back (also when drawing fails), so
        repeated calls in a long-running server do not pile up open figures.
        The returned object can still be rendered with ``savefig``.
    """
    # Snapshot once so embeddings, labels and surprises stay aligned.
    entries = list(embeddings_store)
    n_samples = len(entries)

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        if n_samples < 2:
            ax.text(
                0.5, 0.5,
                "Add at least 2 facts to see the embedding space",
                ha="center", va="center", fontsize=14, color="gray",
            )
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.axis("off")
            return fig

        embeddings = np.array([entry["embedding"] for entry in entries])
        labels = [
            entry["label"][:30] + "..." if len(entry["label"]) > 30 else entry["label"]
            for entry in entries
        ]
        surprises = [entry["surprise"] for entry in entries]

        # PCA for small sample counts, t-SNE otherwise; the perplexity is
        # capped so it always stays below the number of samples.
        if n_samples < 5:
            reducer = PCA(n_components=2)
            method = "PCA"
        else:
            reducer = TSNE(
                n_components=2,
                perplexity=min(30, n_samples - 1),
                random_state=42,
            )
            method = "t-SNE"
        reduced = reducer.fit_transform(embeddings)

        scatter = ax.scatter(
            reduced[:, 0], reduced[:, 1],
            c=surprises, cmap="RdYlBu_r",
            s=150, alpha=0.7, edgecolors="white", linewidth=2,
        )

        for (x, y), label in zip(reduced, labels):
            ax.annotate(
                label, (x, y),
                xytext=(5, 5), textcoords="offset points",
                fontsize=9, alpha=0.8,
                bbox={"boxstyle": "round,pad=0.3", "facecolor": "white", "alpha": 0.7},
            )

        # Bind the colorbar to this figure rather than pyplot's implicit current one.
        cbar = fig.colorbar(scatter, ax=ax)
        cbar.set_label("Surprise (Red=Novel, Blue=Familiar)", fontsize=10)

        ax.set_title(f"Neural Memory Embedding Space ({method})\n"
                     f"{n_samples} observations - Similar concepts cluster together",
                     fontsize=12, fontweight="bold")
        ax.set_xlabel("Dimension 1")
        ax.set_ylabel("Dimension 2")
        ax.grid(True, alpha=0.3)

        fig.tight_layout()
        return fig
    finally:
        plt.close(fig)


def create_embedding_comparison() -> plt.Figure:
    """Draw the weight heatmap next to the 2-D embedding scatter.

    Left panel: heatmap of the matrix returned by ``get_weight_sample()``
    (defined elsewhere in this file). Right panel: the entries of the
    module-level ``embeddings_store`` projected to two dimensions and coloured
    by surprise — PCA below five entries, t-SNE (``random_state=42``,
    perplexity ``min(30, n_samples - 1)``) otherwise. With fewer than two
    entries the right panel shows a placeholder message.

    Returns:
        The Matplotlib figure. It is removed from pyplot's registry of open
        figures before it is handed back (also when drawing fails), so
        repeated calls in a long-running server do not pile up open figures.
        The returned object can still be rendered with ``savefig``.
    """
    weights = get_weight_sample()
    # Snapshot once so embeddings and surprises stay aligned.
    entries = list(embeddings_store)
    n_samples = len(entries)

    fig, (ax_weights, ax_embed) = plt.subplots(1, 2, figsize=(14, 6))
    try:
        # Left: weight heatmap; the colour scale is fixed to [-0.5, 0.5].
        im = ax_weights.imshow(weights, cmap="RdBu_r", aspect="auto", vmin=-0.5, vmax=0.5)
        ax_weights.set_title("Neural Network Weights\n(These update during learning)",
                             fontsize=11, fontweight="bold")
        ax_weights.axis("off")
        # Bind colorbars to this figure rather than pyplot's implicit current one.
        fig.colorbar(im, ax=ax_weights, label="Weight Value")

        # Right: embedding space, or a placeholder when there is too little data.
        if n_samples < 2:
            ax_embed.text(0.5, 0.5, "Add facts to see\nembedding space",
                          ha="center", va="center", fontsize=12, color="gray")
            ax_embed.set_xlim(0, 1)
            ax_embed.set_ylim(0, 1)
        else:
            embeddings = np.array([entry["embedding"] for entry in entries])
            surprises = [entry["surprise"] for entry in entries]

            if n_samples < 5:
                reducer = PCA(n_components=2)
            else:
                reducer = TSNE(
                    n_components=2,
                    perplexity=min(30, n_samples - 1),
                    random_state=42,
                )
            reduced = reducer.fit_transform(embeddings)

            scatter = ax_embed.scatter(reduced[:, 0], reduced[:, 1], c=surprises,
                                       cmap="RdYlBu_r", s=100, alpha=0.7)
            fig.colorbar(scatter, ax=ax_embed, label="Surprise")
            ax_embed.grid(True, alpha=0.3)

        ax_embed.set_title("Learned Representations\n(Similar facts cluster together)",
                           fontsize=11, fontweight="bold")

        fig.tight_layout()
        return fig
    finally:
        plt.close(fig)


def call_llm(prompt: str, context: str = "") -> Tuple[str, float]:
    """Send ``prompt`` to the Hugging Face chat model.

    Args:
        prompt: The user message.
        context: Optional knowledge text. When non-empty it is placed in a
            system message that tells the model to answer only from it.

    Returns:
        ``(answer, elapsed_seconds)``. The answer is stripped, or ``""`` when
        the model returns no content. When the client is unavailable a fixed
        notice is returned, and when the request fails the answer is
        ``"Error: <exception>"``; the elapsed time is ``0.0`` in both cases.
    """
    if not LLM_AVAILABLE or hf_client is None:
        return "[LLM not available - set HF_TOKEN for comparison demo]", 0.0

    # Build messages for chat completion (cannot fail, so kept outside the try).
    messages = [{"role": "user", "content": prompt}]
    if context:
        system_msg = f"""You have access to the following knowledge:

{context}

Based ONLY on the knowledge above, answer questions. If the information is not in the knowledge provided, say "I don't have information about that."
"""
        messages.insert(0, {"role": "system", "content": system_msg})

    try:
        # perf_counter is monotonic, so the latency cannot be skewed (or go
        # negative) if the system clock is adjusted during the request.
        start = time.perf_counter()
        response = hf_client.chat_completion(
            messages=messages,
            max_tokens=150,
            temperature=0.7,
        )
        elapsed = time.perf_counter() - start

        answer = response.choices[0].message.content
    except Exception as e:
        # Best-effort demo path: surface the failure as text instead of raising.
        return f"Error: {e!s}", 0.0

    return answer.strip() if answer else "", elapsed


def _learn_fact(fact: str) -> str:
    """Feed ``fact`` to the neural memory, record it, and return the markdown report."""
    # Run everything that can fail first, so a failure in the memory call or
    # the embedding step never leaves the two stores out of step.
    result = memory.observe(fact)
    embedding = get_embedding(fact)

    added_at = time.time()
    knowledge_base.append({"fact": fact, "timestamp": added_at})
    embeddings_store.append({
        "label": fact,
        "embedding": embedding,
        "surprise": result["surprise"],
        "timestamp": added_at,
    })

    return f"""### Fact Added

**Fact:** "{fact}"

**Neural Memory Response:**
| Metric | Value |
|--------|-------|
| Surprise | {result['surprise']:.4f} |
| Weight Delta | {result['weight_delta']:.6f} |
| Learned | {'Yes' if result['learned'] else 'No'} |

**Knowledge Base Size:** {len(knowledge_base)} facts
**Embeddings Stored:** {len(embeddings_store)}
"""


def add_to_knowledge_base(fact: str) -> Tuple[str, plt.Figure, plt.Figure, plt.Figure]:
    """Add a fact to the knowledge base and observe it in neural memory.

    Args:
        fact: Text entered by the user. ``None``, empty or whitespace-only
            input is rejected with a prompt and changes nothing.

    Returns:
        ``(markdown_message, embedding_figure, memory_state_figure,
        knowledge_base_figure)``. The three figures are always rebuilt, so the
        UI reflects the current state for accepted and rejected input alike.
    """
    if fact and fact.strip():
        message = _learn_fact(fact)
    else:
        message = "Please enter a fact to add."

    return (
        message,
        create_tsne_visualization(),
        create_neural_memory_state_visualization(),
        create_knowledge_base_visualization(),
    )


def get_knowledge_context() -> str:
    """Return every stored fact as one ``- fact`` line each, joined by newlines.

    An empty knowledge base yields the empty string.
    """
    bullet_lines = (f"- {entry['fact']}" for entry in knowledge_base)
    return "\n".join(bullet_lines)


def call_rag_llm(
    question: str, knowledge_base: List[Dict], top_k: int = 2
) -> Tuple[str, float, List[str]]:
    """Simulate RAG: retrieve the best keyword matches and answer from them only.

    Facts are scored by how many distinct words they share with the question.
    Words are compared case-insensitively with punctuation removed, so
    ``"docker?"`` in a question matches ``"Docker"`` in a fact.

    Args:
        question: The user question.
        knowledge_base: Fact entries; each must provide a ``"fact"`` string.
        top_k: Maximum number of facts passed to the model (default 2).

    Returns:
        ``(answer, elapsed_seconds, retrieved_facts)``. ``retrieved_facts`` is
        ``["(none retrieved)"]`` when no fact shares a word with the question
        and ``[]`` when the LLM client is unavailable. When the request fails
        the answer is ``"Error: <exception>"`` and the elapsed time is ``0.0``.
    """
    if not LLM_AVAILABLE or hf_client is None:
        return "[LLM not available]", 0.0, []

    def keywords(text: str) -> set:
        # \w+ drops surrounding punctuation that str.split() would keep attached.
        return set(re.findall(r"\w+", text.lower()))

    # Simple RAG simulation: score every fact by word overlap with the question.
    question_words = keywords(question)
    scored_facts = [
        (len(question_words & keywords(item["fact"])), item["fact"])
        for item in knowledge_base
    ]

    # Stable sort: facts with equal scores keep their insertion order.
    scored_facts.sort(reverse=True, key=lambda pair: pair[0])
    retrieved = [fact for score, fact in scored_facts[:top_k] if score > 0]

    if retrieved:
        context = "Retrieved facts:\n" + "\n".join([f"- {f}" for f in retrieved])
        system_msg = f"""You are a RAG system. You can ONLY use the retrieved facts below to answer.
If the retrieved facts don't directly answer the question, say "The retrieved information doesn't cover this."

{context}
"""
    else:
        system_msg = "You are a RAG system with no relevant documents retrieved. Say 'No relevant documents found.'"
        retrieved = ["(none retrieved)"]

    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": question},
    ]

    try:
        # perf_counter is monotonic, so the latency is immune to clock adjustments.
        start = time.perf_counter()
        response = hf_client.chat_completion(messages=messages, max_tokens=150, temperature=0.7)
        elapsed = time.perf_counter() - start
        answer = response.choices[0].message.content
    except Exception as e:
        # Best-effort demo path: surface the failure as text instead of raising.
        return f"Error: {e!s}", 0.0, retrieved

    return answer.strip() if answer else "", elapsed, retrieved


def call_neural_memory_llm(question: str, knowledge_base: List[Dict], surprise: float) -> Tuple[str, float]:
    """Ask the LLM with every stored fact plus a summary of approval signals.

    All facts are placed in the system prompt. Facts mentioning the whole
    words "approved"/"liked" are counted as positive signals and those
    mentioning "rejected"/"disliked" as negative ones. Matching is on word
    boundaries, so "disliked" is not also counted as "liked" and
    "disapproved" is not counted as "approved".

    Args:
        question: The user question.
        knowledge_base: Fact entries; each must provide a ``"fact"`` string.
        surprise: Novelty score for the question, quoted in the prompt.

    Returns:
        ``(answer, elapsed_seconds)``. When the client is unavailable a fixed
        notice is returned, and when the request fails the answer is
        ``"Error: <exception>"``; the elapsed time is ``0.0`` in both cases.
    """
    if not LLM_AVAILABLE or hf_client is None:
        return "[LLM not available]", 0.0

    # Neural memory provides ALL context + pattern awareness
    all_facts = "\n".join([f"- {item['fact']}" for item in knowledge_base])

    # Count approval/rejection signals on whole words only.
    fact_texts = [item["fact"].lower() for item in knowledge_base]
    approvals = [t for t in fact_texts if re.search(r"\b(?:approved|liked)\b", t)]
    rejections = [t for t in fact_texts if re.search(r"\b(?:rejected|disliked)\b", t)]

    patterns_hint = ""
    if approvals or rejections:
        patterns_hint = "\n\nLearned patterns from observations:"
        if approvals:
            patterns_hint += f"\n- Positive signals: {len(approvals)} approvals/likes"
        if rejections:
            patterns_hint += f"\n- Negative signals: {len(rejections)} rejections/dislikes"
        patterns_hint += "\n- Look for common themes in approved vs rejected items"

    system_msg = f"""You are an AI with neural memory that has LEARNED from all observations below.
Unlike simple retrieval, you should:
1. Consider ALL facts holistically
2. Identify PATTERNS across multiple observations
3. Make INFERENCES based on learned patterns
4. Predict based on trends, not just direct matches

Observations (learned knowledge):
{all_facts}
{patterns_hint}

Question novelty (surprise score): {surprise:.2f}
- Low surprise (<0.3): This topic is familiar from your observations
- High surprise (>0.6): This is a novel topic, be cautious
"""
    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": question},
    ]

    try:
        # perf_counter is monotonic, so the latency is immune to clock adjustments.
        start = time.perf_counter()
        response = hf_client.chat_completion(messages=messages, max_tokens=200, temperature=0.7)
        elapsed = time.perf_counter() - start
        answer = response.choices[0].message.content
    except Exception as e:
        # Best-effort demo path: surface the failure as text instead of raising.
        return f"Error: {e!s}", 0.0

    return answer.strip() if answer else "", elapsed


def _as_blockquote(text: str) -> str:
    """Return *text* with every line prefixed by ``> `` (Markdown blockquote).

    A single ``>`` in front of a multi-line reply only quotes the first
    paragraph; later paragraphs and list items would render outside the quote.
    """
    lines = text.splitlines() or [""]
    return "\n".join(f"> {line}" for line in lines)


def compare_responses(question: str) -> Tuple[str, str, str, plt.Figure, plt.Figure]:
    """Compare RAG vs Neural Memory augmented LLM on the same question.

    Both pipelines are queried with the module-level ``knowledge_base`` and the
    module-level ``metrics`` counters are updated in place.

    Args:
        question: The user's question. Blank/whitespace-only input is a no-op.

    Returns:
        A 5-tuple ``(neural_memory_markdown, rag_markdown, comparison_markdown,
        neural_memory_state_figure, knowledge_base_figure)``. For blank input
        the three strings are empty; when no LLM is configured they carry an
        explanatory message instead. The figures are always freshly created.
    """
    if not question.strip():
        return "", "", "", create_neural_memory_state_visualization(), create_knowledge_base_visualization()

    if not LLM_AVAILABLE:
        return (
            "LLM not available. Please set HF_TOKEN environment variable.",
            "LLM not available.",
            "Comparison requires LLM access.",
            create_neural_memory_state_visualization(),
            create_knowledge_base_visualization(),
        )

    # Check surprise (is this question familiar?) - read-only, no learning here
    surprise = memory.surprise(question)

    # Query with NEURAL MEMORY (pattern learning, all context)
    nm_response, nm_time = call_neural_memory_llm(question, knowledge_base, surprise)
    metrics.nm_queries += 1
    metrics.nm_response_times.append(nm_time)

    # Query with RAG (simple retrieval)
    rag_response, rag_time, retrieved_facts = call_rag_llm(question, knowledge_base)
    metrics.vanilla_queries += 1
    metrics.vanilla_response_times.append(rag_time)

    # Simple quality detection (substring heuristics on the reply text).
    # LLMs often emit a typographic apostrophe (U+2019); fold it to ASCII so
    # "don’t have" still matches the phrases below.
    rag_text = rag_response.replace("\u2019", "'").lower()
    nm_text = nm_response.replace("\u2019", "'").lower()

    rag_failed = any(
        phrase in rag_text
        for phrase in ("doesn't cover", "no relevant", "don't have", "cannot answer")
    )

    # The neural-memory call reports failures in-band ("Error: ...",
    # "[LLM not available]") or as an empty string. None of those contain an
    # "I don't know" phrase, so without this guard a failed call would be
    # scored as a confident, correct answer.
    nm_call_failed = not nm_response.strip() or nm_response.startswith(
        ("Error:", "[LLM not available]")
    )
    nm_confident = not nm_call_failed and not any(
        phrase in nm_text
        for phrase in ("i don't know", "i don't have", "cannot")
    )

    if rag_failed:
        metrics.vanilla_hallucinations += 1
    if nm_confident and knowledge_base:
        metrics.nm_correct += 1

    # Use the same 0.3 / 0.6 tiers that the LLM prompt and the surprise check
    # communicate, so the label shown here never contradicts them.
    if surprise < 0.3:
        novelty = "familiar topic"
    elif surprise > 0.6:
        novelty = "novel topic"
    else:
        novelty = "moderately novel topic"

    # Format outputs - Neural Memory
    nm_output = f"""### Neural Memory (Pattern Learning)

**Question:** {question}

**Response:**
{_as_blockquote(nm_response)}

---
**How it works:**
- Uses **ALL {len(knowledge_base)} facts** holistically
- **Learns patterns** (e.g., approval vs rejection trends)
- **Surprise Score: {surprise:.3f}** - {novelty}
- Response Time: {nm_time:.2f}s
"""

    # Format outputs - RAG
    retrieved_str = "\n".join(f"  - {fact}" for fact in retrieved_facts)
    # "(none retrieved)" is a placeholder entry, not a real fact - don't count it
    retrieved_count = sum(1 for fact in retrieved_facts if fact != "(none retrieved)")
    rag_output = f"""### RAG (Retrieval Only)

**Question:** {question}

**Response:**
{_as_blockquote(rag_response)}

---
**How it works:**
- Retrieved **{retrieved_count} facts** by keyword match:
{retrieved_str}
- **No pattern learning** - just similarity search
- Response Time: {rag_time:.2f}s
"""

    # Comparison summary (reads the counters updated above)
    comparison = get_comparison_summary()

    return (
        nm_output,
        rag_output,
        comparison,
        create_neural_memory_state_visualization(),
        create_knowledge_base_visualization(),
    )


def get_comparison_summary() -> str:
    """Render the running Neural Memory vs RAG scoreboard as Markdown.

    Reads the module-level ``metrics`` counters and ``knowledge_base``; it does
    not modify either. Averages and percentages fall back to 0 when there is
    nothing to divide by.
    """

    def ratio(total, count, scale=1):
        # Guarded division: 0 when no samples/queries have been recorded yet.
        return total / count * scale if count else 0

    nm_times = metrics.nm_response_times
    rag_times = metrics.vanilla_response_times

    nm_avg_time = ratio(sum(nm_times), len(nm_times))
    rag_avg_time = ratio(sum(rag_times), len(rag_times))
    nm_accuracy = ratio(metrics.nm_correct, metrics.nm_queries, 100)
    rag_fail_rate = ratio(metrics.vanilla_hallucinations, metrics.vanilla_queries, 100)

    # Pre-format the two "count (percent)" table cells.
    nm_answers_cell = f"{metrics.nm_correct} ({nm_accuracy:.0f}%)"
    rag_failures_cell = f"{metrics.vanilla_hallucinations} ({rag_fail_rate:.0f}%)"
    fact_count = len(knowledge_base)

    return f"""## Neural Memory vs RAG Comparison

| Metric | Neural Memory | RAG |
|--------|---------------|-----|
| **Queries** | {metrics.nm_queries} | {metrics.vanilla_queries} |
| **Pattern-Based Answers** | {nm_answers_cell} | N/A |
| **Retrieval Failures** | N/A | {rag_failures_cell} |
| **Avg Response Time** | {nm_avg_time:.2f}s | {rag_avg_time:.2f}s |

### Knowledge Base: {fact_count} facts stored

### Why Neural Memory Wins

| Capability | Neural Memory | RAG |
|------------|---------------|-----|
| **Pattern Learning** | Learns trends across all data | No learning |
| **Inference** | Can predict from patterns | Only retrieves matches |
| **Context Usage** | Uses ALL facts holistically | Uses top-k retrieved |
| **Novelty Detection** | Surprise score | None |
| **Memory Size** | Fixed (neural weights) | Grows with data |

### Key Insight
Neural memory **learns patterns** (e.g., "Carlos rejects bright colors, approves dark themes")
and can **infer preferences** for novel items. RAG just retrieves similar documents.
"""


def reset_comparison() -> Tuple[str, plt.Figure, plt.Figure, plt.Figure]:
    """Start the comparison over with empty counters and an empty knowledge base.

    Rebinds the module-level ``metrics``, ``knowledge_base`` and
    ``embeddings_store`` to fresh values, then redraws the three plots.

    Returns:
        ``(status_message, tsne_figure, neural_memory_state_figure,
        knowledge_base_figure)``.
    """
    global metrics, knowledge_base, embeddings_store

    metrics, knowledge_base, embeddings_store = ComparisonMetrics(), [], []

    status = "Comparison reset. Knowledge base and embeddings cleared."
    # Figures are built only after the state above has been cleared.
    tsne_fig = create_tsne_visualization()
    memory_state_fig = create_neural_memory_state_visualization()
    knowledge_fig = create_knowledge_base_visualization()
    return status, tsne_fig, memory_state_fig, knowledge_fig


def reset_memory():
    """Replace the global neural memory with a newly constructed one.

    Also empties the module-level ``observation_history``. Returns a short
    status message.
    """
    global memory, observation_history

    fresh_config = MemoryConfig(dim=256, learning_rate=0.02)
    memory, observation_history = NeuralMemory(fresh_config), []
    return "Memory reset. Fresh neural network initialized."


# =============================================================================
# VISUALIZATION
# =============================================================================


def get_weight_sample() -> np.ndarray:
    """Return the top-left 16x16 corner of the first layer's weight matrix.

    Reads ``memory.memory_net[0].weight`` from the module-level ``memory`` and
    converts the slice to a NumPy array without tracking gradients.
    """
    first_layer = memory.memory_net[0]
    with torch.no_grad():
        corner = first_layer.weight.detach()[:16, :16]
        return corner.cpu().numpy()


def create_weight_visualization() -> plt.Figure:
    """Draw a heatmap of a 16x16 sample of the memory network's weights.

    The title reports the total parameter count of ``memory.memory_net``; the
    colour scale is clamped to [-0.5, 0.5].

    Returns:
        A new matplotlib figure containing the heatmap and its colorbar.
    """
    weights = get_weight_sample()
    param_count = sum(p.numel() for p in memory.memory_net.parameters())

    fig, ax = plt.subplots(figsize=(6, 5))
    im = ax.imshow(weights, cmap="RdBu_r", aspect="auto", vmin=-0.5, vmax=0.5)
    ax.set_title(
        f"Neural Memory Weights\n({param_count:,} parameters)",
        fontsize=12,
        fontweight="bold",
    )
    ax.set_xlabel("These weights UPDATE during inference (TTT)")

    # ax.axis("off") would suppress the axis label together with the ticks and
    # frame, so the caption above would never be drawn. Hide only the ticks and
    # the spines instead.
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    plt.colorbar(im, ax=ax, label="Weight Value")
    plt.tight_layout()
    return fig


def create_history_plot() -> plt.Figure:
    """Chart the surprise value of every recorded observation, in order.

    Reads the module-level ``observation_history``. When it is empty, a
    placeholder message is drawn instead of a line chart.
    """
    fig, ax = plt.subplots(figsize=(8, 3))
    ax.set_title("Learning Progress (Surprise Over Time)", fontsize=12, fontweight="bold")

    surprise_values = [entry["surprise"] for entry in observation_history]

    if not surprise_values:
        # Nothing recorded yet: centred placeholder on a unit square.
        ax.text(0.5, 0.5, "No observations yet", ha="center", va="center", fontsize=12, color="gray")
        ax.set(xlim=(0, 1), ylim=(0, 1))
    else:
        # Observations are numbered from 1 on the x-axis.
        steps = range(1, len(surprise_values) + 1)
        ax.plot(steps, surprise_values, "o-", color="#e74c3c", linewidth=2, markersize=8)
        ax.axhline(y=0.5, color="gray", linestyle="--", alpha=0.5, label="Threshold")
        ax.set(xlabel="Observation #", ylabel="Surprise", ylim=(0, 1))
        ax.grid(True, alpha=0.3)
        ax.legend()

    plt.tight_layout()
    return fig


# =============================================================================
# CORE MEMORY OPERATIONS
# =============================================================================


def observe_content(content: str) -> tuple[str, plt.Figure, plt.Figure]:
    """Feed *content* to the global neural memory and report what changed.

    Calls ``memory.observe(content)``, compares the weight hash taken before
    and after the call, and appends a summary entry to the module-level
    ``observation_history``.

    Args:
        content: Text to observe. Blank/whitespace-only input is rejected
            without touching the memory or the history.

    Returns:
        ``(markdown_report, weight_heatmap_figure, surprise_history_figure)``.
        For blank input the report is a prompt to enter content and the
        figures show the current, unchanged state.
    """
    if not content.strip():
        # Return the current figures rather than None: this honours the
        # declared return type and keeps the plots on screen, matching how
        # compare_responses treats blank input.
        return "Please enter content to observe.", create_weight_visualization(), create_history_plot()

    # Get weight hash BEFORE
    hash_before = memory.get_weight_hash()

    # Observation; the result dict supplies "surprise", "weight_delta" and "learned"
    result = memory.observe(content)

    # Get weight hash AFTER
    hash_after = memory.get_weight_hash()

    # Record history (content is truncated to 50 characters for storage)
    observation_history.append({
        "content": content[:50],
        "surprise": result["surprise"],
        "weight_delta": result["weight_delta"],
        "learned": result["learned"],
    })

    # Format result
    weights_changed = hash_before != hash_after
    output = f"""## Observation Result

**Content:** "{content[:100]}{'...' if len(content) > 100 else ''}"

### Metrics (REAL - from PyTorch gradient descent)

| Metric | Value |
|--------|-------|
| **Surprise** | {result['surprise']:.4f} |
| **Weight Delta** | {result['weight_delta']:.6f} |
| **Weights Changed** | {'YES' if weights_changed else 'NO'} |
| **Hash Before** | `{hash_before}` |
| **Hash After** | `{hash_after}` |

### What Just Happened

1. Text was encoded to tensor representation
2. Forward pass through neural memory network
3. **Surprise computed** via prediction error (MSE loss)
4. **Gradients calculated** via `torch.autograd.grad()`
5. **Weights updated** via gradient descent: `param -= lr * grad`

This is REAL test-time training. The neural network's weights physically changed.
"""

    return output, create_weight_visualization(), create_history_plot()


def check_surprise(content: str) -> str:
    """Report how novel *content* is to the memory, without observing it.

    Uses ``memory.surprise(content)`` only. Returns a Markdown report, or a
    prompt to enter content when the input is blank.
    """
    if not content.strip():
        return "Please enter content to check."

    surprise = memory.surprise(content)

    # At most the first 100 characters are shown, with an ellipsis if cut.
    preview = content[:100] + ("..." if len(content) > 100 else "")

    # Three tiers: below 0.3, above 0.6, and everything in between.
    if surprise < 0.3:
        verdict = "This content is FAMILIAR to the memory."
    elif surprise > 0.6:
        verdict = "This content is NOVEL to the memory."
    else:
        verdict = "This content is somewhat familiar."

    return f"""## Surprise Check (No Learning)

**Content:** "{preview}"

**Surprise Score:** {surprise:.4f}

Interpretation:
- **< 0.3**: Very familiar - memory has seen similar patterns
- **0.3 - 0.6**: Moderately novel
- **> 0.6**: Highly novel - worth learning

{verdict}
"""


def get_memory_stats() -> str:
    """Format ``memory.get_stats()`` and the current weight hash as Markdown."""
    stats = memory.get_stats()

    observation_total = stats["total_observations"]
    param_count = stats["weight_parameters"]
    dimension = stats["dimension"]
    learning_rate = stats["learning_rate"]
    avg_surprise = stats["avg_surprise"]
    weight_hash = memory.get_weight_hash()
    # Size estimate: 4 bytes per parameter, expressed in KB.
    weights_kb = param_count * 4 / 1024

    return f"""## Memory Statistics

| Metric | Value |
|--------|-------|
| **Total Observations** | {observation_total} |
| **Parameters** | {param_count:,} |
| **Dimension** | {dimension} |
| **Learning Rate** | {learning_rate:.4f} |
| **Avg Recent Surprise** | {avg_surprise:.4f} |
| **Current Weight Hash** | `{weight_hash}` |

### This is a Real Neural Network

- **Architecture**: 2-layer MLP with GELU activation and LayerNorm
- **Framework**: PyTorch with autograd
- **Learning**: Test-time training via gradient descent
- **Memory**: ~{weights_kb:.1f} KB of weights

Unlike RAG which stores vectors in a database, this IS the memory.
The weights encode everything learned.
"""


# =============================================================================
# KEY CONCEPTS (New Educational Tab)
# =============================================================================

# Static HTML (inline CSS, no interpolation) for the "Key Concepts" content.
# Sections, in order: the problem (LLMs keep no memory between calls), the two
# solutions side by side (RAG vs. neural memory), and test-time training (TTT).
# Colours are hard-coded hex values; #00d4ff and #a855f7 match --neural-cyan and
# --purple-accent in CUSTOM_CSS, so keep them in sync by hand.
# NOTE(review): presumably rendered through a Gradio HTML component - confirm
# at the usage site.
KEY_CONCEPTS_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; color: #f8fafc;">
    <!-- The Problem -->
    <div style="background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; padding: 25px; margin-bottom: 25px; border: 1px solid rgba(252, 129, 129, 0.3);">
        <h3 style="color: #fc8181; margin: 0 0 20px 0; display: flex; align-items: center; gap: 12px;">
            <span style="font-size: 1.5em;">❌</span> The Problem: LLMs Have No Memory
        </h3>
        <div style="display: flex; gap: 20px; flex-wrap: wrap;">
            <div style="flex: 1; min-width: 280px; background: rgba(0,0,0,0.3); border-radius: 12px; padding: 20px;">
                <p style="color: #a0aec0; margin: 0 0 15px 0; font-size: 0.95em;">Every API call to an LLM starts <strong style="color: #fc8181;">fresh</strong>:</p>
                <div style="background: #0a0a1a; border-radius: 8px; padding: 15px; font-family: 'JetBrains Mono', monospace; font-size: 0.85em;">
                    <div style="color: #64748b;">// Call 1</div>
                    <div style="color: #f8fafc;">User: "My name is Carlos"</div>
                    <div style="color: #22c55e;">AI: "Nice to meet you, Carlos!"</div>
                    <br/>
                    <div style="color: #64748b;">// Call 2 (new session)</div>
                    <div style="color: #f8fafc;">User: "What's my name?"</div>
                    <div style="color: #fc8181;">AI: "I don't know your name."</div>
                </div>
            </div>
            <div style="flex: 1; min-width: 280px; background: rgba(0,0,0,0.3); border-radius: 12px; padding: 20px;">
                <p style="color: #a0aec0; margin: 0 0 15px 0; font-size: 0.95em;">The model's weights are <strong style="color: #fc8181;">frozen</strong> after training:</p>
                <ul style="color: #a0aec0; margin: 0; padding-left: 20px; line-height: 1.8;">
                    <li>Can't learn new information</li>
                    <li>Can't remember past conversations</li>
                    <li>Can't adapt to user preferences</li>
                    <li>Knowledge is static (training cutoff)</li>
                </ul>
            </div>
        </div>
    </div>

    <!-- Two Solutions -->
    <h3 style="color: #f8fafc; margin: 30px 0 20px 0; text-align: center; font-size: 1.3em;">
        Two Solutions to Add Memory
    </h3>

    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)); gap: 25px;">
        <!-- RAG Solution -->
        <div style="background: linear-gradient(135deg, rgba(252, 129, 129, 0.1) 0%, rgba(237, 137, 54, 0.1) 100%); border: 2px solid #fc8181; border-radius: 16px; padding: 25px;">
            <h4 style="color: #fc8181; margin: 0 0 15px 0; display: flex; align-items: center; gap: 10px;">
                <span style="font-size: 1.3em;">📚</span> Solution A: RAG (Retrieval)
            </h4>
            <p style="color: #a0aec0; font-size: 0.9em; margin: 0 0 15px 0;">
                <strong>Store</strong> information externally, <strong>retrieve</strong> relevant pieces when needed.
            </p>
            <div style="background: rgba(0,0,0,0.3); border-radius: 10px; padding: 15px; margin-bottom: 15px;">
                <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 10px;">
                    <span style="background: #fc8181; color: #1a1a2e; padding: 4px 10px; border-radius: 4px; font-size: 0.8em; font-weight: 600;">HOW</span>
                </div>
                <ol style="color: #a0aec0; margin: 0; padding-left: 20px; font-size: 0.9em; line-height: 1.7;">
                    <li>Convert facts to vectors (embeddings)</li>
                    <li>Store in vector database</li>
                    <li>On query, find similar vectors</li>
                    <li>Pass retrieved docs to LLM</li>
                </ol>
            </div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
                <span style="background: rgba(252, 129, 129, 0.2); color: #fc8181; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Simple</span>
                <span style="background: rgba(252, 129, 129, 0.2); color: #fc8181; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Scalable</span>
                <span style="background: rgba(100, 116, 139, 0.3); color: #94a3b8; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✗ No patterns</span>
                <span style="background: rgba(100, 116, 139, 0.3); color: #94a3b8; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✗ Grows</span>
            </div>
        </div>

        <!-- Neural Memory Solution -->
        <div style="background: linear-gradient(135deg, rgba(0, 212, 255, 0.1) 0%, rgba(168, 85, 247, 0.1) 100%); border: 2px solid #00d4ff; border-radius: 16px; padding: 25px;">
            <h4 style="color: #00d4ff; margin: 0 0 15px 0; display: flex; align-items: center; gap: 10px;">
                <span style="font-size: 1.3em;">🧠</span> Solution B: Neural Memory (Learning)
            </h4>
            <p style="color: #a0aec0; font-size: 0.9em; margin: 0 0 15px 0;">
                <strong>Learn</strong> information into neural weights. Memory IS the network.
            </p>
            <div style="background: rgba(0,0,0,0.3); border-radius: 10px; padding: 15px; margin-bottom: 15px;">
                <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 10px;">
                    <span style="background: #00d4ff; color: #1a1a2e; padding: 4px 10px; border-radius: 4px; font-size: 0.8em; font-weight: 600;">HOW</span>
                </div>
                <ol style="color: #a0aec0; margin: 0; padding-left: 20px; font-size: 0.9em; line-height: 1.7;">
                    <li>Encode fact as tensor</li>
                    <li>Forward pass through neural net</li>
                    <li>Compute prediction error (surprise)</li>
                    <li><strong style="color: #00d4ff;">Update weights</strong> via gradient descent</li>
                </ol>
            </div>
            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
                <span style="background: rgba(0, 212, 255, 0.2); color: #00d4ff; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Learns patterns</span>
                <span style="background: rgba(0, 212, 255, 0.2); color: #00d4ff; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Fixed size</span>
                <span style="background: rgba(0, 212, 255, 0.2); color: #00d4ff; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Can infer</span>
                <span style="background: rgba(100, 116, 139, 0.3); color: #94a3b8; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✗ Complex</span>
            </div>
        </div>
    </div>

    <!-- Test-Time Training Innovation -->
    <div style="background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; padding: 25px; margin-top: 25px; border: 1px solid rgba(0, 212, 255, 0.3);">
        <h3 style="color: #00d4ff; margin: 0 0 20px 0; display: flex; align-items: center; gap: 12px;">
            <span style="font-size: 1.5em;">⚡</span> The Innovation: Test-Time Training (TTT)
        </h3>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 20px;">
            <div style="background: rgba(0,0,0,0.3); border-radius: 12px; padding: 20px;">
                <h5 style="color: #a855f7; margin: 0 0 10px 0;">Traditional Training</h5>
                <p style="color: #a0aec0; font-size: 0.9em; margin: 0; line-height: 1.6;">
                    Train once → Freeze weights → Deploy<br/>
                    <span style="color: #64748b;">Model can't learn after deployment</span>
                </p>
            </div>
            <div style="background: rgba(0, 212, 255, 0.1); border-radius: 12px; padding: 20px; border: 1px solid rgba(0, 212, 255, 0.2);">
                <h5 style="color: #00d4ff; margin: 0 0 10px 0;">Test-Time Training (Titans)</h5>
                <p style="color: #a0aec0; font-size: 0.9em; margin: 0; line-height: 1.6;">
                    Deploy → <strong style="color: #00d4ff;">Continue learning</strong> → Weights update<br/>
                    <span style="color: #22c55e;">Model learns from every interaction</span>
                </p>
            </div>
        </div>
        <div style="margin-top: 20px; padding: 15px; background: rgba(0,0,0,0.3); border-radius: 10px;">
            <p style="color: #a0aec0; margin: 0; font-size: 0.9em;">
                <strong style="color: #f8fafc;">This demo implements real TTT:</strong> When you add a fact, actual PyTorch gradients flow and actual neural network weights change. This is not a simulation—it's the Titans architecture from Google's December 2024 paper.
            </p>
        </div>
    </div>
</div>
'''

# =============================================================================
# INCREMENTAL INTEGRATION DIAGRAMS
# =============================================================================

# "Step 1" diagram: static, inline-styled HTML showing the baseline flow
# User Query -> LLM (frozen weights) -> "No Memory", followed by a warning
# banner. Plain string with no placeholders.
VANILLA_LLM_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-bottom: 20px; border: 1px solid rgba(148, 163, 184, 0.2);">
    <h4 style="color: #94a3b8; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
        <span style="background: #374151; color: #f8fafc; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 1</span>
        Vanilla LLM (The Problem)
    </h4>
    <div style="display: flex; align-items: center; justify-content: center; gap: 20px; flex-wrap: wrap;">
        <div style="background: #2d3748; padding: 20px 30px; border-radius: 12px; text-align: center;">
            <div style="font-size: 32px; margin-bottom: 10px;">👤</div>
            <div style="color: #f8fafc; font-weight: 500;">User Query</div>
            <div style="color: #64748b; font-size: 0.85em;">"What's my preference?"</div>
        </div>
        <div style="color: #64748b; font-size: 32px;">→</div>
        <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 20px 30px; border-radius: 12px; text-align: center; border: 2px solid #d6bcfa;">
            <div style="font-size: 32px; margin-bottom: 10px;">🤖</div>
            <div style="color: #f8fafc; font-weight: 600;">LLM</div>
            <div style="color: #e9d8fd; font-size: 0.85em;">Frozen weights</div>
        </div>
        <div style="color: #64748b; font-size: 32px;">→</div>
        <div style="background: rgba(252, 129, 129, 0.2); padding: 20px 30px; border-radius: 12px; text-align: center; border: 2px solid #fc8181;">
            <div style="font-size: 32px; margin-bottom: 10px;">❓</div>
            <div style="color: #fc8181; font-weight: 500;">No Memory</div>
            <div style="color: #a0aec0; font-size: 0.85em;">"I don't know"</div>
        </div>
    </div>
    <div style="margin-top: 15px; padding: 12px; background: rgba(252, 129, 129, 0.1); border-radius: 8px; text-align: center;">
        <span style="color: #fc8181; font-size: 0.9em;">⚠️ LLM has no way to remember user-specific information between sessions</span>
    </div>
</div>
'''

# "Step 2a" diagram: static, inline-styled HTML showing the RAG flow
# Query -> Retriever -> Vector DB -> Context -> LLM, with a two-cell pro/con
# footer. Accent colour #ff8c42 matches --rag-orange in CUSTOM_CSS.
# NOTE(review): the "prompt injection" caption under "Context" appears to mean
# inserting retrieved context into the prompt, not the security attack of the
# same name - consider rewording the label.
RAG_INTEGRATION_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-bottom: 20px; border: 1px solid rgba(255, 140, 66, 0.3);">
    <h4 style="color: #ff8c42; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
        <span style="background: #ff8c42; color: #1a1a2e; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 2a</span>
        Adding RAG (Retrieval-Augmented Generation)
    </h4>
    <div style="display: flex; align-items: center; justify-content: center; gap: 15px; flex-wrap: wrap;">
        <div style="background: #2d3748; padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">👤</div>
            <div style="color: #f8fafc; font-size: 0.9em;">Query</div>
        </div>
        <div style="color: #ff8c42; font-size: 24px;">→</div>
        <div style="background: rgba(255, 140, 66, 0.2); padding: 15px 20px; border-radius: 10px; text-align: center; border: 1px dashed #ff8c42;">
            <div style="font-size: 24px;">🔍</div>
            <div style="color: #ff8c42; font-size: 0.9em;">Retriever</div>
            <div style="color: #64748b; font-size: 0.75em;">keyword match</div>
        </div>
        <div style="color: #ff8c42; font-size: 24px;">→</div>
        <div style="background: #744210; padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">🗃️</div>
            <div style="color: #faf089; font-size: 0.9em;">Vector DB</div>
            <div style="color: #64748b; font-size: 0.75em;">top-k docs</div>
        </div>
        <div style="color: #ff8c42; font-size: 24px;">→</div>
        <div style="background: #3182ce; padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">📋</div>
            <div style="color: #bee3f8; font-size: 0.9em;">Context</div>
            <div style="color: #64748b; font-size: 0.75em;">prompt injection</div>
        </div>
        <div style="color: #ff8c42; font-size: 24px;">→</div>
        <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">🤖</div>
            <div style="color: #f8fafc; font-size: 0.9em;">LLM</div>
        </div>
    </div>
    <div style="margin-top: 15px; display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
        <div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
            <span style="color: #22c55e; font-size: 0.85em;">✓ External memory storage</span>
        </div>
        <div style="padding: 10px; background: rgba(252, 129, 129, 0.1); border-radius: 6px;">
            <span style="color: #fc8181; font-size: 0.85em;">✗ No pattern learning</span>
        </div>
    </div>
</div>
'''

# "Step 2b" diagram: static, inline-styled HTML showing the neural-memory flow
# Query -> Neural Memory (TTT module) -> Patterns (+ surprise) -> Rich Context
# -> LLM, with a three-cell benefits footer. Accent colour #00d4ff matches
# --neural-cyan in CUSTOM_CSS. Plain string with no placeholders.
NEURAL_MEMORY_INTEGRATION_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-bottom: 20px; border: 1px solid rgba(0, 212, 255, 0.3);">
    <h4 style="color: #00d4ff; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
        <span style="background: #00d4ff; color: #1a1a2e; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 2b</span>
        Adding Neural Memory (Test-Time Training)
    </h4>
    <div style="display: flex; align-items: center; justify-content: center; gap: 15px; flex-wrap: wrap;">
        <div style="background: #2d3748; padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">👤</div>
            <div style="color: #f8fafc; font-size: 0.9em;">Query</div>
        </div>
        <div style="color: #00d4ff; font-size: 24px;">→</div>
        <div style="background: rgba(0, 212, 255, 0.2); padding: 15px 20px; border-radius: 10px; text-align: center; border: 2px solid #00d4ff;">
            <div style="font-size: 24px;">🧠</div>
            <div style="color: #00d4ff; font-size: 0.9em; font-weight: 600;">Neural Memory</div>
            <div style="color: #64748b; font-size: 0.75em;">TTT Module</div>
        </div>
        <div style="color: #00d4ff; font-size: 24px;">→</div>
        <div style="background: #2f855a; padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">📊</div>
            <div style="color: #9ae6b4; font-size: 0.9em;">Patterns</div>
            <div style="color: #64748b; font-size: 0.75em;">+ surprise</div>
        </div>
        <div style="color: #00d4ff; font-size: 24px;">→</div>
        <div style="background: #3182ce; padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">📋</div>
            <div style="color: #bee3f8; font-size: 0.9em;">Rich Context</div>
            <div style="color: #64748b; font-size: 0.75em;">all facts + hints</div>
        </div>
        <div style="color: #00d4ff; font-size: 24px;">→</div>
        <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 10px; text-align: center;">
            <div style="font-size: 24px;">🤖</div>
            <div style="color: #f8fafc; font-size: 0.9em;">LLM</div>
        </div>
    </div>
    <div style="margin-top: 15px; display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px;">
        <div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
            <span style="color: #22c55e; font-size: 0.85em;">✓ Learns patterns</span>
        </div>
        <div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
            <span style="color: #22c55e; font-size: 0.85em;">✓ Fixed memory size</span>
        </div>
        <div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
            <span style="color: #22c55e; font-size: 0.85em;">✓ Can infer/predict</span>
        </div>
    </div>
</div>
'''

# "Step 3" diagram: static, inline-styled HTML showing a Docker container
# (Neural Memory, MCP Server, HTTP API) linked to a checkpoint volume, plus a
# "Why Docker?" note. Accent colour #a855f7 matches --purple-accent in
# CUSTOM_CSS. Unlike the other step diagrams, the outer <div> sets no
# margin-bottom. Plain string with no placeholders.
DOCKER_DEPLOYMENT_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; border: 1px solid rgba(168, 85, 247, 0.3);">
    <h4 style="color: #a855f7; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
        <span style="background: #a855f7; color: #1a1a2e; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 3</span>
        Docker Deployment (Production Ready)
    </h4>
    <div style="display: flex; align-items: stretch; justify-content: center; gap: 20px; flex-wrap: wrap;">
        <!-- Docker Container -->
        <div style="background: rgba(168, 85, 247, 0.1); border: 2px solid #a855f7; border-radius: 12px; padding: 20px; min-width: 280px;">
            <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 15px;">
                <span style="font-size: 1.5em;">🐳</span>
                <span style="color: #a855f7; font-weight: 600;">Docker Container</span>
            </div>
            <div style="display: flex; flex-direction: column; gap: 10px;">
                <div style="background: rgba(0, 212, 255, 0.2); padding: 10px; border-radius: 8px; border: 1px solid rgba(0, 212, 255, 0.3);">
                    <div style="color: #00d4ff; font-size: 0.85em; font-weight: 500;">🧠 Neural Memory</div>
                    <div style="color: #64748b; font-size: 0.75em;">PyTorch TTT Module</div>
                </div>
                <div style="background: rgba(34, 197, 94, 0.2); padding: 10px; border-radius: 8px; border: 1px solid rgba(34, 197, 94, 0.3);">
                    <div style="color: #22c55e; font-size: 0.85em; font-weight: 500;">🔌 MCP Server</div>
                    <div style="color: #64748b; font-size: 0.75em;">Claude Desktop Integration</div>
                </div>
                <div style="background: rgba(255, 140, 66, 0.2); padding: 10px; border-radius: 8px; border: 1px solid rgba(255, 140, 66, 0.3);">
                    <div style="color: #ff8c42; font-size: 0.85em; font-weight: 500;">🌐 HTTP API</div>
                    <div style="color: #64748b; font-size: 0.75em;">REST Endpoints</div>
                </div>
            </div>
        </div>
        <!-- Volume -->
        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; gap: 10px;">
            <div style="color: #64748b; font-size: 24px;">↔</div>
            <div style="background: #374151; padding: 15px 20px; border-radius: 10px; text-align: center;">
                <div style="font-size: 24px;">💾</div>
                <div style="color: #f8fafc; font-size: 0.9em;">Volume</div>
                <div style="color: #64748b; font-size: 0.75em;">Checkpoints</div>
            </div>
        </div>
    </div>
    <div style="margin-top: 20px; padding: 15px; background: rgba(0,0,0,0.3); border-radius: 10px;">
        <div style="color: #a0aec0; font-size: 0.9em;">
            <strong style="color: #a855f7;">Why Docker?</strong> Learned neural weights persist across container restarts via Docker volumes. Deploy anywhere with identical behavior. Version control your AI's memory state like Git commits.
        </div>
    </div>
</div>
'''

# =============================================================================
# DOCKER ECOSYSTEM INTEGRATION
# =============================================================================

# Markdown for the Docker integration content: MCP tool interface, a Docker
# Compose example, a feature table and a rationale list.
# The fenced Python and YAML snippets are display text only - nothing in this
# string is executed by this module. The Python snippet's inner docstrings use
# single-quote triples so they do not terminate this double-quote-triple literal.
DOCKER_INTEGRATION_MD = """
## Docker Ecosystem Integration

This neural memory is designed for **containerized deployment** with full Docker integration.

### MCP Server Interface

The memory exposes tools via Model Context Protocol (MCP):

```python
# MCP Tools Available
@mcp.tool()
def observe(content: str) -> dict:
    '''Feed context, trigger learning.'''
    return memory.observe(content)

@mcp.tool()
def surprise(content: str) -> float:
    '''Measure novelty without learning.'''
    return memory.surprise(content)

@mcp.tool()
def checkpoint(name: str) -> str:
    '''Save learned state to Docker volume.'''
    return save_checkpoint(name)

@mcp.tool()
def restore(name: str) -> str:
    '''Load previous state from Docker volume.'''
    return load_checkpoint(name)
```

### Docker Compose Deployment

```yaml
version: '3.8'
services:
  neural-memory:
    build: .
    ports:
      - "8000:8000"  # MCP server
    volumes:
      - memory-state:/app/checkpoints  # Persistent state
    environment:
      - MEMORY_DIM=512
      - LEARNING_RATE=0.01

volumes:
  memory-state:  # State survives container restarts
```

### Key Docker-Native Features

| Feature | Implementation |
|---------|---------------|
| **State Persistence** | Docker volumes for checkpoints |
| **Horizontal Scaling** | Stateless inference, shared state via volume |
| **CI/CD Integration** | GitHub Actions with Docker build |
| **Resource Control** | Container limits for GPU/memory |
| **Health Checks** | `/health` endpoint with memory stats |

### Why Docker + Neural Memory?

1. **Containerized AI Memory**: Package learned state with your app
2. **Version Control**: Checkpoint states like Git commits
3. **Reproducibility**: Same container = same behavior
4. **Orchestration Ready**: Deploy to Kubernetes, ECS, etc.
5. **MCP Protocol**: Claude Desktop integration via container

---

*This project demonstrates production-grade AI infrastructure with Docker.*
"""

# =============================================================================
# ARCHITECTURE DIAGRAMS (How It Works)
# =============================================================================

# Markdown heading and two-sentence lead-in for the "How It Works" material,
# ending in a horizontal rule so the following diagrams start a new section.
# NOTE(review): the text claims the diagrams are faithful to the actual
# implementation; the diagrams are static strings, so that claim has to be
# re-checked by hand whenever the model or retrieval code changes.
ARCHITECTURE_INTRO_MD = """
## How It Works: Neural Memory vs RAG Architecture

This section provides a detailed look at how both systems process information and connect to the LLM.
The diagrams below are **faithful representations of our actual implementation**.

---
"""

# Self-contained HTML fragment (inline styles only, no external CSS classes)
# that draws the neural-memory pipeline as two flex rows of boxes:
#   Phase 1 (learning): User Fact -> _encode_text() -> memory_net(x)
#                       -> MSE Loss -> WEIGHT UPDATE
#   Phase 2 (query):    Question -> surprise() -> Build Context
#                       -> System Prompt -> LLM
# Each phase ends with a "Key Point" caption.
# Every figure in the diagram is hard-coded text, not derived from the running
# model: "Tensor [1, 64, 256]", "~250K params", "All 4 observations",
# "SmolLM3-3B". Update them by hand if the implementation changes.
# NOTE(review): "~250K params" here disagrees with "~262K parameters" in
# ARCHITECTURE_SUMMARY_MD -- confirm the real count and align both.
NEURAL_MEMORY_DIAGRAM_HTML = """
<div style="font-family: system-ui, -apple-system, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff;">
    <h3 style="text-align: center; color: #00d4ff; margin-bottom: 30px; font-size: 1.5em;">
        Neural Memory Pipeline (Test-Time Training)
    </h3>

    <!-- Main Flow -->
    <div style="display: flex; flex-direction: column; gap: 20px; max-width: 900px; margin: 0 auto;">

        <!-- Phase 1: Learning Phase -->
        <div style="background: rgba(0, 212, 255, 0.1); border: 2px solid #00d4ff; border-radius: 12px; padding: 20px;">
            <h4 style="color: #00d4ff; margin: 0 0 15px 0;">Phase 1: Learning (When Facts Are Added)</h4>

            <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;">
                <!-- Input -->
                <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;">
                    <div style="font-size: 24px;">📝</div>
                    <div style="font-weight: bold; color: #fff;">User Fact</div>
                    <div style="font-size: 11px; color: #a0aec0;">"Carlos rejected<br/>bright colors"</div>
                </div>

                <div style="color: #00d4ff; font-size: 24px;">→</div>

                <!-- Encode -->
                <div style="background: #553c9a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px;">
                    <div style="font-size: 24px;">🔢</div>
                    <div style="font-weight: bold; color: #fff;">_encode_text()</div>
                    <div style="font-size: 11px; color: #d6bcfa;">Tensor [1, 64, 256]</div>
                </div>

                <div style="color: #00d4ff; font-size: 24px;">→</div>

                <!-- Forward Pass -->
                <div style="background: #2f855a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px;">
                    <div style="font-size: 24px;">🧠</div>
                    <div style="font-weight: bold; color: #fff;">memory_net(x)</div>
                    <div style="font-size: 11px; color: #9ae6b4;">2-layer MLP<br/>~250K params</div>
                </div>

                <div style="color: #00d4ff; font-size: 24px;">→</div>

                <!-- Compute Loss -->
                <div style="background: #c53030; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px;">
                    <div style="font-size: 24px;">📊</div>
                    <div style="font-weight: bold; color: #fff;">MSE Loss</div>
                    <div style="font-size: 11px; color: #feb2b2;">Surprise Score<br/>= Prediction Error</div>
                </div>

                <div style="color: #00d4ff; font-size: 24px;">→</div>

                <!-- Gradient Descent -->
                <div style="background: #d69e2e; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 160px; border: 3px solid #faf089;">
                    <div style="font-size: 24px;">⚡</div>
                    <div style="font-weight: bold; color: #1a202c;">WEIGHT UPDATE</div>
                    <div style="font-size: 11px; color: #744210;">torch.autograd.grad()<br/>param -= lr × grad</div>
                </div>
            </div>

            <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;">
                <strong style="color: #00d4ff;">Key Point:</strong> The neural network's weights physically change after each fact.
                This is real gradient descent happening at inference time (Test-Time Training / Titans architecture).
            </div>
        </div>

        <!-- Phase 2: Query Phase -->
        <div style="background: rgba(72, 187, 120, 0.1); border: 2px solid #48bb78; border-radius: 12px; padding: 20px;">
            <h4 style="color: #48bb78; margin: 0 0 15px 0;">Phase 2: Query (When Questions Are Asked)</h4>

            <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;">
                <!-- Question -->
                <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;">
                    <div style="font-size: 24px;">❓</div>
                    <div style="font-weight: bold; color: #fff;">Question</div>
                    <div style="font-size: 11px; color: #a0aec0;">"Will Carlos<br/>like neon?"</div>
                </div>

                <div style="color: #48bb78; font-size: 24px;">→</div>

                <!-- Surprise Check -->
                <div style="background: #553c9a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 130px;">
                    <div style="font-size: 24px;">🎯</div>
                    <div style="font-weight: bold; color: #fff;">surprise()</div>
                    <div style="font-size: 11px; color: #d6bcfa;">Novelty Score<br/>(No Learning)</div>
                </div>

                <div style="color: #48bb78; font-size: 24px;">→</div>

                <!-- Context Builder -->
                <div style="background: #2f855a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 160px;">
                    <div style="font-size: 24px;">📦</div>
                    <div style="font-weight: bold; color: #fff;">Build Context</div>
                    <div style="font-size: 11px; color: #9ae6b4;"><strong>ALL facts</strong><br/>+ Pattern hints<br/>+ Surprise score</div>
                </div>

                <div style="color: #48bb78; font-size: 24px;">→</div>

                <!-- System Prompt -->
                <div style="background: #3182ce; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 180px;">
                    <div style="font-size: 24px;">💬</div>
                    <div style="font-weight: bold; color: #fff;">System Prompt</div>
                    <div style="font-size: 10px; color: #bee3f8; text-align: left; margin-top: 5px;">
                        "You have LEARNED from:<br/>
                        • All 4 observations<br/>
                        • Identify PATTERNS<br/>
                        • Make INFERENCES"
                    </div>
                </div>

                <div style="color: #48bb78; font-size: 24px;">→</div>

                <!-- LLM -->
                <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px; border: 3px solid #d6bcfa;">
                    <div style="font-size: 24px;">🤖</div>
                    <div style="font-weight: bold; color: #fff;">LLM</div>
                    <div style="font-size: 11px; color: #e9d8fd;">SmolLM3-3B<br/>(HuggingFace)</div>
                </div>
            </div>

            <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;">
                <strong style="color: #48bb78;">Key Point:</strong> The LLM receives ALL facts + learned pattern hints + novelty indicator.
                It's instructed to identify patterns and make inferences, not just retrieve.
            </div>
        </div>

    </div>
</div>
"""

# Self-contained HTML fragment (inline styles only) that draws the RAG
# baseline as the counterpart to NEURAL_MEMORY_DIAGRAM_HTML, using the same
# example fact and question:
#   Phase 1 (storage): User Fact -> knowledge_base.append() -> Document Store
#   Phase 2 (query):   Question -> Tokenize -> Keyword Overlap -> Top-2 Facts
#                      -> System Prompt -> LLM
# The outer <div> carries `margin-top: 20px`, which the neural-memory diagram
# does not, so this fragment is spaced as if placed below another element.
# All numbers ("Top-2", "2 retrieved facts (not all 4)") are hard-coded text.
# NOTE(review): the closing Key Point says the LLM sees 2 retrieved facts,
# while the RAG example in LLM_INTEGRATION_MD shows "Only 1 fact retrieved".
# Confirm which matches the retrieval code and align the copy.
# NOTE(review): the Tokenize box shows lower-cased tokens; whether the real
# tokenizer lower-cases is not visible from here -- verify.
RAG_DIAGRAM_HTML = """
<div style="font-family: system-ui, -apple-system, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-top: 20px;">
    <h3 style="text-align: center; color: #fc8181; margin-bottom: 30px; font-size: 1.5em;">
        RAG Pipeline (Retrieval-Augmented Generation)
    </h3>

    <!-- Main Flow -->
    <div style="display: flex; flex-direction: column; gap: 20px; max-width: 900px; margin: 0 auto;">

        <!-- Phase 1: Storage Phase -->
        <div style="background: rgba(252, 129, 129, 0.1); border: 2px solid #fc8181; border-radius: 12px; padding: 20px;">
            <h4 style="color: #fc8181; margin: 0 0 15px 0;">Phase 1: Storage (When Facts Are Added)</h4>

            <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;">
                <!-- Input -->
                <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;">
                    <div style="font-size: 24px;">📝</div>
                    <div style="font-weight: bold; color: #fff;">User Fact</div>
                    <div style="font-size: 11px; color: #a0aec0;">"Carlos rejected<br/>bright colors"</div>
                </div>

                <div style="color: #fc8181; font-size: 24px;">→</div>

                <!-- Append to List -->
                <div style="background: #744210; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 180px;">
                    <div style="font-size: 24px;">📋</div>
                    <div style="font-weight: bold; color: #fff;">knowledge_base.append()</div>
                    <div style="font-size: 11px; color: #faf089;">Simple list storage<br/>No transformation</div>
                </div>

                <div style="color: #fc8181; font-size: 24px;">→</div>

                <!-- Storage -->
                <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px; border: 2px dashed #a0aec0;">
                    <div style="font-size: 24px;">🗃️</div>
                    <div style="font-weight: bold; color: #fff;">Document Store</div>
                    <div style="font-size: 11px; color: #a0aec0;">List of strings<br/>Grows with data</div>
                </div>
            </div>

            <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;">
                <strong style="color: #fc8181;">Key Point:</strong> Facts are simply stored as-is. <strong>No learning occurs.</strong>
                The system doesn't understand relationships or patterns between facts.
            </div>
        </div>

        <!-- Phase 2: Retrieval Phase -->
        <div style="background: rgba(237, 137, 54, 0.1); border: 2px solid #ed8936; border-radius: 12px; padding: 20px;">
            <h4 style="color: #ed8936; margin: 0 0 15px 0;">Phase 2: Query (When Questions Are Asked)</h4>

            <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;">
                <!-- Question -->
                <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;">
                    <div style="font-size: 24px;">❓</div>
                    <div style="font-weight: bold; color: #fff;">Question</div>
                    <div style="font-size: 11px; color: #a0aec0;">"Will Carlos<br/>like neon?"</div>
                </div>

                <div style="color: #ed8936; font-size: 24px;">→</div>

                <!-- Tokenize -->
                <div style="background: #553c9a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 130px;">
                    <div style="font-size: 24px;">✂️</div>
                    <div style="font-weight: bold; color: #fff;">Tokenize</div>
                    <div style="font-size: 11px; color: #d6bcfa;">Split into words<br/>{"will", "carlos",<br/>"like", "neon"}</div>
                </div>

                <div style="color: #ed8936; font-size: 24px;">→</div>

                <!-- Keyword Match -->
                <div style="background: #c53030; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 160px;">
                    <div style="font-size: 24px;">🔍</div>
                    <div style="font-weight: bold; color: #fff;">Keyword Overlap</div>
                    <div style="font-size: 11px; color: #feb2b2;">Count matching words<br/>between Q and each fact</div>
                </div>

                <div style="color: #ed8936; font-size: 24px;">→</div>

                <!-- Top-K -->
                <div style="background: #744210; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 130px; border: 3px solid #faf089;">
                    <div style="font-size: 24px;">🏆</div>
                    <div style="font-weight: bold; color: #fff;">Top-2 Facts</div>
                    <div style="font-size: 11px; color: #faf089;">Only highest<br/>overlap scores</div>
                </div>

                <div style="color: #ed8936; font-size: 24px;">→</div>

                <!-- System Prompt -->
                <div style="background: #3182ce; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 180px;">
                    <div style="font-size: 24px;">💬</div>
                    <div style="font-weight: bold; color: #fff;">System Prompt</div>
                    <div style="font-size: 10px; color: #bee3f8; text-align: left; margin-top: 5px;">
                        "You are a RAG system.<br/>
                        ONLY use retrieved facts.<br/>
                        If not covered, say so."
                    </div>
                </div>

                <div style="color: #ed8936; font-size: 24px;">→</div>

                <!-- LLM -->
                <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px; border: 3px solid #d6bcfa;">
                    <div style="font-size: 24px;">🤖</div>
                    <div style="font-weight: bold; color: #fff;">LLM</div>
                    <div style="font-size: 11px; color: #e9d8fd;">SmolLM3-3B<br/>(Same model!)</div>
                </div>
            </div>

            <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;">
                <strong style="color: #ed8936;">Key Point:</strong> The LLM only sees 2 retrieved facts (not all 4).
                "neon" ≠ "bright" keyword-wise, so relevant facts may not be retrieved!
            </div>
        </div>

    </div>
</div>
"""

# Markdown copy that shows, as two box-drawn code blocks, an example system
# prompt for the neural-memory path and one for the RAG path, each followed by
# a small table summarising what that path hands to the LLM.
# The example prompts (facts, "2 approvals / 2 rejections", surprise score
# 0.89) are static illustration text, not output captured at runtime.
# The string deliberately ends on the heading "The Critical Difference: What
# Goes Into the LLM" with no body.
# NOTE(review): presumably COMPARISON_TABLE_HTML is rendered directly after
# this constant to supply that section's content -- confirm at the usage site.
# NOTE(review): in the RAG box the quote opened at "You are a RAG system..."
# is never closed, and the example says "Only 1 fact retrieved" while the table
# below it (and RAG_DIAGRAM_HTML) say Top-2. Align with the real retrieval code.
# NOTE(review): several rows of the box art have differing widths, so the
# right-hand border may look ragged in a monospace rendering.
LLM_INTEGRATION_MD = """
---

## How Each System Connects to the LLM

Both systems use the **exact same LLM** (HuggingFace SmolLM3-3B). The difference is **what context they provide**.

### Neural Memory → LLM Connection

```
┌─────────────────────────────────────────────────────────────────────┐
│                    SYSTEM PROMPT (Neural Memory)                      │
├─────────────────────────────────────────────────────────────────────┤
│ "You are an AI with neural memory that has LEARNED from all         │
│  observations below. Unlike simple retrieval, you should:           │
│                                                                      │
│  1. Consider ALL facts holistically                                  │
│  2. Identify PATTERNS across multiple observations                   │
│  3. Make INFERENCES based on learned patterns                        │
│  4. Predict based on trends, not just direct matches                 │
│                                                                      │
│  Observations (learned knowledge):                                   │
│  - Carlos rejected the bright colorful design                        │
│  - Carlos rejected the flashy animated homepage                      │
│  - Carlos approved the minimalist dark layout                        │
│  - Carlos approved the clean monochrome interface                    │
│                                                                      │
│  Learned patterns from observations:                                 │
│  - Positive signals: 2 approvals                                     │
│  - Negative signals: 2 rejections                                    │
│  - Look for common themes in approved vs rejected items              │
│                                                                      │
│  Question novelty (surprise score): 0.89                             │
│  - High surprise (>0.6): This is a novel topic, be cautious"         │
├─────────────────────────────────────────────────────────────────────┤
│ USER: "We have a new UI mockup with neon colors - will Carlos       │
│        like it?"                                                     │
└─────────────────────────────────────────────────────────────────────┘
```

**What the Neural Memory provides:**
| Component | Purpose |
|-----------|---------|
| **ALL facts** | Complete context for holistic reasoning |
| **Pattern hints** | Extracted approval/rejection counts |
| **Surprise score** | Indicates if question is familiar or novel |
| **Inference instructions** | Tells LLM to identify patterns and predict |

---

### RAG → LLM Connection

```
┌─────────────────────────────────────────────────────────────────────┐
│                       SYSTEM PROMPT (RAG)                            │
├─────────────────────────────────────────────────────────────────────┤
│ "You are a RAG system. You can ONLY use the retrieved facts below   │
│  to answer. If the retrieved facts don't directly answer the        │
│  question, say 'The retrieved information doesn't cover this.'      │
│                                                                      │
│  Retrieved facts:                                                    │
│  - Carlos rejected the bright colorful design                        │
│  (Only 1 fact retrieved - 'neon' didn't match other keywords!)      │
├─────────────────────────────────────────────────────────────────────┤
│ USER: "We have a new UI mockup with neon colors - will Carlos       │
│        like it?"                                                     │
└─────────────────────────────────────────────────────────────────────┘
```

**What RAG provides:**
| Component | Purpose |
|-----------|---------|
| **Top-2 facts only** | Limited context based on keyword overlap |
| **No pattern info** | System doesn't analyze relationships |
| **No novelty signal** | No indication of question familiarity |
| **Strict retrieval instructions** | Tells LLM to only use retrieved facts |

---

## The Critical Difference: What Goes Into the LLM

"""

# Self-contained HTML fragment (inline styles only) laying out a two-column
# CSS grid: "Neural Memory" on the left, "RAG" on the right. Each column holds
# four cards in the same order -- Facts Provided, Pattern Analysis, Novelty
# Signal, Instructions -- so the rows can be compared side by side.
# The card contents ("ALL 4 facts", "Surprise score: 0.89", "Only 1-2 facts")
# are hard-coded to the four-fact Carlos example and are not computed at
# runtime; change them by hand if the example scenario changes.
# The grid uses fixed `1fr 1fr` columns with no media query, so it will not
# collapse to a single column on narrow viewports.
COMPARISON_TABLE_HTML = """
<div style="font-family: system-ui, -apple-system, sans-serif; padding: 20px; background: #1a1a2e; border-radius: 16px; color: #fff; margin: 20px 0;">
    <h3 style="text-align: center; color: #fff; margin-bottom: 20px;">Side-by-Side: What the LLM Receives</h3>

    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
        <!-- Neural Memory Column -->
        <div style="background: rgba(0, 212, 255, 0.1); border: 2px solid #00d4ff; border-radius: 12px; padding: 20px;">
            <h4 style="color: #00d4ff; text-align: center; margin: 0 0 15px 0;">🧠 Neural Memory</h4>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;">
                <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Facts Provided:</div>
                <div style="font-size: 13px; color: #a0aec0;">ALL 4 facts (complete knowledge)</div>
            </div>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;">
                <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Pattern Analysis:</div>
                <div style="font-size: 13px; color: #a0aec0;">
                    • 2 approvals identified<br/>
                    • 2 rejections identified<br/>
                    • "Look for common themes"
                </div>
            </div>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;">
                <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Novelty Signal:</div>
                <div style="font-size: 13px; color: #a0aec0;">Surprise score: 0.89 (novel topic)</div>
            </div>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px;">
                <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Instructions:</div>
                <div style="font-size: 13px; color: #a0aec0;">
                    "Identify PATTERNS"<br/>
                    "Make INFERENCES"<br/>
                    "Predict based on trends"
                </div>
            </div>
        </div>

        <!-- RAG Column -->
        <div style="background: rgba(252, 129, 129, 0.1); border: 2px solid #fc8181; border-radius: 12px; padding: 20px;">
            <h4 style="color: #fc8181; text-align: center; margin: 0 0 15px 0;">📚 RAG</h4>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;">
                <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">⚠️ Facts Provided:</div>
                <div style="font-size: 13px; color: #a0aec0;">Only 1-2 facts (keyword match)<br/>
                <span style="color: #fc8181; font-size: 11px;">"neon" ≠ "bright", "colorful", etc.</span></div>
            </div>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;">
                <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">❌ Pattern Analysis:</div>
                <div style="font-size: 13px; color: #a0aec0;">None - no relationship detection</div>
            </div>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;">
                <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">❌ Novelty Signal:</div>
                <div style="font-size: 13px; color: #a0aec0;">None - no familiarity indicator</div>
            </div>

            <div style="background: #2d3748; border-radius: 8px; padding: 15px;">
                <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">⚠️ Instructions:</div>
                <div style="font-size: 13px; color: #a0aec0;">
                    "ONLY use retrieved facts"<br/>
                    "If not covered, say so"<br/>
                    <span style="color: #fc8181; font-size: 11px;">No inference allowed</span>
                </div>
            </div>
        </div>
    </div>
</div>
"""

# Markdown copy closing the architecture section. It contains, in order:
#   * a Neural Memory vs RAG comparison table,
#   * "when each approach wins" lists,
#   * two box-drawn flow charts describing the neural-memory steps
#     (_encode_text -> memory_net -> _compute_surprise_tensor ->
#     _update_weights) and the simplified RAG steps (append -> keyword
#     overlap -> top-K with K=2).
# Everything here is static descriptive text; none of it is generated from
# the model or retrieval objects.
# NOTE(review): parameter counts are inconsistent. The table says "~250K
# params" and the memory_net box says "~262K parameters", but the layers as
# listed (Linear 256->1024, LayerNorm 1024, Linear 1024->256) would total
# about 528K if both Linear layers have biases and LayerNorm is affine.
# Confirm against the real module and correct the copy.
# NOTE(review): the RAG pseudo-code uses `query.split()` without lower-casing,
# whereas RAG_DIAGRAM_HTML shows lower-cased tokens -- verify which is true.
ARCHITECTURE_SUMMARY_MD = """
---

## Why This Architecture Matters

### The Learning Advantage

| Aspect | Neural Memory | RAG |
|--------|---------------|-----|
| **Storage** | Fixed neural weights (~250K params) | Growing document store |
| **Learning** | Yes - weights update per observation | No - just stores text |
| **Retrieval** | Not needed - patterns in weights | Required - keyword matching |
| **Inference** | Can generalize to novel queries | Limited to direct matches |
| **Memory Size** | Constant (doesn't grow) | Linear growth with data |

### When Neural Memory Wins

The architecture shines when:
1. **Pattern Recognition Required** - "Carlos likes X, dislikes Y" → predict for Z
2. **Novel Queries** - Question keywords don't match stored facts
3. **Holistic Reasoning** - Answer requires synthesizing multiple facts
4. **Bounded Memory** - Can't afford growing storage

### When RAG Might Be Better

RAG is simpler when:
1. **Exact Retrieval** - "What did the document say about X?"
2. **Large Corpus** - Millions of documents to search
3. **No Patterns** - Facts are independent, not related
4. **Transparency** - Need to cite exact source documents

---

## Technical Implementation Details

### Neural Memory Architecture

```
Input Text
    │
    ▼
┌─────────────────────────────────────────────┐
│  _encode_text(text)                          │
│  ┌─────────────────────────────────────────┐ │
│  │ 1. Convert to ASCII ordinals            │ │
│  │ 2. Pad/truncate to max_seq_len (64)     │ │
│  │ 3. Project to dimension (256)           │ │
│  │ 4. Output: Tensor [1, 64, 256]          │ │
│  └─────────────────────────────────────────┘ │
└─────────────────────────────────────────────┘
    │
    ▼
┌─────────────────────────────────────────────┐
│  memory_net (nn.Sequential)                  │
│  ┌─────────────────────────────────────────┐ │
│  │ Linear(256 → 1024)                      │ │
│  │ GELU activation                         │ │
│  │ LayerNorm(1024)                         │ │
│  │ Linear(1024 → 256)                      │ │
│  └─────────────────────────────────────────┘ │
│  Total: ~262K parameters                     │
└─────────────────────────────────────────────┘
    │
    ▼
┌─────────────────────────────────────────────┐
│  _compute_surprise_tensor(input, output)     │
│  ┌─────────────────────────────────────────┐ │
│  │ loss = MSE(output, target)              │ │
│  │ surprise = sigmoid(loss) scaled to 0-1  │ │
│  └─────────────────────────────────────────┘ │
└─────────────────────────────────────────────┘
    │
    ▼
┌─────────────────────────────────────────────┐
│  _update_weights(loss) [IF learn=True]       │
│  ┌─────────────────────────────────────────┐ │
│  │ grads = torch.autograd.grad(loss, θ)    │ │
│  │ for each (param, grad):                 │ │
│  │     param -= learning_rate × grad       │ │
│  └─────────────────────────────────────────┘ │
│  ⚡ This is the key innovation!              │
└─────────────────────────────────────────────┘
```

### RAG Architecture (Simplified for Demo)

```
Input Text
    │
    ▼
┌─────────────────────────────────────────────┐
│  knowledge_base.append({"fact": text, ...})  │
│  Simple list storage - no transformation     │
└─────────────────────────────────────────────┘

Query
    │
    ▼
┌─────────────────────────────────────────────┐
│  Keyword Overlap Scoring                     │
│  ┌─────────────────────────────────────────┐ │
│  │ question_words = set(query.split())     │ │
│  │ for fact in knowledge_base:             │ │
│  │     fact_words = set(fact.split())      │ │
│  │     score = len(question_words ∩ fact_  │ │
│  │              words)                      │ │
│  └─────────────────────────────────────────┘ │
└─────────────────────────────────────────────┘
    │
    ▼
┌─────────────────────────────────────────────┐
│  Top-K Selection (K=2 in our demo)           │
│  Return facts with highest overlap scores    │
└─────────────────────────────────────────────┘
```

---

*These diagrams represent the actual implementation in this demo. The code is open source.*
"""

# Markdown copy for the project's "About" material. Sections, in order:
# what makes the demo special, the Titans paper reference, Docker
# integration, a limitations table, acknowledgments with paper and tool links,
# next steps, and an author credit.
# All figures and links are static text maintained by hand.
# NOTE(review): "~250K parameters" appears twice here (feature list and
# limitations table) while ARCHITECTURE_SUMMARY_MD quotes "~262K"; confirm the
# real count and keep the copies in sync.
# NOTE(review): the Limitations section calls this "not a production-ready
# system", whereas the module docstring is titled "Production Demo" and
# DOCKER_INTEGRATION_MD says "production-grade AI infrastructure". Consider
# aligning the wording.
ABOUT_MD = """
## About This Project

### What Makes This Special

This is **NOT a simulation**. The demo runs real PyTorch code:

1. **Real Neural Network**: 2-layer MLP with ~250K parameters
2. **Real Gradient Descent**: `torch.autograd.grad()` computes gradients
3. **Real Weight Updates**: Parameters change during inference
4. **Real Surprise Metric**: MSE loss measures prediction error

### The Titans Architecture

Based on Google's December 2024 paper: [arxiv.org/abs/2501.00663](https://arxiv.org/abs/2501.00663)

**Key Innovation**: The memory IS a neural network. Instead of storing vectors,
it learns patterns by updating weights during inference.

### Docker Integration

- **MCP Server**: Model Context Protocol for Claude Desktop
- **Checkpoints**: Save/restore learned state via Docker volumes
- **Container-Native**: Designed for orchestrated deployment

---

## Limitations

This is a **demonstration project**, not a production-ready system:

| Component | Current State | Production Would Need |
|-----------|---------------|----------------------|
| **RAG Implementation** | Simplified keyword matching | Vector embeddings + semantic search (FAISS, Pinecone) |
| **Neural Memory** | Basic 2-layer MLP | Deeper architecture, attention mechanisms |
| **Scalability** | Single-user demo | Distributed inference, GPU optimization |
| **Evaluation** | Qualitative comparison | Benchmarks, ablation studies, metrics |
| **Memory Capacity** | ~250K parameters | Larger models, hierarchical memory |

The RAG comparison uses simple word overlap scoring to demonstrate *why* keyword-based retrieval fails for pattern inference. A production RAG system would use proper embeddings and vector similarity search.

---

## Acknowledgments

This project builds on the work of brilliant researchers:

**Core Research:**
- **Titans: Learning to Memorize at Test Time** (Google, Dec 2024) — [arXiv:2501.00663](https://arxiv.org/abs/2501.00663)
  - Ali Behrouz, Peilin Zhong, Vahab Mirrokni
- **Learning to (Learn at Test Time): RNNs with Expressive Hidden States** (Stanford/Meta, Jul 2024) — [arXiv:2407.04620](https://arxiv.org/abs/2407.04620)
  - Yu Sun, Xinhao Li, Karan Dalal, et al.

**Frameworks & Tools:**
- [PyTorch](https://pytorch.org/) — The foundation for neural memory implementation
- [Gradio](https://gradio.app/) — Interactive demo interface
- [HuggingFace](https://huggingface.co/) — Model hosting and inference API
- [Model Context Protocol](https://modelcontextprotocol.io/) — Claude Desktop integration

**Inspiration:**
- The broader ML community exploring alternatives to attention-based memory
- Open-source contributors who make research accessible

---

## Next Steps

Potential improvements for future iterations:

1. **Real RAG Baseline**: Integrate sentence-transformers + FAISS for proper semantic retrieval comparison
2. **Attention-Based Memory**: Implement the full Titans architecture with neural long-term memory gates
3. **Benchmarking**: Add quantitative evaluation on standard memory tasks (bAbI, etc.)
4. **Multi-Modal Support**: Extend to image/audio observations
5. **Distributed Memory**: Explore memory sharing across multiple agents
6. **Fine-Grained Forgetting**: Implement selective memory consolidation/pruning

---

## Built By

**Carlos Crespo Macaya**
AI Engineer — GenAI Systems & Applied MLOps

This project demonstrates the ability to:
1. Read cutting-edge research (Titans paper)
2. Implement it correctly (PyTorch TTT)
3. Productionize it (Docker, MCP, CI/CD)
4. Communicate it effectively (this demo)
"""


# =============================================================================
# GRADIO INTERFACE
# =============================================================================

# Build the whole UI declaratively. Components are created in the order they
# appear below, nested inside the layout context managers (Tabs / TabItem /
# Row / Column), so statement order here is what determines the page layout.
# The resulting Blocks app is bound to the module-level name `demo`.
with gr.Blocks(title="Docker Neural Memory", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
    # Branded header
    gr.HTML(HEADER_HTML)

    with gr.Tabs():
        # TAB 1: Comparison Demo (NEW - Main Feature)
        # The only interactive tab: it creates the input widgets, the three
        # plots, and wires the three button click handlers.
        with gr.TabItem("LLM Comparison"):
            gr.Markdown("""
            ### Neural Memory vs RAG (Retrieval-Augmented Generation)

            **Step 1:** Teach the system facts about preferences/patterns
            **Step 2:** Ask questions that require **inference**, not just retrieval

            **RAG** retrieves similar documents but can't learn patterns.
            **Neural Memory** learns from ALL observations and can infer from trends.
            """)

            # Step 1 row: fact entry (left column) next to the knowledge-base
            # plot (right column).
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("#### Step 1: Teach Facts")
                    fact_input = gr.Textbox(
                        label="Add a Fact",
                        placeholder="e.g., 'Carlos prefers VSCode over Vim'",
                        lines=2,
                    )
                    add_fact_btn = gr.Button("Add to Knowledge Base", variant="secondary")
                    # Status message area filled by the add-fact handler.
                    fact_output = gr.Markdown()
                    gr.Markdown("#### Example Facts to Try")
                    gr.Markdown("""
                    **Scenario: Learning User Preferences (Pattern Recognition)**
                    1. "Carlos rejected the bright colorful design"
                    2. "Carlos rejected the flashy animated homepage"
                    3. "Carlos approved the minimalist dark layout"
                    4. "Carlos approved the clean monochrome interface"

                    Then ask: **"We have a new UI mockup with neon colors - will Carlos like it?"**

                    *Neural Memory learns the pattern (Carlos prefers dark/minimal). RAG just retrieves similar facts without inferring the preference pattern.*
                    """)

                with gr.Column(scale=1):
                    gr.Markdown("#### Knowledge Base (RAG Store)")
                    kb_plot = gr.Plot(label="Facts Stored")

            # Visualizations row
            # Both plots are outputs of the add-fact and reset handlers below;
            # neural_state_plot is also an output of the compare handler.
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("#### Neural Memory State")
                    neural_state_plot = gr.Plot(label="Neural Network Weights & Stats")
                with gr.Column(scale=1):
                    gr.Markdown("#### Embedding Space")
                    tsne_plot = gr.Plot(label="t-SNE/PCA Visualization")

            # Clicking "Add to Knowledge Base" passes the textbox value to
            # add_to_knowledge_base; the handler is expected to return one value
            # per listed output, in this order (status text, then three plots).
            add_fact_btn.click(
                add_to_knowledge_base,
                inputs=[fact_input],
                outputs=[fact_output, tsne_plot, neural_state_plot, kb_plot]
            )

            gr.Markdown("---")
            gr.Markdown("#### Step 2: Ask Questions & Compare Responses")

            # Step 2 row: question box (wider, scale=2) next to a static hint panel.
            with gr.Row():
                with gr.Column(scale=2):
                    question_input = gr.Textbox(
                        label="Ask a Question",
                        placeholder="e.g., 'We have a new UI mockup with neon colors - will Carlos like it?'",
                        lines=2,
                    )
                with gr.Column(scale=1):
                    gr.Markdown("""
                    **Best Questions for Neural Memory:**
                    - Questions requiring **pattern inference**
                    - Questions about **preferences/trends**
                    - Questions needing **generalization**
                    """)

            with gr.Row():
                compare_btn = gr.Button("Compare Responses", variant="primary", size="lg")
                reset_compare_btn = gr.Button("Reset Comparison", variant="secondary")

            # Response display - side by side with clear headers
            with gr.Row():
                with gr.Column():
                    gr.Markdown("##### Neural Memory Response")
                    nm_response = gr.Markdown()
                with gr.Column():
                    gr.Markdown("##### RAG Response")
                    vanilla_response = gr.Markdown()

            comparison_summary = gr.Markdown(label="Comparison Metrics")

            # Clicking "Compare Responses" passes the question text to
            # compare_responses, which fills both response panes, the summary,
            # and two of the plots. tsne_plot is not in this outputs list, so
            # it is left untouched by a comparison.
            compare_btn.click(
                compare_responses,
                inputs=[question_input],
                outputs=[nm_response, vanilla_response, comparison_summary, neural_state_plot, kb_plot],
            )
            # Clicking "Reset Comparison" calls reset_comparison with no inputs.
            # NOTE(review): nm_response, vanilla_response and fact_output are not
            # listed as outputs here, so this handler cannot clear them; confirm
            # whether stale responses remaining after a reset is intended.
            reset_compare_btn.click(
                reset_comparison,
                outputs=[comparison_summary, tsne_plot, neural_state_plot, kb_plot]
            )

        # TAB 2: How It Works (Architecture Diagrams)
        # Static content only: pre-built Markdown/HTML constants rendered in order.
        with gr.TabItem("How It Works"):
            gr.Markdown(ARCHITECTURE_INTRO_MD)

            # Neural Memory Diagram
            gr.HTML(NEURAL_MEMORY_DIAGRAM_HTML)

            # RAG Diagram
            gr.HTML(RAG_DIAGRAM_HTML)

            # LLM Integration Explanation
            gr.Markdown(LLM_INTEGRATION_MD)

            # Side-by-side comparison table
            gr.HTML(COMPARISON_TABLE_HTML)

            # Architecture Summary
            gr.Markdown(ARCHITECTURE_SUMMARY_MD)

        # TAB 3: Key Concepts
        # Static content only.
        with gr.TabItem("Key Concepts"):
            gr.HTML(KEY_CONCEPTS_HTML)

        # TAB 4: Integration & Docker
        # Static content only: a sequence of diagrams followed by Docker notes.
        with gr.TabItem("Integration & Docker"):
            gr.Markdown("## How Memory Modules Integrate with LLMs")
            gr.Markdown("Follow this incremental explanation to understand how both RAG and Neural Memory attach to a vanilla LLM.")

            # Step 1: Vanilla LLM
            gr.HTML(VANILLA_LLM_DIAGRAM_HTML)

            # Step 2a: RAG Integration
            gr.HTML(RAG_INTEGRATION_DIAGRAM_HTML)

            # Step 2b: Neural Memory Integration
            gr.HTML(NEURAL_MEMORY_INTEGRATION_DIAGRAM_HTML)

            # Step 3: Docker Deployment
            gr.HTML(DOCKER_DEPLOYMENT_DIAGRAM_HTML)

            # Docker details
            gr.Markdown(DOCKER_INTEGRATION_MD)

        # TAB 5: About
        # Static content only.
        with gr.TabItem("About"):
            gr.Markdown(ABOUT_MD)

    # Polished footer with profile links
    # Created outside gr.Tabs(), so it is not part of any single tab.
    gr.HTML(FOOTER_HTML)


if __name__ == "__main__":
    # Script entry point: start the Gradio server for the `demo` app.
    # The host/port are fixed here; presumably "0.0.0.0" is used so the app is
    # reachable from outside a container (Docker / hosted Space) - confirm.
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )