Spaces:
Sleeping
Sleeping
| """ | |
| Docker Neural Memory - Production Demo | |
| REAL neural memory implementation using Titans architecture. | |
| Demonstrates Docker-native AI memory with MCP server integration. | |
| Deploy to: https://huggingface.co/spaces | |
| """ | |
| import os | |
| import sys | |
| import time | |
| from dataclasses import dataclass, field | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple | |
| import gradio as gr | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import torch | |
| from huggingface_hub import InferenceClient | |
| from sklearn.decomposition import PCA | |
| from sklearn.manifold import TSNE | |
| matplotlib.use("Agg") | |
| # ============================================================================= | |
| # CUSTOM CSS FOR POLISHED UI | |
| # ============================================================================= | |
| CUSTOM_CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600&family=Outfit:wght@300;400;500;600;700&display=swap'); | |
| :root { | |
| --neural-cyan: #00d4ff; | |
| --neural-cyan-glow: rgba(0, 212, 255, 0.3); | |
| --rag-orange: #ff8c42; | |
| --purple-accent: #a855f7; | |
| --bg-deep: #0a0a1a; | |
| --bg-card: #12122a; | |
| --bg-card-hover: #1a1a3a; | |
| --text-primary: #f8fafc; | |
| --text-secondary: #94a3b8; | |
| --border-subtle: rgba(148, 163, 184, 0.1); | |
| --success-green: #22c55e; | |
| } | |
| /* Global font settings */ | |
| .gradio-container { | |
| font-family: 'Outfit', system-ui, -apple-system, sans-serif !important; | |
| background: linear-gradient(180deg, var(--bg-deep) 0%, #0f0f23 100%) !important; | |
| } | |
| /* Headings */ | |
| .gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container h4 { | |
| font-family: 'Outfit', sans-serif !important; | |
| font-weight: 600 !important; | |
| letter-spacing: -0.02em !important; | |
| } | |
| /* Code and monospace */ | |
| .gradio-container code, .gradio-container pre { | |
| font-family: 'JetBrains Mono', monospace !important; | |
| } | |
| /* Tab styling */ | |
| .tabs > .tab-nav > button { | |
| font-family: 'Outfit', sans-serif !important; | |
| font-weight: 500 !important; | |
| padding: 12px 24px !important; | |
| border-radius: 8px 8px 0 0 !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| .tabs > .tab-nav > button.selected { | |
| background: linear-gradient(135deg, var(--neural-cyan) 0%, var(--purple-accent) 100%) !important; | |
| color: white !important; | |
| } | |
| /* Button styling */ | |
| .gr-button { | |
| font-family: 'Outfit', sans-serif !important; | |
| font-weight: 500 !important; | |
| border-radius: 8px !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| .gr-button-primary { | |
| background: linear-gradient(135deg, var(--neural-cyan) 0%, var(--purple-accent) 100%) !important; | |
| border: none !important; | |
| } | |
| .gr-button-primary:hover { | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 8px 25px var(--neural-cyan-glow) !important; | |
| } | |
| .gr-button-secondary { | |
| background: transparent !important; | |
| border: 1px solid var(--text-secondary) !important; | |
| color: var(--text-secondary) !important; | |
| } | |
| .gr-button-secondary:hover { | |
| border-color: var(--neural-cyan) !important; | |
| color: var(--neural-cyan) !important; | |
| } | |
| /* FIX: Labels should NOT look like buttons */ | |
| .gr-textbox label, .gr-plot label, .gr-dropdown label, .gr-checkbox label, | |
| label.svelte-1gfkn6j, .label-wrap, span.svelte-1gfkn6j { | |
| background: transparent !important; | |
| border: none !important; | |
| padding: 0 !important; | |
| box-shadow: none !important; | |
| font-weight: 500 !important; | |
| color: var(--text-secondary) !important; | |
| cursor: default !important; | |
| } | |
| /* Ensure label containers don't have button styling */ | |
| .gr-form > label, .gr-box > label, div[data-testid="textbox"] > label { | |
| background: none !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| /* Input styling */ | |
| .gr-textbox textarea, .gr-textbox input { | |
| font-family: 'Outfit', sans-serif !important; | |
| background: var(--bg-card) !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| border-radius: 8px !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| .gr-textbox textarea:focus, .gr-textbox input:focus { | |
| border-color: var(--neural-cyan) !important; | |
| box-shadow: 0 0 0 3px var(--neural-cyan-glow) !important; | |
| } | |
| /* Card/box styling */ | |
| .gr-box, .gr-panel { | |
| background: var(--bg-card) !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| border-radius: 12px !important; | |
| } | |
| /* Plot styling */ | |
| .gr-plot { | |
| background: var(--bg-card) !important; | |
| border-radius: 12px !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| } | |
| /* Markdown styling */ | |
| .prose { | |
| color: var(--text-primary) !important; | |
| } | |
| .prose h3, .prose h4 { | |
| color: var(--neural-cyan) !important; | |
| } | |
| /* Smooth animations */ | |
| * { | |
| transition: background-color 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease; | |
| } | |
| """ | |
| HEADER_HTML = ''' | |
| <div style=" | |
| font-family: 'Outfit', system-ui, sans-serif; | |
| background: linear-gradient(135deg, #0a0a1a 0%, #1a1a3a 50%, #0a0a1a 100%); | |
| padding: 40px 30px; | |
| border-radius: 20px; | |
| margin-bottom: 20px; | |
| border: 1px solid rgba(0, 212, 255, 0.2); | |
| position: relative; | |
| overflow: hidden; | |
| "> | |
| <!-- Gradient glow effect --> | |
| <div style=" | |
| position: absolute; | |
| top: -50%; | |
| left: -50%; | |
| width: 200%; | |
| height: 200%; | |
| background: radial-gradient(circle at 30% 30%, rgba(0, 212, 255, 0.1) 0%, transparent 50%), | |
| radial-gradient(circle at 70% 70%, rgba(168, 85, 247, 0.1) 0%, transparent 50%); | |
| pointer-events: none; | |
| "></div> | |
| <div style="position: relative; z-index: 1;"> | |
| <!-- Logo and title --> | |
| <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 15px;"> | |
| <div style=" | |
| font-size: 48px; | |
| background: linear-gradient(135deg, #00d4ff 0%, #a855f7 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| ">🧠</div> | |
| <div> | |
| <h1 style=" | |
| font-size: 2.5em; | |
| font-weight: 700; | |
| margin: 0; | |
| background: linear-gradient(135deg, #00d4ff 0%, #a855f7 50%, #00d4ff 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| letter-spacing: -0.02em; | |
| ">Docker Neural Memory</h1> | |
| <p style=" | |
| color: #94a3b8; | |
| margin: 5px 0 0 0; | |
| font-size: 1.1em; | |
| font-weight: 300; | |
| ">Test-Time Training: Evolving LLMs from data hoarders to knowledge creators</p> | |
| </div> | |
| </div> | |
| <!-- Feature badges --> | |
| <div style="display: flex; gap: 12px; flex-wrap: wrap; margin-top: 20px;"> | |
| <span style=" | |
| background: linear-gradient(135deg, rgba(0, 212, 255, 0.2) 0%, rgba(0, 212, 255, 0.1) 100%); | |
| border: 1px solid rgba(0, 212, 255, 0.3); | |
| color: #00d4ff; | |
| padding: 8px 16px; | |
| border-radius: 20px; | |
| font-size: 0.85em; | |
| font-weight: 500; | |
| ">⚡ PyTorch TTT</span> | |
| <span style=" | |
| background: linear-gradient(135deg, rgba(168, 85, 247, 0.2) 0%, rgba(168, 85, 247, 0.1) 100%); | |
| border: 1px solid rgba(168, 85, 247, 0.3); | |
| color: #a855f7; | |
| padding: 8px 16px; | |
| border-radius: 20px; | |
| font-size: 0.85em; | |
| font-weight: 500; | |
| ">🐳 Docker Native</span> | |
| <span style=" | |
| background: linear-gradient(135deg, rgba(34, 197, 94, 0.2) 0%, rgba(34, 197, 94, 0.1) 100%); | |
| border: 1px solid rgba(34, 197, 94, 0.3); | |
| color: #22c55e; | |
| padding: 8px 16px; | |
| border-radius: 20px; | |
| font-size: 0.85em; | |
| font-weight: 500; | |
| ">🔌 MCP Server</span> | |
| <span style=" | |
| background: linear-gradient(135deg, rgba(255, 140, 66, 0.2) 0%, rgba(255, 140, 66, 0.1) 100%); | |
| border: 1px solid rgba(255, 140, 66, 0.3); | |
| color: #ff8c42; | |
| padding: 8px 16px; | |
| border-radius: 20px; | |
| font-size: 0.85em; | |
| font-weight: 500; | |
| ">📊 Titans Architecture</span> | |
| </div> | |
| </div> | |
| </div> | |
| ''' | |
| FOOTER_HTML = ''' | |
| <div style=" | |
| font-family: 'Outfit', system-ui, sans-serif; | |
| background: linear-gradient(135deg, #0a0a1a 0%, #12122a 100%); | |
| padding: 30px; | |
| border-radius: 16px; | |
| margin-top: 30px; | |
| border: 1px solid rgba(148, 163, 184, 0.1); | |
| "> | |
| <div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap; gap: 20px;"> | |
| <!-- Left side: Built by --> | |
| <div> | |
| <p style="color: #94a3b8; margin: 0 0 8px 0; font-size: 0.9em;">Built by</p> | |
| <p style="color: #f8fafc; margin: 0; font-size: 1.2em; font-weight: 600;">Carlos Crespo Macaya</p> | |
| <p style="color: #64748b; margin: 5px 0 0 0; font-size: 0.85em;">AI Engineer — GenAI Systems & Applied MLOps</p> | |
| </div> | |
| <!-- Right side: Social links --> | |
| <div style="display: flex; gap: 12px; flex-wrap: wrap;"> | |
| <a href="https://github.com/macayaven/docker-neural-memory" target="_blank" style=" | |
| display: flex; align-items: center; gap: 8px; | |
| background: rgba(255,255,255,0.05); | |
| padding: 10px 16px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| color: #f8fafc; | |
| font-size: 0.9em; | |
| transition: all 0.3s ease; | |
| border: 1px solid transparent; | |
| " onmouseover="this.style.borderColor='#f8fafc'; this.style.background='rgba(255,255,255,0.1)';" | |
| onmouseout="this.style.borderColor='transparent'; this.style.background='rgba(255,255,255,0.05)';"> | |
| <span style="font-size: 1.2em;">🐙</span> GitHub | |
| </a> | |
| <a href="https://www.linkedin.com/in/carlos-crespo-macaya/" target="_blank" style=" | |
| display: flex; align-items: center; gap: 8px; | |
| background: rgba(255,255,255,0.05); | |
| padding: 10px 16px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| color: #f8fafc; | |
| font-size: 0.9em; | |
| transition: all 0.3s ease; | |
| border: 1px solid transparent; | |
| " onmouseover="this.style.borderColor='#0077b5'; this.style.color='#0077b5';" | |
| onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';"> | |
| <span style="font-size: 1.2em;">💼</span> LinkedIn | |
| </a> | |
| <a href="https://www.kaggle.com/macayaven" target="_blank" style=" | |
| display: flex; align-items: center; gap: 8px; | |
| background: rgba(255,255,255,0.05); | |
| padding: 10px 16px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| color: #f8fafc; | |
| font-size: 0.9em; | |
| transition: all 0.3s ease; | |
| border: 1px solid transparent; | |
| " onmouseover="this.style.borderColor='#20beff'; this.style.color='#20beff';" | |
| onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';"> | |
| <span style="font-size: 1.2em;">📊</span> Kaggle <span style="background: linear-gradient(135deg, #ffd700, #ffb700); color: #000; padding: 2px 6px; border-radius: 4px; font-size: 0.75em; font-weight: 600;">2×🥇</span> | |
| </a> | |
| <a href="https://huggingface.co/macayaven" target="_blank" style=" | |
| display: flex; align-items: center; gap: 8px; | |
| background: rgba(255,255,255,0.05); | |
| padding: 10px 16px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| color: #f8fafc; | |
| font-size: 0.9em; | |
| transition: all 0.3s ease; | |
| border: 1px solid transparent; | |
| " onmouseover="this.style.borderColor='#ff9d00'; this.style.color='#ff9d00';" | |
| onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';"> | |
| <span style="font-size: 1.2em;">🤗</span> HuggingFace | |
| </a> | |
| <a href="https://scholar.google.com/citations?user=hwvDud0AAAAJ" target="_blank" style=" | |
| display: flex; align-items: center; gap: 8px; | |
| background: rgba(255,255,255,0.05); | |
| padding: 10px 16px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| color: #f8fafc; | |
| font-size: 0.9em; | |
| transition: all 0.3s ease; | |
| border: 1px solid transparent; | |
| " onmouseover="this.style.borderColor='#4285f4'; this.style.color='#4285f4';" | |
| onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';"> | |
| <span style="font-size: 1.2em;">🎓</span> Scholar | |
| </a> | |
| <a href="https://carlos-crespo.com" target="_blank" style=" | |
| display: flex; align-items: center; gap: 8px; | |
| background: rgba(255,255,255,0.05); | |
| padding: 10px 16px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| color: #f8fafc; | |
| font-size: 0.9em; | |
| transition: all 0.3s ease; | |
| border: 1px solid transparent; | |
| " onmouseover="this.style.borderColor='#00d4ff'; this.style.color='#00d4ff';" | |
| onmouseout="this.style.borderColor='transparent'; this.style.color='#f8fafc';"> | |
| <span style="font-size: 1.2em;">🌐</span> Website | |
| </a> | |
| </div> | |
| </div> | |
| <div style="margin-top: 20px; padding-top: 20px; border-top: 1px solid rgba(148, 163, 184, 0.1); text-align: center;"> | |
| <p style="color: #64748b; margin: 0; font-size: 0.85em;"> | |
| Docker Neural Memory — Containerized AI memory with real test-time training | |
| </p> | |
| </div> | |
| </div> | |
| ''' | |
| # ============================================================================= | |
| # HUGGINGFACE INFERENCE CLIENT | |
| # ============================================================================= | |
| # Use a model that is available on HF Serverless Inference free tier | |
| # See: https://huggingface.co/models?inference_provider=hf-inference&pipeline_tag=text-generation | |
# Model id and (optional) access token come from the environment.
HF_MODEL = os.environ.get("HF_MODEL", "HuggingFaceTB/SmolLM3-3B")
HF_TOKEN = os.environ.get("HF_TOKEN")  # Optional - works without for many models

# Build the inference client once at import time. If construction fails the
# module still loads; LLM-backed features are simply reported as unavailable.
try:
    hf_client = InferenceClient(model=HF_MODEL, token=HF_TOKEN)
except Exception as init_error:
    print(f"Warning: Could not initialize HF client: {init_error}")
    hf_client = None
    LLM_AVAILABLE = False
else:
    LLM_AVAILABLE = True
| # Add src to path for real implementation | |
| # When deployed to HF Spaces, src/ is copied to the same directory as app.py | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| from src.config import MemoryConfig # noqa: E402 | |
| from src.memory.neural_memory import NeuralMemory # noqa: E402 | |
| # ============================================================================= | |
| # REAL NEURAL MEMORY INSTANCE | |
| # ============================================================================= | |
# Initialize the REAL neural memory - this is actual PyTorch, not a simulation
# Module-level instance shared by the functions below; it is configured with
# dim=256 and learning_rate=0.02.
memory = NeuralMemory(MemoryConfig(dim=256, learning_rate=0.02))
# Track history for visualization
# NOTE(review): nothing in the visible part of this file appends to this list -
# confirm it is still used further down.
observation_history: List[Dict] = []
| # ============================================================================= | |
| # COMPARISON METRICS & KNOWLEDGE BASE | |
| # ============================================================================= | |
@dataclass
class ComparisonMetrics:
    """Track comparison between vanilla and memory-augmented responses.

    The class must be a dataclass: the ``field(default_factory=list)``
    defaults only become real per-instance lists when ``@dataclass``
    processes them. Without the decorator the ``*_response_times``
    attributes are ``dataclasses.Field`` objects and ``.append`` fails.
    """

    # With Neural Memory
    nm_queries: int = 0  # number of questions answered with neural memory
    nm_correct: int = 0  # answers counted as confident/correct
    nm_hallucinations: int = 0
    nm_response_times: List[float] = field(default_factory=list)  # seconds per call

    # Vanilla (no memory)
    vanilla_queries: int = 0  # number of questions answered by the baseline
    vanilla_correct: int = 0
    vanilla_hallucinations: int = 0
    vanilla_response_times: List[float] = field(default_factory=list)  # seconds per call
# Single module-level metrics accumulator; compare_responses() updates it.
metrics = ComparisonMetrics()
# Knowledge base - facts the user teaches
# add_to_knowledge_base() appends dicts with "fact" and "timestamp" keys.
# NOTE(review): "timestamp" is a float from time.time(), so the Dict[str, str]
# annotation is narrower than what is actually stored.
knowledge_base: List[Dict[str, str]] = []
# Store embeddings for t-SNE visualization
# add_to_knowledge_base() appends dicts with "label", "embedding", "surprise"
# and "timestamp" keys.
embeddings_store: List[Dict] = []
def get_embedding(text: str) -> np.ndarray:
    """Return a fixed-length vector for *text* taken from the memory network.

    The text is encoded with ``memory._encode_text`` and passed through
    ``memory.memory_net`` without gradient tracking. The flattened output is
    zero-padded at the end or truncated so the result always has 256 entries.
    """
    embedding_dim = 256
    with torch.no_grad():
        encoded = memory._encode_text(text)
        activations = memory.memory_net(encoded)
        vector = activations.cpu().numpy().flatten()

        shortfall = embedding_dim - len(vector)
        if shortfall > 0:
            # Too short: append zeros up to the target length.
            return np.pad(vector, (0, shortfall), mode='constant')
        if shortfall < 0:
            # Too long: keep only the leading entries.
            return vector[:embedding_dim]
        return vector
def create_knowledge_base_visualization() -> plt.Figure:
    """Create visualization of the knowledge base (RAG store).

    Reads the module-level ``knowledge_base`` list. When it is empty a
    placeholder message is drawn; otherwise the last 10 facts are drawn as
    numbered cards (text longer than 60 characters is shortened with "..."),
    with a footer line when older facts are not shown.

    Returns:
        The Matplotlib figure. It is removed from pyplot's figure registry
        before being returned so repeated UI callbacks do not accumulate
        open figures; the returned object can still be rendered or saved.
    """
    max_cards = 10

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis("off")

    if not knowledge_base:
        ax.text(
            0.5, 0.5,
            "No facts in knowledge base yet.\nAdd facts to see them here.",
            ha="center", va="center", fontsize=14, color="gray"
        )
        ax.set_title("Knowledge Base (RAG Store)", fontsize=14, fontweight="bold")
        plt.close(fig)  # release pyplot's reference; the figure stays usable
        return fig

    n_facts = len(knowledge_base)
    recent = knowledge_base[-max_cards:]
    # Number cards by their position in the full knowledge base, not in `recent`.
    first_number = n_facts - len(recent) + 1
    y_positions = np.linspace(0.9, 0.1, len(recent))

    # Title
    ax.set_title(f"Knowledge Base (RAG Store) - {n_facts} Facts", fontsize=14, fontweight="bold")

    # Draw facts as cards
    for number, (y_pos, item) in enumerate(zip(y_positions, recent), start=first_number):
        fact_text = item["fact"]
        if len(fact_text) > 60:
            fact_text = fact_text[:57] + "..."
        # Card background
        rect = plt.Rectangle((0.02, y_pos - 0.035), 0.96, 0.07,
                             facecolor="#e8f4f8", edgecolor="#3498db",
                             linewidth=2, alpha=0.8, zorder=1)
        ax.add_patch(rect)
        # Fact number and text
        ax.text(0.05, y_pos, f"#{number}",
                fontsize=10, fontweight="bold", color="#2980b9", va="center")
        ax.text(0.12, y_pos, fact_text, fontsize=10, va="center", color="#2c3e50")

    if n_facts > max_cards:
        ax.text(0.5, 0.02, f"... and {n_facts - max_cards} more facts",
                ha="center", fontsize=9, color="gray", style="italic")

    # Use the figure's own method instead of pyplot's implicit "current figure".
    fig.tight_layout()
    plt.close(fig)  # release pyplot's reference; the figure stays usable
    return fig
def create_neural_memory_state_visualization() -> plt.Figure:
    """Create visualization of the neural memory state.

    Draws three panels from the module-level ``memory`` object:

    1. a histogram of every parameter value in ``memory.memory_net``;
    2. a heatmap of the matrix returned by ``get_weight_sample()``;
    3. a text box with the values from ``memory.get_stats()`` plus
       mean/std/min/max of the weights.

    Returns:
        The Matplotlib figure. It is removed from pyplot's figure registry
        before being returned so repeated UI callbacks do not accumulate
        open figures; the returned object can still be rendered or saved.
    """
    fig, (hist_ax, heat_ax, stats_ax) = plt.subplots(1, 3, figsize=(14, 4))

    # 1. Weight distribution histogram
    # Gather each parameter as a flat array and join them once; extending a
    # Python list element by element is needlessly slow for large networks.
    with torch.no_grad():
        chunks = [
            param.data.cpu().numpy().ravel()
            for param in memory.memory_net.parameters()
        ]
    all_weights = np.concatenate(chunks) if chunks else np.array([])
    hist_ax.hist(all_weights, bins=50, color="#3498db", alpha=0.7, edgecolor="white")
    hist_ax.axvline(x=0, color="red", linestyle="--", alpha=0.5)
    hist_ax.set_title("Weight Distribution", fontsize=11, fontweight="bold")
    hist_ax.set_xlabel("Weight Value")
    hist_ax.set_ylabel("Count")
    hist_ax.grid(True, alpha=0.3)

    # 2. Weight heatmap (sample)
    weights = get_weight_sample()
    im = heat_ax.imshow(weights, cmap="RdBu_r", aspect="auto", vmin=-0.5, vmax=0.5)
    heat_ax.set_title("Weight Matrix Sample (16x16)", fontsize=11, fontweight="bold")
    heat_ax.axis("off")
    fig.colorbar(im, ax=heat_ax, label="Value")

    # 3. Memory stats
    stats_ax.axis("off")
    stats = memory.get_stats()
    stats_text = f"""
Neural Memory State
───────────────────
Parameters: {stats['weight_parameters']:,}
Dimension: {stats['dimension']}
Learning Rate: {stats['learning_rate']:.4f}
Observations: {stats['total_observations']}
Avg Surprise: {stats['avg_surprise']:.4f}
Weight Stats:
• Mean: {np.mean(all_weights):.4f}
• Std: {np.std(all_weights):.4f}
• Min: {np.min(all_weights):.4f}
• Max: {np.max(all_weights):.4f}
"""
    stats_ax.text(0.1, 0.5, stats_text, fontsize=10, family="monospace",
                  va="center", transform=stats_ax.transAxes,
                  bbox={"boxstyle": "round,pad=0.5", "facecolor": "#f0f0f0", "alpha": 0.8})
    stats_ax.set_title("Memory Statistics", fontsize=11, fontweight="bold")

    # Use the figure's own method instead of pyplot's implicit "current figure".
    fig.tight_layout()
    plt.close(fig)  # release pyplot's reference; the figure stays usable
    return fig
def create_tsne_visualization() -> plt.Figure:
    """Create a 2-D scatter plot of the stored embeddings.

    Reads the module-level ``embeddings_store``. With fewer than 2 entries a
    placeholder message is drawn. With 2-4 entries the embeddings are
    projected with PCA; with 5 or more, t-SNE is used with perplexity
    ``min(30, n_samples - 1)``. Points are colored by their stored surprise
    value and annotated with their label (cut to 30 characters plus "...").

    Returns:
        The Matplotlib figure. It is removed from pyplot's figure registry
        before being returned so repeated UI callbacks do not accumulate
        open figures; the returned object can still be rendered or saved.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    if len(embeddings_store) < 2:
        ax.text(
            0.5, 0.5,
            "Add at least 2 facts to see the embedding space",
            ha="center", va="center", fontsize=14, color="gray"
        )
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis("off")
        plt.close(fig)  # release pyplot's reference; the figure stays usable
        return fig

    # Extract embeddings and labels
    embeddings = np.array([e["embedding"] for e in embeddings_store])
    labels = [e["label"][:30] + "..." if len(e["label"]) > 30 else e["label"]
              for e in embeddings_store]
    surprises = [e["surprise"] for e in embeddings_store]

    # Use PCA if few samples, t-SNE otherwise
    n_samples = len(embeddings)
    if n_samples < 5:
        reducer = PCA(n_components=2)
        method = "PCA"
    else:
        # Perplexity is capped below the sample count.
        perplexity = min(30, n_samples - 1)
        reducer = TSNE(n_components=2, perplexity=perplexity, random_state=42)
        method = "t-SNE"
    reduced = reducer.fit_transform(embeddings)

    # Plot points
    scatter = ax.scatter(
        reduced[:, 0], reduced[:, 1],
        c=surprises, cmap="RdYlBu_r",
        s=150, alpha=0.7, edgecolors="white", linewidth=2
    )

    # Add labels
    for i, label in enumerate(labels):
        ax.annotate(
            label, (reduced[i, 0], reduced[i, 1]),
            xytext=(5, 5), textcoords="offset points",
            fontsize=9, alpha=0.8,
            bbox={"boxstyle": "round,pad=0.3", "facecolor": "white", "alpha": 0.7}
        )

    # Colorbar
    cbar = fig.colorbar(scatter, ax=ax)
    cbar.set_label("Surprise (Red=Novel, Blue=Familiar)", fontsize=10)

    ax.set_title(f"Neural Memory Embedding Space ({method})\n"
                 f"{n_samples} observations - Similar concepts cluster together",
                 fontsize=12, fontweight="bold")
    ax.set_xlabel("Dimension 1")
    ax.set_ylabel("Dimension 2")
    ax.grid(True, alpha=0.3)

    # Use the figure's own method instead of pyplot's implicit "current figure".
    fig.tight_layout()
    plt.close(fig)  # release pyplot's reference; the figure stays usable
    return fig
def create_embedding_comparison() -> plt.Figure:
    """Create side-by-side: weight heatmap + embedding space.

    The left panel shows the matrix returned by ``get_weight_sample()``.
    The right panel shows the entries of the module-level
    ``embeddings_store`` projected to 2-D (PCA for fewer than 5 entries,
    t-SNE otherwise) and colored by surprise, or a placeholder message when
    fewer than 2 entries exist.

    Returns:
        The Matplotlib figure. It is removed from pyplot's figure registry
        before being returned so repeated UI callbacks do not accumulate
        open figures; the returned object can still be rendered or saved.
    """
    fig, (weights_ax, embed_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left: Weight heatmap
    weights = get_weight_sample()
    im = weights_ax.imshow(weights, cmap="RdBu_r", aspect="auto", vmin=-0.5, vmax=0.5)
    weights_ax.set_title("Neural Network Weights\n(These update during learning)",
                         fontsize=11, fontweight="bold")
    weights_ax.axis("off")
    fig.colorbar(im, ax=weights_ax, label="Weight Value")

    # Right: Embedding space (simplified if few points)
    if len(embeddings_store) < 2:
        embed_ax.text(0.5, 0.5, "Add facts to see\nembedding space",
                      ha="center", va="center", fontsize=12, color="gray")
        embed_ax.set_xlim(0, 1)
        embed_ax.set_ylim(0, 1)
    else:
        embeddings = np.array([e["embedding"] for e in embeddings_store])
        surprises = [e["surprise"] for e in embeddings_store]
        n_samples = len(embeddings)
        if n_samples < 5:
            reducer = PCA(n_components=2)
        else:
            # Perplexity is capped below the sample count.
            perplexity = min(30, n_samples - 1)
            reducer = TSNE(n_components=2, perplexity=perplexity, random_state=42)
        reduced = reducer.fit_transform(embeddings)
        scatter = embed_ax.scatter(reduced[:, 0], reduced[:, 1], c=surprises,
                                   cmap="RdYlBu_r", s=100, alpha=0.7)
        fig.colorbar(scatter, ax=embed_ax, label="Surprise")
        embed_ax.grid(True, alpha=0.3)
    embed_ax.set_title("Learned Representations\n(Similar facts cluster together)",
                       fontsize=11, fontweight="bold")

    # Use the figure's own method instead of pyplot's implicit "current figure".
    fig.tight_layout()
    plt.close(fig)  # release pyplot's reference; the figure stays usable
    return fig
def call_llm(prompt: str, context: str = "") -> Tuple[str, float]:
    """Send *prompt* to the HuggingFace chat model.

    When *context* is non-empty it is placed in a system message that tells
    the model to answer only from that knowledge.

    Returns:
        ``(answer, seconds)``. If the client is unavailable, or the request
        raises, the first element is a placeholder/error string and the
        second is ``0.0``.
    """
    if hf_client is None or not LLM_AVAILABLE:
        return "[LLM not available - set HF_TOKEN for comparison demo]", 0.0

    try:
        # Assemble the chat: optional system message first, then the user turn.
        chat = []
        if context:
            system_msg = f"""You have access to the following knowledge:
{context}
Based ONLY on the knowledge above, answer questions. If the information is not in the knowledge provided, say "I don't have information about that."
"""
            chat.append({"role": "system", "content": system_msg})
        chat.append({"role": "user", "content": prompt})

        started_at = time.time()
        completion = hf_client.chat_completion(
            messages=chat,
            max_tokens=150,
            temperature=0.7,
        )
        duration = time.time() - started_at

        # An empty or missing completion is reported as an empty string.
        text = completion.choices[0].message.content
        if not text:
            return "", duration
        return text.strip(), duration
    except Exception as e:
        return f"Error: {e!s}", 0.0
def add_to_knowledge_base(fact: str) -> Tuple[str, plt.Figure, plt.Figure, plt.Figure]:
    """Record *fact* in the knowledge base and let the neural memory observe it.

    A blank fact changes nothing and yields a prompt message. Otherwise the
    fact is appended to ``knowledge_base``, passed to ``memory.observe``, and
    its embedding is appended to ``embeddings_store``.

    Returns:
        ``(markdown, embedding_figure, memory_state_figure, knowledge_base_figure)``.
    """
    if not fact.strip():
        message = "Please enter a fact to add."
    else:
        # Add to knowledge base
        knowledge_base.append({"fact": fact, "timestamp": time.time()})

        # Observe in neural memory
        observation = memory.observe(fact)

        # Store embedding for visualization
        embeddings_store.append({
            "label": fact,
            "embedding": get_embedding(fact),
            "surprise": observation["surprise"],
            "timestamp": time.time(),
        })

        learned_label = 'Yes' if observation['learned'] else 'No'
        message = f"""### Fact Added
**Fact:** "{fact}"
**Neural Memory Response:**
| Metric | Value |
|--------|-------|
| Surprise | {observation['surprise']:.4f} |
| Weight Delta | {observation['weight_delta']:.6f} |
| Learned | {learned_label} |
**Knowledge Base Size:** {len(knowledge_base)} facts
**Embeddings Stored:** {len(embeddings_store)}
"""

    # Both paths refresh the same three plots, in the same order.
    embedding_fig = create_tsne_visualization()
    state_fig = create_neural_memory_state_visualization()
    kb_fig = create_knowledge_base_visualization()
    return message, embedding_fig, state_fig, kb_fig
def get_knowledge_context() -> str:
    """Return every stored fact as a "- fact" bullet list, one per line.

    An empty knowledge base yields an empty string.
    """
    if knowledge_base:
        return "\n".join(f"- {entry['fact']}" for entry in knowledge_base)
    return ""
def call_rag_llm(question: str, knowledge_base: List[Dict]) -> Tuple[str, float, List[str]]:
    """Answer *question* from the two facts sharing the most words with it.

    Facts are scored by how many whitespace-separated, lower-cased words they
    share with the question. The top two facts with a non-zero score are put
    in the system prompt; if none qualify, the model is told nothing was
    retrieved.

    Returns:
        ``(answer, seconds, retrieved_facts)``. ``retrieved_facts`` is
        ``["(none retrieved)"]`` when nothing matched, and ``[]`` when the
        LLM client is unavailable. On a request error the answer is an
        error string and the time is ``0.0``.
    """
    if hf_client is None or not LLM_AVAILABLE:
        return "[LLM not available]", 0.0, []

    # Simple RAG simulation: keyword-based retrieval (top 2 most relevant)
    query_terms = set(question.lower().split())
    scored = [
        (len(query_terms & set(entry["fact"].lower().split())), entry["fact"])
        for entry in knowledge_base
    ]
    # Stable sort on the overlap score only, highest first.
    ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)
    retrieved = [text for overlap, text in ranked[:2] if overlap > 0]

    if not retrieved:
        system_msg = "You are a RAG system with no relevant documents retrieved. Say 'No relevant documents found.'"
        retrieved = ["(none retrieved)"]
    else:
        bullets = "\n".join(f"- {text}" for text in retrieved)
        context = "Retrieved facts:\n" + bullets
        system_msg = f"""You are a RAG system. You can ONLY use the retrieved facts below to answer.
If the retrieved facts don't directly answer the question, say "The retrieved information doesn't cover this."
{context}
"""

    chat = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": question},
    ]

    try:
        started_at = time.time()
        completion = hf_client.chat_completion(messages=chat, max_tokens=150, temperature=0.7)
        duration = time.time() - started_at
        text = completion.choices[0].message.content
        if not text:
            return "", duration, retrieved
        return text.strip(), duration, retrieved
    except Exception as e:
        return f"Error: {e!s}", 0.0, retrieved
def call_neural_memory_llm(question: str, knowledge_base: List[Dict], surprise: float) -> Tuple[str, float]:
    """Neural Memory augmented LLM: uses ALL facts + learned patterns.

    Every fact in *knowledge_base* is placed in the system prompt, followed
    by a short "learned patterns" hint that counts positive facts (containing
    the word "approved" or "liked") and negative facts ("rejected" or
    "disliked"), and by the *surprise* score for the question.

    The keyword counts use whole-word matching. A plain substring test would
    count "disliked" (and "disapproved") as positive signals because they
    contain "liked" / "approved".

    Returns:
        ``(answer, seconds)``. If the client is unavailable, or the request
        raises, the first element is a placeholder/error string and the
        second is ``0.0``.
    """
    import re  # local import: only this function needs it

    if not LLM_AVAILABLE or hf_client is None:
        return "[LLM not available]", 0.0

    # Neural memory provides ALL context + pattern awareness
    all_facts = "\n".join(f"- {item['fact']}" for item in knowledge_base)

    # Analyze patterns in the facts (whole words only, case-insensitive)
    positive_words = r"\b(?:approved|liked)\b"
    negative_words = r"\b(?:rejected|disliked)\b"
    lowered_facts = [item["fact"].lower() for item in knowledge_base]
    approval_count = sum(1 for text in lowered_facts if re.search(positive_words, text))
    rejection_count = sum(1 for text in lowered_facts if re.search(negative_words, text))

    patterns_hint = ""
    if approval_count or rejection_count:
        patterns_hint = "\n\nLearned patterns from observations:"
        if approval_count:
            patterns_hint += f"\n- Positive signals: {approval_count} approvals/likes"
        if rejection_count:
            patterns_hint += f"\n- Negative signals: {rejection_count} rejections/dislikes"
        patterns_hint += "\n- Look for common themes in approved vs rejected items"

    system_msg = f"""You are an AI with neural memory that has LEARNED from all observations below.
Unlike simple retrieval, you should:
1. Consider ALL facts holistically
2. Identify PATTERNS across multiple observations
3. Make INFERENCES based on learned patterns
4. Predict based on trends, not just direct matches
Observations (learned knowledge):
{all_facts}
{patterns_hint}
Question novelty (surprise score): {surprise:.2f}
- Low surprise (<0.3): This topic is familiar from your observations
- High surprise (>0.6): This is a novel topic, be cautious
"""
    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": question},
    ]

    try:
        start = time.time()
        response = hf_client.chat_completion(messages=messages, max_tokens=200, temperature=0.7)
        elapsed = time.time() - start
        answer = response.choices[0].message.content
        return answer.strip() if answer else "", elapsed
    except Exception as e:
        # Best-effort demo path: surface the failure as the answer text.
        return f"Error: {e!s}", 0.0
def compare_responses(question: str) -> Tuple[str, str, str, plt.Figure, plt.Figure]:
    """Ask the same question of the neural-memory LLM and the RAG baseline.

    Updates the module-level ``metrics`` counters, then formats one markdown
    panel per approach plus the text from ``get_comparison_summary()``.

    Returns:
        ``(neural_memory_markdown, rag_markdown, comparison_markdown,
        memory_state_figure, knowledge_base_figure)``. A blank question
        yields three empty strings; a missing LLM yields explanatory
        messages instead of answers.
    """

    def mentions_any(text: str, phrases: List[str]) -> bool:
        # Case-insensitive "contains at least one of these phrases".
        lowered = text.lower()
        return any(phrase in lowered for phrase in phrases)

    if not question.strip():
        return "", "", "", create_neural_memory_state_visualization(), create_knowledge_base_visualization()

    if not LLM_AVAILABLE:
        return (
            "LLM not available. Please set HF_TOKEN environment variable.",
            "LLM not available.",
            "Comparison requires LLM access.",
            create_neural_memory_state_visualization(),
            create_knowledge_base_visualization(),
        )

    # Check surprise (is this question familiar?)
    surprise = memory.surprise(question)

    # Query with NEURAL MEMORY (pattern learning, all context)
    nm_answer, nm_seconds = call_neural_memory_llm(question, knowledge_base, surprise)
    metrics.nm_queries += 1
    metrics.nm_response_times.append(nm_seconds)

    # Query with RAG (simple retrieval)
    rag_answer, rag_seconds, retrieved_facts = call_rag_llm(question, knowledge_base)
    metrics.vanilla_queries += 1
    metrics.vanilla_response_times.append(rag_seconds)

    # Simple quality detection based on stock refusal phrases
    rag_failed = mentions_any(
        rag_answer, ["doesn't cover", "no relevant", "don't have", "cannot answer"]
    )
    nm_hedged = mentions_any(nm_answer, ["i don't know", "i don't have", "cannot"])
    if rag_failed:
        metrics.vanilla_hallucinations += 1
    if not nm_hedged and knowledge_base:
        metrics.nm_correct += 1

    # Format outputs - Neural Memory
    familiarity = 'familiar topic' if surprise < 0.4 else 'novel topic'
    nm_output = f"""### Neural Memory (Pattern Learning)
**Question:** {question}
**Response:**
> {nm_answer}
---
**How it works:**
- Uses **ALL {len(knowledge_base)} facts** holistically
- **Learns patterns** (e.g., approval vs rejection trends)
- **Surprise Score: {surprise:.3f}** - {familiarity}
- Response Time: {nm_seconds:.2f}s
"""

    # Format outputs - RAG
    retrieved_str = "\n".join(f" - {fact}" for fact in retrieved_facts)
    # The "(none retrieved)" placeholder is displayed but not counted.
    retrieved_count = sum(1 for fact in retrieved_facts if fact != '(none retrieved)')
    rag_output = f"""### RAG (Retrieval Only)
**Question:** {question}
**Response:**
> {rag_answer}
---
**How it works:**
- Retrieved **{retrieved_count} facts** by keyword match:
{retrieved_str}
- **No pattern learning** - just similarity search
- Response Time: {rag_seconds:.2f}s
"""

    # Comparison summary
    comparison = get_comparison_summary()

    state_fig = create_neural_memory_state_visualization()
    kb_fig = create_knowledge_base_visualization()
    return nm_output, rag_output, comparison, state_fig, kb_fig
def get_comparison_summary() -> str:
    """Render the running Neural Memory vs RAG metrics as Markdown.

    Reads the module-level ``metrics`` and ``knowledge_base``. Every average
    and percentage falls back to 0 when no queries have been recorded, so the
    function is safe to call on a fresh session.
    """
    nm_times = metrics.nm_response_times
    rag_times = metrics.vanilla_response_times

    nm_avg_time = 0
    if nm_times:
        nm_avg_time = sum(nm_times) / len(nm_times)

    rag_avg_time = 0
    if rag_times:
        rag_avg_time = sum(rag_times) / len(rag_times)

    nm_accuracy = 0
    if metrics.nm_queries:
        nm_accuracy = metrics.nm_correct / metrics.nm_queries * 100

    rag_fail_rate = 0
    if metrics.vanilla_queries:
        rag_fail_rate = metrics.vanilla_hallucinations / metrics.vanilla_queries * 100

    fact_count = len(knowledge_base)

    return f"""## Neural Memory vs RAG Comparison
| Metric | Neural Memory | RAG |
|--------|---------------|-----|
| **Queries** | {metrics.nm_queries} | {metrics.vanilla_queries} |
| **Pattern-Based Answers** | {metrics.nm_correct} ({nm_accuracy:.0f}%) | N/A |
| **Retrieval Failures** | N/A | {metrics.vanilla_hallucinations} ({rag_fail_rate:.0f}%) |
| **Avg Response Time** | {nm_avg_time:.2f}s | {rag_avg_time:.2f}s |
### Knowledge Base: {fact_count} facts stored
### Why Neural Memory Wins
| Capability | Neural Memory | RAG |
|------------|---------------|-----|
| **Pattern Learning** | Learns trends across all data | No learning |
| **Inference** | Can predict from patterns | Only retrieves matches |
| **Context Usage** | Uses ALL facts holistically | Uses top-k retrieved |
| **Novelty Detection** | Surprise score | None |
| **Memory Size** | Fixed (neural weights) | Grows with data |
### Key Insight
Neural memory **learns patterns** (e.g., "Carlos rejects bright colors, approves dark themes")
and can **infer preferences** for novel items. RAG just retrieves similar documents.
"""
def reset_comparison() -> Tuple[str, plt.Figure, plt.Figure, plt.Figure]:
    """Clear the comparison state and return refreshed visualizations.

    Rebinds the module-level ``metrics``, ``knowledge_base`` and
    ``embeddings_store`` to fresh empty values. The neural memory itself is
    not touched here.

    Returns:
        (status message, t-SNE figure, neural-memory state figure,
        knowledge-base figure), all rendered after the reset.
    """
    global metrics, knowledge_base, embeddings_store

    metrics = ComparisonMetrics()
    knowledge_base, embeddings_store = [], []

    status = "Comparison reset. Knowledge base and embeddings cleared."
    tsne_fig = create_tsne_visualization()
    state_fig = create_neural_memory_state_visualization()
    kb_fig = create_knowledge_base_visualization()
    return status, tsne_fig, state_fig, kb_fig
def reset_memory():
    """Replace the global neural memory with a newly constructed one.

    Also empties ``observation_history``. Returns a status message for the UI.
    """
    global memory, observation_history

    fresh_config = MemoryConfig(dim=256, learning_rate=0.02)
    memory, observation_history = NeuralMemory(fresh_config), []
    return "Memory reset. Fresh neural network initialized."
| # ============================================================================= | |
| # VISUALIZATION | |
| # ============================================================================= | |
def get_weight_sample() -> np.ndarray:
    """Return the leading 16x16 corner of the first layer's weight matrix.

    The slice is taken from ``memory.memory_net[0].weight`` and converted to a
    NumPy array on the CPU. If the layer is smaller than 16 in either
    dimension, slicing yields fewer rows or columns.
    """
    first_layer = memory.memory_net[0]
    with torch.no_grad():
        corner = first_layer.weight.data[:16, :16]
        return corner.cpu().numpy()
def create_weight_visualization() -> plt.Figure:
    """Render a heatmap of the sampled neural-memory weights.

    Returns:
        A Matplotlib figure with the sample from ``get_weight_sample()`` drawn
        on a diverging colormap clipped to [-0.5, 0.5], a title showing the
        total parameter count of ``memory.memory_net``, and a caption below
        the plot.
    """
    weights = get_weight_sample()
    param_count = sum(p.numel() for p in memory.memory_net.parameters())

    fig, ax = plt.subplots(figsize=(6, 5))
    im = ax.imshow(weights, cmap="RdBu_r", aspect="auto", vmin=-0.5, vmax=0.5)
    ax.set_title(
        f"Neural Memory Weights\n({param_count:,} parameters)",
        fontsize=12,
        fontweight="bold",
    )
    ax.set_xlabel("These weights UPDATE during inference (TTT)")

    # ax.axis("off") hides axis labels along with ticks and spines, which
    # made the caption above invisible. Hide only ticks and frame instead.
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    plt.colorbar(im, ax=ax, label="Weight Value")
    plt.tight_layout()
    return fig
def create_history_plot() -> plt.Figure:
    """Draw the surprise value of each recorded observation, in order.

    Reads ``observation_history``. When it is empty, a grey placeholder
    message is drawn instead of a curve.
    """
    fig, ax = plt.subplots(figsize=(8, 3))

    if not observation_history:
        ax.text(0.5, 0.5, "No observations yet", ha="center", va="center", fontsize=12, color="gray")
        ax.set_xlim(0, 1)
    else:
        surprises = [entry["surprise"] for entry in observation_history]
        positions = range(1, len(surprises) + 1)  # 1-based observation index
        ax.plot(positions, surprises, "o-", color="#e74c3c", linewidth=2, markersize=8)
        ax.axhline(y=0.5, color="gray", linestyle="--", alpha=0.5, label="Threshold")
        ax.set_xlabel("Observation #")
        ax.set_ylabel("Surprise")
        ax.grid(True, alpha=0.3)
        ax.legend()

    # Both the curve and the placeholder use a fixed 0..1 vertical range.
    ax.set_ylim(0, 1)
    ax.set_title("Learning Progress (Surprise Over Time)", fontsize=12, fontweight="bold")
    plt.tight_layout()
    return fig
| # ============================================================================= | |
| # CORE MEMORY OPERATIONS | |
| # ============================================================================= | |
def observe_content(content: str) -> Tuple[str, plt.Figure, plt.Figure]:
    """Feed content to the neural memory and report what changed.

    Args:
        content: Text to observe. Blank or ``None`` input is rejected without
            touching the memory.

    Returns:
        (Markdown report, weight heatmap figure, surprise-history figure).
        For blank input the report is a prompt and the figures show the
        current, unchanged state. Returning ``None`` here would blank the
        plots and contradict the annotated return type.

    Side effects:
        Calls ``memory.observe(content)`` and appends a summary entry to
        ``observation_history``.
    """
    if not content or not content.strip():
        return "Please enter content to observe.", create_weight_visualization(), create_history_plot()

    # Hash the weights on both sides of the update so the report can show
    # whether they actually changed.
    hash_before = memory.get_weight_hash()
    result = memory.observe(content)
    hash_after = memory.get_weight_hash()

    # Record history (content truncated to 50 chars for storage)
    observation_history.append({
        "content": content[:50],
        "surprise": result["surprise"],
        "weight_delta": result["weight_delta"],
        "learned": result["learned"],
    })

    # Format result
    weights_changed = hash_before != hash_after
    preview = content[:100] + ("..." if len(content) > 100 else "")
    output = f"""## Observation Result
**Content:** "{preview}"
### Metrics (REAL - from PyTorch gradient descent)
| Metric | Value |
|--------|-------|
| **Surprise** | {result['surprise']:.4f} |
| **Weight Delta** | {result['weight_delta']:.6f} |
| **Weights Changed** | {'YES' if weights_changed else 'NO'} |
| **Hash Before** | `{hash_before}` |
| **Hash After** | `{hash_after}` |
### What Just Happened
1. Text was encoded to tensor representation
2. Forward pass through neural memory network
3. **Surprise computed** via prediction error (MSE loss)
4. **Gradients calculated** via `torch.autograd.grad()`
5. **Weights updated** via gradient descent: `param -= lr * grad`
This is REAL test-time training. The neural network's weights physically changed.
"""
    return output, create_weight_visualization(), create_history_plot()
def check_surprise(content: str) -> str:
    """Report how novel *content* is to the memory, without learning from it.

    Returns a Markdown report, or a prompt when the input is blank. The
    verdict line uses the bands shown in the report: below 0.3 is familiar,
    above 0.6 is novel, anything in between is "somewhat familiar".
    """
    if not content.strip():
        return "Please enter content to check."

    # Surprise computation only - memory.observe() is never called here.
    surprise = memory.surprise(content)

    if surprise < 0.3:
        verdict = "This content is FAMILIAR to the memory."
    elif surprise > 0.6:
        verdict = "This content is NOVEL to the memory."
    else:
        verdict = "This content is somewhat familiar."

    ellipsis = "..." if len(content) > 100 else ""

    return f"""## Surprise Check (No Learning)
**Content:** "{content[:100]}{ellipsis}"
**Surprise Score:** {surprise:.4f}
Interpretation:
- **< 0.3**: Very familiar - memory has seen similar patterns
- **0.3 - 0.6**: Moderately novel
- **> 0.6**: Highly novel - worth learning
{verdict}
"""
def get_memory_stats() -> str:
    """Format the memory's statistics and current weight hash as Markdown.

    Reads ``memory.get_stats()`` (keys used: ``total_observations``,
    ``weight_parameters``, ``dimension``, ``learning_rate``, ``avg_surprise``)
    and ``memory.get_weight_hash()``.
    """
    stats = memory.get_stats()
    param_count = stats["weight_parameters"]
    weight_hash = memory.get_weight_hash()
    # Size estimate at 4 bytes per parameter, expressed in KB.
    weights_kb = param_count * 4 / 1024

    return f"""## Memory Statistics
| Metric | Value |
|--------|-------|
| **Total Observations** | {stats['total_observations']} |
| **Parameters** | {param_count:,} |
| **Dimension** | {stats['dimension']} |
| **Learning Rate** | {stats['learning_rate']:.4f} |
| **Avg Recent Surprise** | {stats['avg_surprise']:.4f} |
| **Current Weight Hash** | `{weight_hash}` |
### This is a Real Neural Network
- **Architecture**: 2-layer MLP with GELU activation and LayerNorm
- **Framework**: PyTorch with autograd
- **Learning**: Test-time training via gradient descent
- **Memory**: ~{weights_kb:.1f} KB of weights
Unlike RAG which stores vectors in a database, this IS the memory.
The weights encode everything learned.
"""
| # ============================================================================= | |
| # KEY CONCEPTS (New Educational Tab) | |
| # ============================================================================= | |
# Static HTML for the "Key Concepts" educational content. Three stacked
# sections, all styled inline: (1) the problem - LLMs have no memory,
# (2) two solution cards comparing RAG and Neural Memory, and (3) a
# Test-Time Training explainer. Nothing is interpolated into this string.
# NOTE(review): where this is rendered is outside this chunk - presumably
# a Gradio HTML component; confirm against the UI layout code.
KEY_CONCEPTS_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; color: #f8fafc;">
<!-- The Problem -->
<div style="background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; padding: 25px; margin-bottom: 25px; border: 1px solid rgba(252, 129, 129, 0.3);">
<h3 style="color: #fc8181; margin: 0 0 20px 0; display: flex; align-items: center; gap: 12px;">
<span style="font-size: 1.5em;">❌</span> The Problem: LLMs Have No Memory
</h3>
<div style="display: flex; gap: 20px; flex-wrap: wrap;">
<div style="flex: 1; min-width: 280px; background: rgba(0,0,0,0.3); border-radius: 12px; padding: 20px;">
<p style="color: #a0aec0; margin: 0 0 15px 0; font-size: 0.95em;">Every API call to an LLM starts <strong style="color: #fc8181;">fresh</strong>:</p>
<div style="background: #0a0a1a; border-radius: 8px; padding: 15px; font-family: 'JetBrains Mono', monospace; font-size: 0.85em;">
<div style="color: #64748b;">// Call 1</div>
<div style="color: #f8fafc;">User: "My name is Carlos"</div>
<div style="color: #22c55e;">AI: "Nice to meet you, Carlos!"</div>
<br/>
<div style="color: #64748b;">// Call 2 (new session)</div>
<div style="color: #f8fafc;">User: "What's my name?"</div>
<div style="color: #fc8181;">AI: "I don't know your name."</div>
</div>
</div>
<div style="flex: 1; min-width: 280px; background: rgba(0,0,0,0.3); border-radius: 12px; padding: 20px;">
<p style="color: #a0aec0; margin: 0 0 15px 0; font-size: 0.95em;">The model's weights are <strong style="color: #fc8181;">frozen</strong> after training:</p>
<ul style="color: #a0aec0; margin: 0; padding-left: 20px; line-height: 1.8;">
<li>Can't learn new information</li>
<li>Can't remember past conversations</li>
<li>Can't adapt to user preferences</li>
<li>Knowledge is static (training cutoff)</li>
</ul>
</div>
</div>
</div>
<!-- Two Solutions -->
<h3 style="color: #f8fafc; margin: 30px 0 20px 0; text-align: center; font-size: 1.3em;">
Two Solutions to Add Memory
</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)); gap: 25px;">
<!-- RAG Solution -->
<div style="background: linear-gradient(135deg, rgba(252, 129, 129, 0.1) 0%, rgba(237, 137, 54, 0.1) 100%); border: 2px solid #fc8181; border-radius: 16px; padding: 25px;">
<h4 style="color: #fc8181; margin: 0 0 15px 0; display: flex; align-items: center; gap: 10px;">
<span style="font-size: 1.3em;">📚</span> Solution A: RAG (Retrieval)
</h4>
<p style="color: #a0aec0; font-size: 0.9em; margin: 0 0 15px 0;">
<strong>Store</strong> information externally, <strong>retrieve</strong> relevant pieces when needed.
</p>
<div style="background: rgba(0,0,0,0.3); border-radius: 10px; padding: 15px; margin-bottom: 15px;">
<div style="display: flex; align-items: center; gap: 10px; margin-bottom: 10px;">
<span style="background: #fc8181; color: #1a1a2e; padding: 4px 10px; border-radius: 4px; font-size: 0.8em; font-weight: 600;">HOW</span>
</div>
<ol style="color: #a0aec0; margin: 0; padding-left: 20px; font-size: 0.9em; line-height: 1.7;">
<li>Convert facts to vectors (embeddings)</li>
<li>Store in vector database</li>
<li>On query, find similar vectors</li>
<li>Pass retrieved docs to LLM</li>
</ol>
</div>
<div style="display: flex; flex-wrap: wrap; gap: 8px;">
<span style="background: rgba(252, 129, 129, 0.2); color: #fc8181; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Simple</span>
<span style="background: rgba(252, 129, 129, 0.2); color: #fc8181; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Scalable</span>
<span style="background: rgba(100, 116, 139, 0.3); color: #94a3b8; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✗ No patterns</span>
<span style="background: rgba(100, 116, 139, 0.3); color: #94a3b8; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✗ Grows</span>
</div>
</div>
<!-- Neural Memory Solution -->
<div style="background: linear-gradient(135deg, rgba(0, 212, 255, 0.1) 0%, rgba(168, 85, 247, 0.1) 100%); border: 2px solid #00d4ff; border-radius: 16px; padding: 25px;">
<h4 style="color: #00d4ff; margin: 0 0 15px 0; display: flex; align-items: center; gap: 10px;">
<span style="font-size: 1.3em;">🧠</span> Solution B: Neural Memory (Learning)
</h4>
<p style="color: #a0aec0; font-size: 0.9em; margin: 0 0 15px 0;">
<strong>Learn</strong> information into neural weights. Memory IS the network.
</p>
<div style="background: rgba(0,0,0,0.3); border-radius: 10px; padding: 15px; margin-bottom: 15px;">
<div style="display: flex; align-items: center; gap: 10px; margin-bottom: 10px;">
<span style="background: #00d4ff; color: #1a1a2e; padding: 4px 10px; border-radius: 4px; font-size: 0.8em; font-weight: 600;">HOW</span>
</div>
<ol style="color: #a0aec0; margin: 0; padding-left: 20px; font-size: 0.9em; line-height: 1.7;">
<li>Encode fact as tensor</li>
<li>Forward pass through neural net</li>
<li>Compute prediction error (surprise)</li>
<li><strong style="color: #00d4ff;">Update weights</strong> via gradient descent</li>
</ol>
</div>
<div style="display: flex; flex-wrap: wrap; gap: 8px;">
<span style="background: rgba(0, 212, 255, 0.2); color: #00d4ff; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Learns patterns</span>
<span style="background: rgba(0, 212, 255, 0.2); color: #00d4ff; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Fixed size</span>
<span style="background: rgba(0, 212, 255, 0.2); color: #00d4ff; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✓ Can infer</span>
<span style="background: rgba(100, 116, 139, 0.3); color: #94a3b8; padding: 5px 12px; border-radius: 6px; font-size: 0.8em;">✗ Complex</span>
</div>
</div>
</div>
<!-- Test-Time Training Innovation -->
<div style="background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; padding: 25px; margin-top: 25px; border: 1px solid rgba(0, 212, 255, 0.3);">
<h3 style="color: #00d4ff; margin: 0 0 20px 0; display: flex; align-items: center; gap: 12px;">
<span style="font-size: 1.5em;">⚡</span> The Innovation: Test-Time Training (TTT)
</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 20px;">
<div style="background: rgba(0,0,0,0.3); border-radius: 12px; padding: 20px;">
<h5 style="color: #a855f7; margin: 0 0 10px 0;">Traditional Training</h5>
<p style="color: #a0aec0; font-size: 0.9em; margin: 0; line-height: 1.6;">
Train once → Freeze weights → Deploy<br/>
<span style="color: #64748b;">Model can't learn after deployment</span>
</p>
</div>
<div style="background: rgba(0, 212, 255, 0.1); border-radius: 12px; padding: 20px; border: 1px solid rgba(0, 212, 255, 0.2);">
<h5 style="color: #00d4ff; margin: 0 0 10px 0;">Test-Time Training (Titans)</h5>
<p style="color: #a0aec0; font-size: 0.9em; margin: 0; line-height: 1.6;">
Deploy → <strong style="color: #00d4ff;">Continue learning</strong> → Weights update<br/>
<span style="color: #22c55e;">Model learns from every interaction</span>
</p>
</div>
</div>
<div style="margin-top: 20px; padding: 15px; background: rgba(0,0,0,0.3); border-radius: 10px;">
<p style="color: #a0aec0; margin: 0; font-size: 0.9em;">
<strong style="color: #f8fafc;">This demo implements real TTT:</strong> When you add a fact, actual PyTorch gradients flow and actual neural network weights change. This is not a simulation—it's the Titans architecture from Google's December 2024 paper.
</p>
</div>
</div>
</div>
'''
| # ============================================================================= | |
| # INCREMENTAL INTEGRATION DIAGRAMS | |
| # ============================================================================= | |
# "Step 1" diagram: a three-box flow (User Query -> LLM with frozen
# weights -> No Memory) plus a warning banner. Static HTML with inline
# styles; nothing is interpolated.
VANILLA_LLM_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-bottom: 20px; border: 1px solid rgba(148, 163, 184, 0.2);">
<h4 style="color: #94a3b8; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
<span style="background: #374151; color: #f8fafc; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 1</span>
Vanilla LLM (The Problem)
</h4>
<div style="display: flex; align-items: center; justify-content: center; gap: 20px; flex-wrap: wrap;">
<div style="background: #2d3748; padding: 20px 30px; border-radius: 12px; text-align: center;">
<div style="font-size: 32px; margin-bottom: 10px;">👤</div>
<div style="color: #f8fafc; font-weight: 500;">User Query</div>
<div style="color: #64748b; font-size: 0.85em;">"What's my preference?"</div>
</div>
<div style="color: #64748b; font-size: 32px;">→</div>
<div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 20px 30px; border-radius: 12px; text-align: center; border: 2px solid #d6bcfa;">
<div style="font-size: 32px; margin-bottom: 10px;">🤖</div>
<div style="color: #f8fafc; font-weight: 600;">LLM</div>
<div style="color: #e9d8fd; font-size: 0.85em;">Frozen weights</div>
</div>
<div style="color: #64748b; font-size: 32px;">→</div>
<div style="background: rgba(252, 129, 129, 0.2); padding: 20px 30px; border-radius: 12px; text-align: center; border: 2px solid #fc8181;">
<div style="font-size: 32px; margin-bottom: 10px;">❓</div>
<div style="color: #fc8181; font-weight: 500;">No Memory</div>
<div style="color: #a0aec0; font-size: 0.85em;">"I don't know"</div>
</div>
</div>
<div style="margin-top: 15px; padding: 12px; background: rgba(252, 129, 129, 0.1); border-radius: 8px; text-align: center;">
<span style="color: #fc8181; font-size: 0.9em;">⚠️ LLM has no way to remember user-specific information between sessions</span>
</div>
</div>
'''
# "Step 2a" diagram: Query -> Retriever -> Vector DB -> Context -> LLM,
# followed by a two-cell pro/con row. Static HTML with inline styles;
# nothing is interpolated.
RAG_INTEGRATION_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-bottom: 20px; border: 1px solid rgba(255, 140, 66, 0.3);">
<h4 style="color: #ff8c42; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
<span style="background: #ff8c42; color: #1a1a2e; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 2a</span>
Adding RAG (Retrieval-Augmented Generation)
</h4>
<div style="display: flex; align-items: center; justify-content: center; gap: 15px; flex-wrap: wrap;">
<div style="background: #2d3748; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">👤</div>
<div style="color: #f8fafc; font-size: 0.9em;">Query</div>
</div>
<div style="color: #ff8c42; font-size: 24px;">→</div>
<div style="background: rgba(255, 140, 66, 0.2); padding: 15px 20px; border-radius: 10px; text-align: center; border: 1px dashed #ff8c42;">
<div style="font-size: 24px;">🔍</div>
<div style="color: #ff8c42; font-size: 0.9em;">Retriever</div>
<div style="color: #64748b; font-size: 0.75em;">keyword match</div>
</div>
<div style="color: #ff8c42; font-size: 24px;">→</div>
<div style="background: #744210; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">🗃️</div>
<div style="color: #faf089; font-size: 0.9em;">Vector DB</div>
<div style="color: #64748b; font-size: 0.75em;">top-k docs</div>
</div>
<div style="color: #ff8c42; font-size: 24px;">→</div>
<div style="background: #3182ce; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">📋</div>
<div style="color: #bee3f8; font-size: 0.9em;">Context</div>
<div style="color: #64748b; font-size: 0.75em;">prompt injection</div>
</div>
<div style="color: #ff8c42; font-size: 24px;">→</div>
<div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">🤖</div>
<div style="color: #f8fafc; font-size: 0.9em;">LLM</div>
</div>
</div>
<div style="margin-top: 15px; display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
<div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
<span style="color: #22c55e; font-size: 0.85em;">✓ External memory storage</span>
</div>
<div style="padding: 10px; background: rgba(252, 129, 129, 0.1); border-radius: 6px;">
<span style="color: #fc8181; font-size: 0.85em;">✗ No pattern learning</span>
</div>
</div>
</div>
'''
# "Step 2b" diagram: Query -> Neural Memory (TTT module) -> Patterns ->
# Rich Context -> LLM, followed by a three-cell row of advantages.
# Static HTML with inline styles; nothing is interpolated.
NEURAL_MEMORY_INTEGRATION_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-bottom: 20px; border: 1px solid rgba(0, 212, 255, 0.3);">
<h4 style="color: #00d4ff; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
<span style="background: #00d4ff; color: #1a1a2e; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 2b</span>
Adding Neural Memory (Test-Time Training)
</h4>
<div style="display: flex; align-items: center; justify-content: center; gap: 15px; flex-wrap: wrap;">
<div style="background: #2d3748; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">👤</div>
<div style="color: #f8fafc; font-size: 0.9em;">Query</div>
</div>
<div style="color: #00d4ff; font-size: 24px;">→</div>
<div style="background: rgba(0, 212, 255, 0.2); padding: 15px 20px; border-radius: 10px; text-align: center; border: 2px solid #00d4ff;">
<div style="font-size: 24px;">🧠</div>
<div style="color: #00d4ff; font-size: 0.9em; font-weight: 600;">Neural Memory</div>
<div style="color: #64748b; font-size: 0.75em;">TTT Module</div>
</div>
<div style="color: #00d4ff; font-size: 24px;">→</div>
<div style="background: #2f855a; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">📊</div>
<div style="color: #9ae6b4; font-size: 0.9em;">Patterns</div>
<div style="color: #64748b; font-size: 0.75em;">+ surprise</div>
</div>
<div style="color: #00d4ff; font-size: 24px;">→</div>
<div style="background: #3182ce; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">📋</div>
<div style="color: #bee3f8; font-size: 0.9em;">Rich Context</div>
<div style="color: #64748b; font-size: 0.75em;">all facts + hints</div>
</div>
<div style="color: #00d4ff; font-size: 24px;">→</div>
<div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">🤖</div>
<div style="color: #f8fafc; font-size: 0.9em;">LLM</div>
</div>
</div>
<div style="margin-top: 15px; display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px;">
<div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
<span style="color: #22c55e; font-size: 0.85em;">✓ Learns patterns</span>
</div>
<div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
<span style="color: #22c55e; font-size: 0.85em;">✓ Fixed memory size</span>
</div>
<div style="padding: 10px; background: rgba(34, 197, 94, 0.1); border-radius: 6px;">
<span style="color: #22c55e; font-size: 0.85em;">✓ Can infer/predict</span>
</div>
</div>
</div>
'''
# "Step 3" diagram: a Docker container card listing three components
# (Neural Memory, MCP Server, HTTP API) beside a Volume box, plus a
# "Why Docker?" note. Static HTML with inline styles; nothing is
# interpolated.
DOCKER_DEPLOYMENT_DIAGRAM_HTML = '''
<div style="font-family: 'Outfit', system-ui, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; border: 1px solid rgba(168, 85, 247, 0.3);">
<h4 style="color: #a855f7; margin: 0 0 20px 0; display: flex; align-items: center; gap: 10px;">
<span style="background: #a855f7; color: #1a1a2e; padding: 4px 12px; border-radius: 6px; font-size: 0.8em;">Step 3</span>
Docker Deployment (Production Ready)
</h4>
<div style="display: flex; align-items: stretch; justify-content: center; gap: 20px; flex-wrap: wrap;">
<!-- Docker Container -->
<div style="background: rgba(168, 85, 247, 0.1); border: 2px solid #a855f7; border-radius: 12px; padding: 20px; min-width: 280px;">
<div style="display: flex; align-items: center; gap: 10px; margin-bottom: 15px;">
<span style="font-size: 1.5em;">🐳</span>
<span style="color: #a855f7; font-weight: 600;">Docker Container</span>
</div>
<div style="display: flex; flex-direction: column; gap: 10px;">
<div style="background: rgba(0, 212, 255, 0.2); padding: 10px; border-radius: 8px; border: 1px solid rgba(0, 212, 255, 0.3);">
<div style="color: #00d4ff; font-size: 0.85em; font-weight: 500;">🧠 Neural Memory</div>
<div style="color: #64748b; font-size: 0.75em;">PyTorch TTT Module</div>
</div>
<div style="background: rgba(34, 197, 94, 0.2); padding: 10px; border-radius: 8px; border: 1px solid rgba(34, 197, 94, 0.3);">
<div style="color: #22c55e; font-size: 0.85em; font-weight: 500;">🔌 MCP Server</div>
<div style="color: #64748b; font-size: 0.75em;">Claude Desktop Integration</div>
</div>
<div style="background: rgba(255, 140, 66, 0.2); padding: 10px; border-radius: 8px; border: 1px solid rgba(255, 140, 66, 0.3);">
<div style="color: #ff8c42; font-size: 0.85em; font-weight: 500;">🌐 HTTP API</div>
<div style="color: #64748b; font-size: 0.75em;">REST Endpoints</div>
</div>
</div>
</div>
<!-- Volume -->
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; gap: 10px;">
<div style="color: #64748b; font-size: 24px;">↔</div>
<div style="background: #374151; padding: 15px 20px; border-radius: 10px; text-align: center;">
<div style="font-size: 24px;">💾</div>
<div style="color: #f8fafc; font-size: 0.9em;">Volume</div>
<div style="color: #64748b; font-size: 0.75em;">Checkpoints</div>
</div>
</div>
</div>
<div style="margin-top: 20px; padding: 15px; background: rgba(0,0,0,0.3); border-radius: 10px;">
<div style="color: #a0aec0; font-size: 0.9em;">
<strong style="color: #a855f7;">Why Docker?</strong> Learned neural weights persist across container restarts via Docker volumes. Deploy anywhere with identical behavior. Version control your AI's memory state like Git commits.
</div>
</div>
</div>
'''
| # ============================================================================= | |
| # DOCKER ECOSYSTEM INTEGRATION | |
| # ============================================================================= | |
# Markdown for the Docker ecosystem section. The Python (MCP tools) and
# YAML (Compose) snippets below are text inside this string literal,
# shown as examples - they are not executed by this module.
DOCKER_INTEGRATION_MD = """
## Docker Ecosystem Integration
This neural memory is designed for **containerized deployment** with full Docker integration.
### MCP Server Interface
The memory exposes tools via Model Context Protocol (MCP):
```python
# MCP Tools Available
@mcp.tool()
def observe(content: str) -> dict:
'''Feed context, trigger learning.'''
return memory.observe(content)
@mcp.tool()
def surprise(content: str) -> float:
'''Measure novelty without learning.'''
return memory.surprise(content)
@mcp.tool()
def checkpoint(name: str) -> str:
'''Save learned state to Docker volume.'''
return save_checkpoint(name)
@mcp.tool()
def restore(name: str) -> str:
'''Load previous state from Docker volume.'''
return load_checkpoint(name)
```
### Docker Compose Deployment
```yaml
version: '3.8'
services:
neural-memory:
build: .
ports:
- "8000:8000" # MCP server
volumes:
- memory-state:/app/checkpoints # Persistent state
environment:
- MEMORY_DIM=512
- LEARNING_RATE=0.01
volumes:
memory-state: # State survives container restarts
```
### Key Docker-Native Features
| Feature | Implementation |
|---------|---------------|
| **State Persistence** | Docker volumes for checkpoints |
| **Horizontal Scaling** | Stateless inference, shared state via volume |
| **CI/CD Integration** | GitHub Actions with Docker build |
| **Resource Control** | Container limits for GPU/memory |
| **Health Checks** | `/health` endpoint with memory stats |
### Why Docker + Neural Memory?
1. **Containerized AI Memory**: Package learned state with your app
2. **Version Control**: Checkpoint states like Git commits
3. **Reproducibility**: Same container = same behavior
4. **Orchestration Ready**: Deploy to Kubernetes, ECS, etc.
5. **MCP Protocol**: Claude Desktop integration via container
---
*This project demonstrates production-grade AI infrastructure with Docker.*
"""
| # ============================================================================= | |
| # ARCHITECTURE DIAGRAMS (How It Works) | |
| # ============================================================================= | |
| ARCHITECTURE_INTRO_MD = """ | |
| ## How It Works: Neural Memory vs RAG Architecture | |
| This section provides a detailed look at how both systems process information and connect to the LLM. | |
| The diagrams below are **faithful representations of our actual implementation**. | |
| --- | |
| """ | |
| NEURAL_MEMORY_DIAGRAM_HTML = """ | |
| <div style="font-family: system-ui, -apple-system, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff;"> | |
| <h3 style="text-align: center; color: #00d4ff; margin-bottom: 30px; font-size: 1.5em;"> | |
| Neural Memory Pipeline (Test-Time Training) | |
| </h3> | |
| <!-- Main Flow --> | |
| <div style="display: flex; flex-direction: column; gap: 20px; max-width: 900px; margin: 0 auto;"> | |
| <!-- Phase 1: Learning Phase --> | |
| <div style="background: rgba(0, 212, 255, 0.1); border: 2px solid #00d4ff; border-radius: 12px; padding: 20px;"> | |
| <h4 style="color: #00d4ff; margin: 0 0 15px 0;">Phase 1: Learning (When Facts Are Added)</h4> | |
| <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;"> | |
| <!-- Input --> | |
| <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;"> | |
| <div style="font-size: 24px;">📝</div> | |
| <div style="font-weight: bold; color: #fff;">User Fact</div> | |
| <div style="font-size: 11px; color: #a0aec0;">"Carlos rejected<br/>bright colors"</div> | |
| </div> | |
| <div style="color: #00d4ff; font-size: 24px;">→</div> | |
| <!-- Encode --> | |
| <div style="background: #553c9a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px;"> | |
| <div style="font-size: 24px;">🔢</div> | |
| <div style="font-weight: bold; color: #fff;">_encode_text()</div> | |
| <div style="font-size: 11px; color: #d6bcfa;">Tensor [1, 64, 256]</div> | |
| </div> | |
| <div style="color: #00d4ff; font-size: 24px;">→</div> | |
| <!-- Forward Pass --> | |
| <div style="background: #2f855a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px;"> | |
| <div style="font-size: 24px;">🧠</div> | |
| <div style="font-weight: bold; color: #fff;">memory_net(x)</div> | |
| <div style="font-size: 11px; color: #9ae6b4;">2-layer MLP<br/>~250K params</div> | |
| </div> | |
| <div style="color: #00d4ff; font-size: 24px;">→</div> | |
| <!-- Compute Loss --> | |
| <div style="background: #c53030; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px;"> | |
| <div style="font-size: 24px;">📊</div> | |
| <div style="font-weight: bold; color: #fff;">MSE Loss</div> | |
| <div style="font-size: 11px; color: #feb2b2;">Surprise Score<br/>= Prediction Error</div> | |
| </div> | |
| <div style="color: #00d4ff; font-size: 24px;">→</div> | |
| <!-- Gradient Descent --> | |
| <div style="background: #d69e2e; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 160px; border: 3px solid #faf089;"> | |
| <div style="font-size: 24px;">⚡</div> | |
| <div style="font-weight: bold; color: #1a202c;">WEIGHT UPDATE</div> | |
| <div style="font-size: 11px; color: #744210;">torch.autograd.grad()<br/>param -= lr × grad</div> | |
| </div> | |
| </div> | |
| <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;"> | |
| <strong style="color: #00d4ff;">Key Point:</strong> The neural network's weights physically change after each fact. | |
| This is real gradient descent happening at inference time (Test-Time Training / Titans architecture). | |
| </div> | |
| </div> | |
| <!-- Phase 2: Query Phase --> | |
| <div style="background: rgba(72, 187, 120, 0.1); border: 2px solid #48bb78; border-radius: 12px; padding: 20px;"> | |
| <h4 style="color: #48bb78; margin: 0 0 15px 0;">Phase 2: Query (When Questions Are Asked)</h4> | |
| <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;"> | |
| <!-- Question --> | |
| <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;"> | |
| <div style="font-size: 24px;">❓</div> | |
| <div style="font-weight: bold; color: #fff;">Question</div> | |
| <div style="font-size: 11px; color: #a0aec0;">"Will Carlos<br/>like neon?"</div> | |
| </div> | |
| <div style="color: #48bb78; font-size: 24px;">→</div> | |
| <!-- Surprise Check --> | |
| <div style="background: #553c9a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 130px;"> | |
| <div style="font-size: 24px;">🎯</div> | |
| <div style="font-weight: bold; color: #fff;">surprise()</div> | |
| <div style="font-size: 11px; color: #d6bcfa;">Novelty Score<br/>(No Learning)</div> | |
| </div> | |
| <div style="color: #48bb78; font-size: 24px;">→</div> | |
| <!-- Context Builder --> | |
| <div style="background: #2f855a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 160px;"> | |
| <div style="font-size: 24px;">📦</div> | |
| <div style="font-weight: bold; color: #fff;">Build Context</div> | |
| <div style="font-size: 11px; color: #9ae6b4;"><strong>ALL facts</strong><br/>+ Pattern hints<br/>+ Surprise score</div> | |
| </div> | |
| <div style="color: #48bb78; font-size: 24px;">→</div> | |
| <!-- System Prompt --> | |
| <div style="background: #3182ce; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 180px;"> | |
| <div style="font-size: 24px;">💬</div> | |
| <div style="font-weight: bold; color: #fff;">System Prompt</div> | |
| <div style="font-size: 10px; color: #bee3f8; text-align: left; margin-top: 5px;"> | |
| "You have LEARNED from:<br/> | |
| • All 4 observations<br/> | |
| • Identify PATTERNS<br/> | |
| • Make INFERENCES" | |
| </div> | |
| </div> | |
| <div style="color: #48bb78; font-size: 24px;">→</div> | |
| <!-- LLM --> | |
| <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px; border: 3px solid #d6bcfa;"> | |
| <div style="font-size: 24px;">🤖</div> | |
| <div style="font-weight: bold; color: #fff;">LLM</div> | |
| <div style="font-size: 11px; color: #e9d8fd;">SmolLM3-3B<br/>(HuggingFace)</div> | |
| </div> | |
| </div> | |
| <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;"> | |
| <strong style="color: #48bb78;">Key Point:</strong> The LLM receives ALL facts + learned pattern hints + novelty indicator. | |
| It's instructed to identify patterns and make inferences, not just retrieve. | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| RAG_DIAGRAM_HTML = """ | |
| <div style="font-family: system-ui, -apple-system, sans-serif; padding: 20px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 16px; color: #fff; margin-top: 20px;"> | |
| <h3 style="text-align: center; color: #fc8181; margin-bottom: 30px; font-size: 1.5em;"> | |
| RAG Pipeline (Retrieval-Augmented Generation) | |
| </h3> | |
| <!-- Main Flow --> | |
| <div style="display: flex; flex-direction: column; gap: 20px; max-width: 900px; margin: 0 auto;"> | |
| <!-- Phase 1: Storage Phase --> | |
| <div style="background: rgba(252, 129, 129, 0.1); border: 2px solid #fc8181; border-radius: 12px; padding: 20px;"> | |
| <h4 style="color: #fc8181; margin: 0 0 15px 0;">Phase 1: Storage (When Facts Are Added)</h4> | |
| <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;"> | |
| <!-- Input --> | |
| <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;"> | |
| <div style="font-size: 24px;">📝</div> | |
| <div style="font-weight: bold; color: #fff;">User Fact</div> | |
| <div style="font-size: 11px; color: #a0aec0;">"Carlos rejected<br/>bright colors"</div> | |
| </div> | |
| <div style="color: #fc8181; font-size: 24px;">→</div> | |
| <!-- Append to List --> | |
| <div style="background: #744210; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 180px;"> | |
| <div style="font-size: 24px;">📋</div> | |
| <div style="font-weight: bold; color: #fff;">knowledge_base.append()</div> | |
| <div style="font-size: 11px; color: #faf089;">Simple list storage<br/>No transformation</div> | |
| </div> | |
| <div style="color: #fc8181; font-size: 24px;">→</div> | |
| <!-- Storage --> | |
| <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px; border: 2px dashed #a0aec0;"> | |
| <div style="font-size: 24px;">🗃️</div> | |
| <div style="font-weight: bold; color: #fff;">Document Store</div> | |
| <div style="font-size: 11px; color: #a0aec0;">List of strings<br/>Grows with data</div> | |
| </div> | |
| </div> | |
| <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;"> | |
| <strong style="color: #fc8181;">Key Point:</strong> Facts are simply stored as-is. <strong>No learning occurs.</strong> | |
| The system doesn't understand relationships or patterns between facts. | |
| </div> | |
| </div> | |
| <!-- Phase 2: Retrieval Phase --> | |
| <div style="background: rgba(237, 137, 54, 0.1); border: 2px solid #ed8936; border-radius: 12px; padding: 20px;"> | |
| <h4 style="color: #ed8936; margin: 0 0 15px 0;">Phase 2: Query (When Questions Are Asked)</h4> | |
| <div style="display: flex; align-items: center; gap: 15px; flex-wrap: wrap; justify-content: center;"> | |
| <!-- Question --> | |
| <div style="background: #2d3748; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 120px;"> | |
| <div style="font-size: 24px;">❓</div> | |
| <div style="font-weight: bold; color: #fff;">Question</div> | |
| <div style="font-size: 11px; color: #a0aec0;">"Will Carlos<br/>like neon?"</div> | |
| </div> | |
| <div style="color: #ed8936; font-size: 24px;">→</div> | |
| <!-- Tokenize --> | |
| <div style="background: #553c9a; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 130px;"> | |
| <div style="font-size: 24px;">✂️</div> | |
| <div style="font-weight: bold; color: #fff;">Tokenize</div> | |
| <div style="font-size: 11px; color: #d6bcfa;">Split into words<br/>{"will", "carlos",<br/>"like", "neon"}</div> | |
| </div> | |
| <div style="color: #ed8936; font-size: 24px;">→</div> | |
| <!-- Keyword Match --> | |
| <div style="background: #c53030; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 160px;"> | |
| <div style="font-size: 24px;">🔍</div> | |
| <div style="font-weight: bold; color: #fff;">Keyword Overlap</div> | |
| <div style="font-size: 11px; color: #feb2b2;">Count matching words<br/>between Q and each fact</div> | |
| </div> | |
| <div style="color: #ed8936; font-size: 24px;">→</div> | |
| <!-- Top-K --> | |
| <div style="background: #744210; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 130px; border: 3px solid #faf089;"> | |
| <div style="font-size: 24px;">🏆</div> | |
| <div style="font-weight: bold; color: #fff;">Top-2 Facts</div> | |
| <div style="font-size: 11px; color: #faf089;">Only highest<br/>overlap scores</div> | |
| </div> | |
| <div style="color: #ed8936; font-size: 24px;">→</div> | |
| <!-- System Prompt --> | |
| <div style="background: #3182ce; padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 180px;"> | |
| <div style="font-size: 24px;">💬</div> | |
| <div style="font-weight: bold; color: #fff;">System Prompt</div> | |
| <div style="font-size: 10px; color: #bee3f8; text-align: left; margin-top: 5px;"> | |
| "You are a RAG system.<br/> | |
| ONLY use retrieved facts.<br/> | |
| If not covered, say so." | |
| </div> | |
| </div> | |
| <div style="color: #ed8936; font-size: 24px;">→</div> | |
| <!-- LLM --> | |
| <div style="background: linear-gradient(135deg, #805ad5 0%, #553c9a 100%); padding: 15px 20px; border-radius: 8px; text-align: center; min-width: 140px; border: 3px solid #d6bcfa;"> | |
| <div style="font-size: 24px;">🤖</div> | |
| <div style="font-weight: bold; color: #fff;">LLM</div> | |
| <div style="font-size: 11px; color: #e9d8fd;">SmolLM3-3B<br/>(Same model!)</div> | |
| </div> | |
| </div> | |
| <div style="margin-top: 15px; padding: 10px; background: rgba(0,0,0,0.3); border-radius: 8px; font-size: 12px; color: #a0aec0;"> | |
| <strong style="color: #ed8936;">Key Point:</strong> The LLM only sees 2 retrieved facts (not all 4). | |
| "neon" ≠ "bright" keyword-wise, so relevant facts may not be retrieved! | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| LLM_INTEGRATION_MD = """ | |
| --- | |
| ## How Each System Connects to the LLM | |
| Both systems use the **exact same LLM** (HuggingFace SmolLM3-3B). The difference is **what context they provide**. | |
| ### Neural Memory → LLM Connection | |
```
┌─────────────────────────────────────────────────────────────────────┐
│                    SYSTEM PROMPT (Neural Memory)                    │
├─────────────────────────────────────────────────────────────────────┤
│  "You are an AI with neural memory that has LEARNED from all        │
│  observations below. Unlike simple retrieval, you should:           │
│                                                                     │
│  1. Consider ALL facts holistically                                 │
│  2. Identify PATTERNS across multiple observations                  │
│  3. Make INFERENCES based on learned patterns                       │
│  4. Predict based on trends, not just direct matches                │
│                                                                     │
│  Observations (learned knowledge):                                  │
│  - Carlos rejected the bright colorful design                       │
│  - Carlos rejected the flashy animated homepage                     │
│  - Carlos approved the minimalist dark layout                       │
│  - Carlos approved the clean monochrome interface                   │
│                                                                     │
│  Learned patterns from observations:                                │
│  - Positive signals: 2 approvals                                    │
│  - Negative signals: 2 rejections                                   │
│  - Look for common themes in approved vs rejected items             │
│                                                                     │
│  Question novelty (surprise score): 0.89                            │
│  - High surprise (>0.6): This is a novel topic, be cautious"        │
├─────────────────────────────────────────────────────────────────────┤
│  USER: "We have a new UI mockup with neon colors - will Carlos      │
│         like it?"                                                   │
└─────────────────────────────────────────────────────────────────────┘
```
| **What the Neural Memory provides:** | |
| | Component | Purpose | | |
| |-----------|---------| | |
| | **ALL facts** | Complete context for holistic reasoning | | |
| | **Pattern hints** | Extracted approval/rejection counts | | |
| | **Surprise score** | Indicates if question is familiar or novel | | |
| | **Inference instructions** | Tells LLM to identify patterns and predict | | |
| --- | |
| ### RAG → LLM Connection | |
```
┌─────────────────────────────────────────────────────────────────────┐
│                         SYSTEM PROMPT (RAG)                         │
├─────────────────────────────────────────────────────────────────────┤
│  "You are a RAG system. You can ONLY use the retrieved facts below  │
│  to answer. If the retrieved facts don't directly answer the        │
│  question, say 'The retrieved information doesn't cover this.'      │
│                                                                     │
│  Retrieved facts:                                                   │
│  - Carlos rejected the bright colorful design"                      │
│  (Only 1 fact retrieved - 'neon' didn't match other keywords!)      │
├─────────────────────────────────────────────────────────────────────┤
│  USER: "We have a new UI mockup with neon colors - will Carlos      │
│         like it?"                                                   │
└─────────────────────────────────────────────────────────────────────┘
```
| **What RAG provides:** | |
| | Component | Purpose | | |
| |-----------|---------| | |
| | **Top-2 facts only** | Limited context based on keyword overlap | | |
| | **No pattern info** | System doesn't analyze relationships | | |
| | **No novelty signal** | No indication of question familiarity | | |
| | **Strict retrieval instructions** | Tells LLM to only use retrieved facts | | |
| --- | |
| ## The Critical Difference: What Goes Into the LLM | |
| """ | |
| COMPARISON_TABLE_HTML = """ | |
| <div style="font-family: system-ui, -apple-system, sans-serif; padding: 20px; background: #1a1a2e; border-radius: 16px; color: #fff; margin: 20px 0;"> | |
| <h3 style="text-align: center; color: #fff; margin-bottom: 20px;">Side-by-Side: What the LLM Receives</h3> | |
| <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;"> | |
| <!-- Neural Memory Column --> | |
| <div style="background: rgba(0, 212, 255, 0.1); border: 2px solid #00d4ff; border-radius: 12px; padding: 20px;"> | |
| <h4 style="color: #00d4ff; text-align: center; margin: 0 0 15px 0;">🧠 Neural Memory</h4> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;"> | |
| <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Facts Provided:</div> | |
| <div style="font-size: 13px; color: #a0aec0;">ALL 4 facts (complete knowledge)</div> | |
| </div> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;"> | |
| <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Pattern Analysis:</div> | |
| <div style="font-size: 13px; color: #a0aec0;"> | |
| • 2 approvals identified<br/> | |
| • 2 rejections identified<br/> | |
| • "Look for common themes" | |
| </div> | |
| </div> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;"> | |
| <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Novelty Signal:</div> | |
| <div style="font-size: 13px; color: #a0aec0;">Surprise score: 0.89 (novel topic)</div> | |
| </div> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px;"> | |
| <div style="color: #48bb78; font-weight: bold; margin-bottom: 8px;">✅ Instructions:</div> | |
| <div style="font-size: 13px; color: #a0aec0;"> | |
| "Identify PATTERNS"<br/> | |
| "Make INFERENCES"<br/> | |
| "Predict based on trends" | |
| </div> | |
| </div> | |
| </div> | |
| <!-- RAG Column --> | |
| <div style="background: rgba(252, 129, 129, 0.1); border: 2px solid #fc8181; border-radius: 12px; padding: 20px;"> | |
| <h4 style="color: #fc8181; text-align: center; margin: 0 0 15px 0;">📚 RAG</h4> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;"> | |
| <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">⚠️ Facts Provided:</div> | |
| <div style="font-size: 13px; color: #a0aec0;">Only 1-2 facts (keyword match)<br/> | |
| <span style="color: #fc8181; font-size: 11px;">"neon" ≠ "bright", "colorful", etc.</span></div> | |
| </div> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;"> | |
| <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">❌ Pattern Analysis:</div> | |
| <div style="font-size: 13px; color: #a0aec0;">None - no relationship detection</div> | |
| </div> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px; margin-bottom: 15px;"> | |
| <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">❌ Novelty Signal:</div> | |
| <div style="font-size: 13px; color: #a0aec0;">None - no familiarity indicator</div> | |
| </div> | |
| <div style="background: #2d3748; border-radius: 8px; padding: 15px;"> | |
| <div style="color: #fc8181; font-weight: bold; margin-bottom: 8px;">⚠️ Instructions:</div> | |
| <div style="font-size: 13px; color: #a0aec0;"> | |
| "ONLY use retrieved facts"<br/> | |
| "If not covered, say so"<br/> | |
| <span style="color: #fc8181; font-size: 11px;">No inference allowed</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| ARCHITECTURE_SUMMARY_MD = """ | |
| --- | |
| ## Why This Architecture Matters | |
| ### The Learning Advantage | |
| | Aspect | Neural Memory | RAG | | |
| |--------|---------------|-----| | |
| | **Storage** | Fixed neural weights (~250K params) | Growing document store | | |
| | **Learning** | Yes - weights update per observation | No - just stores text | | |
| | **Retrieval** | Not needed - patterns in weights | Required - keyword matching | | |
| | **Inference** | Can generalize to novel queries | Limited to direct matches | | |
| | **Memory Size** | Constant (doesn't grow) | Linear growth with data | | |
| ### When Neural Memory Wins | |
| The architecture shines when: | |
| 1. **Pattern Recognition Required** - "Carlos likes X, dislikes Y" → predict for Z | |
| 2. **Novel Queries** - Question keywords don't match stored facts | |
| 3. **Holistic Reasoning** - Answer requires synthesizing multiple facts | |
| 4. **Bounded Memory** - Can't afford growing storage | |
| ### When RAG Might Be Better | |
| RAG is simpler when: | |
| 1. **Exact Retrieval** - "What did the document say about X?" | |
| 2. **Large Corpus** - Millions of documents to search | |
| 3. **No Patterns** - Facts are independent, not related | |
| 4. **Transparency** - Need to cite exact source documents | |
| --- | |
| ## Technical Implementation Details | |
| ### Neural Memory Architecture | |
| ``` | |
| Input Text | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ _encode_text(text) │ | |
| │ ┌─────────────────────────────────────────┐ │ | |
| │ │ 1. Convert to ASCII ordinals │ │ | |
| │ │ 2. Pad/truncate to max_seq_len (64) │ │ | |
| │ │ 3. Project to dimension (256) │ │ | |
| │ │ 4. Output: Tensor [1, 64, 256] │ │ | |
| │ └─────────────────────────────────────────┘ │ | |
| └─────────────────────────────────────────────┘ | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ memory_net (nn.Sequential) │ | |
| │ ┌─────────────────────────────────────────┐ │ | |
| │ │ Linear(256 → 1024) │ │ | |
| │ │ GELU activation │ │ | |
| │ │ LayerNorm(1024) │ │ | |
| │ │ Linear(1024 → 256) │ │ | |
| │ └─────────────────────────────────────────┘ │ | |
| │ Total: ~262K parameters │ | |
| └─────────────────────────────────────────────┘ | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ _compute_surprise_tensor(input, output) │ | |
| │ ┌─────────────────────────────────────────┐ │ | |
| │ │ loss = MSE(output, target) │ │ | |
| │ │ surprise = sigmoid(loss) scaled to 0-1 │ │ | |
| │ └─────────────────────────────────────────┘ │ | |
| └─────────────────────────────────────────────┘ | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ _update_weights(loss) [IF learn=True] │ | |
| │ ┌─────────────────────────────────────────┐ │ | |
| │ │ grads = torch.autograd.grad(loss, θ) │ │ | |
| │ │ for each (param, grad): │ │ | |
| │ │ param -= learning_rate × grad │ │ | |
| │ └─────────────────────────────────────────┘ │ | |
| │ ⚡ This is the key innovation! │ | |
| └─────────────────────────────────────────────┘ | |
| ``` | |
| ### RAG Architecture (Simplified for Demo) | |
| ``` | |
| Input Text | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ knowledge_base.append({"fact": text, ...}) │ | |
| │ Simple list storage - no transformation │ | |
| └─────────────────────────────────────────────┘ | |
| Query | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ Keyword Overlap Scoring │ | |
| │ ┌─────────────────────────────────────────┐ │ | |
| │ │ question_words = set(query.split()) │ │ | |
| │ │ for fact in knowledge_base: │ │ | |
| │ │ fact_words = set(fact.split()) │ │ | |
| │ │ score = len(question_words ∩ fact_ │ │ | |
| │ │ words) │ │ | |
| │ └─────────────────────────────────────────┘ │ | |
| └─────────────────────────────────────────────┘ | |
| │ | |
| ▼ | |
| ┌─────────────────────────────────────────────┐ | |
| │ Top-K Selection (K=2 in our demo) │ | |
| │ Return facts with highest overlap scores │ | |
| └─────────────────────────────────────────────┘ | |
| ``` | |
| --- | |
| *These diagrams represent the actual implementation in this demo. The code is open source.* | |
| """ | |
| ABOUT_MD = """ | |
| ## About This Project | |
| ### What Makes This Special | |
| This is **NOT a simulation**. The demo runs real PyTorch code: | |
| 1. **Real Neural Network**: 2-layer MLP with ~250K parameters | |
| 2. **Real Gradient Descent**: `torch.autograd.grad()` computes gradients | |
| 3. **Real Weight Updates**: Parameters change during inference | |
| 4. **Real Surprise Metric**: MSE loss measures prediction error | |
| ### The Titans Architecture | |
| Based on Google's December 2024 paper: [arxiv.org/abs/2501.00663](https://arxiv.org/abs/2501.00663) | |
| **Key Innovation**: The memory IS a neural network. Instead of storing vectors, | |
| it learns patterns by updating weights during inference. | |
| ### Docker Integration | |
| - **MCP Server**: Model Context Protocol for Claude Desktop | |
| - **Checkpoints**: Save/restore learned state via Docker volumes | |
| - **Container-Native**: Designed for orchestrated deployment | |
| --- | |
| ## Limitations | |
| This is a **demonstration project**, not a production-ready system: | |
| | Component | Current State | Production Would Need | | |
| |-----------|---------------|----------------------| | |
| | **RAG Implementation** | Simplified keyword matching | Vector embeddings + semantic search (FAISS, Pinecone) | | |
| | **Neural Memory** | Basic 2-layer MLP | Deeper architecture, attention mechanisms | | |
| | **Scalability** | Single-user demo | Distributed inference, GPU optimization | | |
| | **Evaluation** | Qualitative comparison | Benchmarks, ablation studies, metrics | | |
| | **Memory Capacity** | ~250K parameters | Larger models, hierarchical memory | | |
| The RAG comparison uses simple word overlap scoring to demonstrate *why* keyword-based retrieval fails for pattern inference. A production RAG system would use proper embeddings and vector similarity search. | |
| --- | |
| ## Acknowledgments | |
| This project builds on the work of brilliant researchers: | |
| **Core Research:** | |
| - **Titans: Learning to Memorize at Test Time** (Google, Dec 2024) — [arXiv:2501.00663](https://arxiv.org/abs/2501.00663) | |
| - Ali Behrouz, Peilin Zhong, Vahab Mirrokni | |
| - **Learning to (Learn at Test Time): RNNs with Expressive Hidden States** (Stanford/Meta, Jul 2024) — [arXiv:2407.04620](https://arxiv.org/abs/2407.04620) | |
| - Yu Sun, Xinhao Li, Karan Dalal, et al. | |
| **Frameworks & Tools:** | |
| - [PyTorch](https://pytorch.org/) — The foundation for neural memory implementation | |
| - [Gradio](https://gradio.app/) — Interactive demo interface | |
| - [HuggingFace](https://huggingface.co/) — Model hosting and inference API | |
| - [Model Context Protocol](https://modelcontextprotocol.io/) — Claude Desktop integration | |
| **Inspiration:** | |
| - The broader ML community exploring alternatives to attention-based memory | |
| - Open-source contributors who make research accessible | |
| --- | |
| ## Next Steps | |
| Potential improvements for future iterations: | |
| 1. **Real RAG Baseline**: Integrate sentence-transformers + FAISS for proper semantic retrieval comparison | |
| 2. **Attention-Based Memory**: Implement the full Titans architecture with neural long-term memory gates | |
| 3. **Benchmarking**: Add quantitative evaluation on standard memory tasks (bAbI, etc.) | |
| 4. **Multi-Modal Support**: Extend to image/audio observations | |
| 5. **Distributed Memory**: Explore memory sharing across multiple agents | |
| 6. **Fine-Grained Forgetting**: Implement selective memory consolidation/pruning | |
| --- | |
| ## Built By | |
| **Carlos Crespo Macaya** | |
| AI Engineer — GenAI Systems & Applied MLOps | |
| This project demonstrates the ability to: | |
| 1. Read cutting-edge research (Titans paper) | |
| 2. Implement it correctly (PyTorch TTT) | |
| 3. Productionize it (Docker, MCP, CI/CD) | |
| 4. Communicate it effectively (this demo) | |
| """ | |
# =============================================================================
# GRADIO INTERFACE
# =============================================================================
# Builds the top-level Blocks app (`demo`): a header, five tabs (LLM Comparison,
# How It Works, Key Concepts, Integration & Docker, About) and a footer.
# All event wiring for the interactive comparison lives in the first tab.
with gr.Blocks(title="Docker Neural Memory", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
    # Branded header
    gr.HTML(HEADER_HTML)

    with gr.Tabs():
        # TAB 1: Comparison Demo (NEW - Main Feature)
        with gr.TabItem("LLM Comparison"):
            gr.Markdown("""
### Neural Memory vs RAG (Retrieval-Augmented Generation)
**Step 1:** Teach the system facts about preferences/patterns
**Step 2:** Ask questions that require **inference**, not just retrieval
**RAG** retrieves similar documents but can't learn patterns.
**Neural Memory** learns from ALL observations and can infer from trends.
""")

            with gr.Row():
                # Left column: fact entry plus suggested example facts.
                with gr.Column(scale=1):
                    gr.Markdown("#### Step 1: Teach Facts")
                    fact_input = gr.Textbox(
                        label="Add a Fact",
                        placeholder="e.g., 'Carlos prefers VSCode over Vim'",
                        lines=2,
                    )
                    add_fact_btn = gr.Button("Add to Knowledge Base", variant="secondary")
                    fact_output = gr.Markdown()
                    gr.Markdown("#### Example Facts to Try")
                    gr.Markdown("""
**Scenario: Learning User Preferences (Pattern Recognition)**
1. "Carlos rejected the bright colorful design"
2. "Carlos rejected the flashy animated homepage"
3. "Carlos approved the minimalist dark layout"
4. "Carlos approved the clean monochrome interface"
Then ask: **"We have a new UI mockup with neon colors - will Carlos like it?"**
*Neural Memory learns the pattern (Carlos prefers dark/minimal). RAG just retrieves similar facts without inferring the preference pattern.*
""")
                # Right column: plot of what the RAG store currently holds.
                with gr.Column(scale=1):
                    gr.Markdown("#### Knowledge Base (RAG Store)")
                    kb_plot = gr.Plot(label="Facts Stored")

            # Visualizations row
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("#### Neural Memory State")
                    neural_state_plot = gr.Plot(label="Neural Network Weights & Stats")
                with gr.Column(scale=1):
                    gr.Markdown("#### Embedding Space")
                    tsne_plot = gr.Plot(label="t-SNE/PCA Visualization")

            # Adding a fact refreshes the status text and all three plots.
            add_fact_btn.click(
                add_to_knowledge_base,
                inputs=[fact_input],
                outputs=[fact_output, tsne_plot, neural_state_plot, kb_plot],
            )

            gr.Markdown("---")
            gr.Markdown("#### Step 2: Ask Questions & Compare Responses")

            with gr.Row():
                with gr.Column(scale=2):
                    question_input = gr.Textbox(
                        label="Ask a Question",
                        placeholder="e.g., 'We have a new UI mockup with neon colors - will Carlos like it?'",
                        lines=2,
                    )
                with gr.Column(scale=1):
                    gr.Markdown("""
**Best Questions for Neural Memory:**
- Questions requiring **pattern inference**
- Questions about **preferences/trends**
- Questions needing **generalization**
""")

            with gr.Row():
                compare_btn = gr.Button("Compare Responses", variant="primary", size="lg")
                reset_compare_btn = gr.Button("Reset Comparison", variant="secondary")

            # Response display - side by side with clear headers
            with gr.Row():
                with gr.Column():
                    gr.Markdown("##### Neural Memory Response")
                    nm_response = gr.Markdown()
                with gr.Column():
                    gr.Markdown("##### RAG Response")
                    vanilla_response = gr.Markdown()

            comparison_summary = gr.Markdown(label="Comparison Metrics")

            compare_btn.click(
                compare_responses,
                inputs=[question_input],
                outputs=[nm_response, vanilla_response, comparison_summary, neural_state_plot, kb_plot],
            )
            reset_compare_btn.click(
                reset_comparison,
                outputs=[comparison_summary, tsne_plot, neural_state_plot, kb_plot],
            )
            # The reset handler's outputs do not include the two answer panes,
            # so without this second listener the previous answers would stay
            # on screen after "Reset Comparison". Clear both panes explicitly.
            reset_compare_btn.click(
                lambda: ("", ""),
                outputs=[nm_response, vanilla_response],
            )

        # TAB 2: How It Works (Architecture Diagrams)
        with gr.TabItem("How It Works"):
            gr.Markdown(ARCHITECTURE_INTRO_MD)
            # Neural Memory Diagram
            gr.HTML(NEURAL_MEMORY_DIAGRAM_HTML)
            # RAG Diagram
            gr.HTML(RAG_DIAGRAM_HTML)
            # LLM Integration Explanation
            gr.Markdown(LLM_INTEGRATION_MD)
            # Side-by-side comparison table
            gr.HTML(COMPARISON_TABLE_HTML)
            # Architecture Summary
            gr.Markdown(ARCHITECTURE_SUMMARY_MD)

        # TAB 3: Key Concepts
        with gr.TabItem("Key Concepts"):
            gr.HTML(KEY_CONCEPTS_HTML)

        # TAB 4: Integration & Docker
        with gr.TabItem("Integration & Docker"):
            gr.Markdown("## How Memory Modules Integrate with LLMs")
            gr.Markdown("Follow this incremental explanation to understand how both RAG and Neural Memory attach to a vanilla LLM.")
            # Step 1: Vanilla LLM
            gr.HTML(VANILLA_LLM_DIAGRAM_HTML)
            # Step 2a: RAG Integration
            gr.HTML(RAG_INTEGRATION_DIAGRAM_HTML)
            # Step 2b: Neural Memory Integration
            gr.HTML(NEURAL_MEMORY_INTEGRATION_DIAGRAM_HTML)
            # Step 3: Docker Deployment
            gr.HTML(DOCKER_DEPLOYMENT_DIAGRAM_HTML)
            # Docker details
            gr.Markdown(DOCKER_INTEGRATION_MD)

        # TAB 5: About
        with gr.TabItem("About"):
            gr.Markdown(ABOUT_MD)

    # Polished footer with profile links
    gr.HTML(FOOTER_HTML)
if __name__ == "__main__":
    # Script entry point: start the Gradio app only when this file is run
    # directly, not when it is imported.
    # Listen on every network interface (0.0.0.0) at port 7860.
    # NOTE(review): 7860 is presumably the port the hosting container
    # exposes — confirm against the deployment config.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)