memory-augmented-generation

Sleeping

App Files Community

Pavantej committed on Dec 20, 2025

Commit

939db07

verified ·

1 Parent(s): 6fc70f4

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +68 -104

app.py CHANGED Viewed

@@ -1,67 +1,72 @@
 """
 Titans + MIRAS Demo: A Brain That Changes Itself While Thinking
-This implements a minimal version of Titans (test-time learning) and MIRAS
-(associative memory) using distilgpt2 running on Hugging Face.
-Key features:
-- Test-time learning: Memory updates while generating responses
-- Retention gate: Surprising events are more memorable
-- Persistent memory: Remembers across sessions
 """
-import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 from miras_memory import MIRASMemory
 from projections import KeyProjection, ValueProjection
 from memory_store import MemoryStore
 # ========== Configuration ==========
 MODEL_NAME = "distilgpt2"
 HIDDEN_DIM = 768  # distilgpt2 hidden dimension
-MEMORY_DIM = 256  # memory dimension
-LEARNING_RATE = 1e-3  # test-time learning rate
-MAX_NEW_TOKENS = 50  # max tokens to generate
 # ========== Initialize Components ==========
 print("🧠 Initializing Titans + MIRAS brain...")
-# Base language model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 tokenizer.pad_token = tokenizer.eos_token
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-model.eval()
-# Memory system
-memory = MIRASMemory(memory_dim=MEMORY_DIM, init_scale=0.01)
 key_proj = KeyProjection(HIDDEN_DIM, MEMORY_DIM)
 value_proj = ValueProjection(HIDDEN_DIM, MEMORY_DIM)
-# Memory persistence
 store = MemoryStore(save_dir="memory")
 store.load(memory)
 print("✅ Brain initialized!")
-# ========== Core Logic ==========
-def chat(user_input, history):
     """
-    Main chat function that:
-    1. Processes input through base LM
-    2. Updates memory via test-time learning
-    3. Generates response
-    4. Returns response + memory stats
     """
-    if not user_input.strip():
-        return history
     # === Step 1: Extract hidden states from input ===
-    inputs = tokenizer(user_input, return_tensors="pt", padding=True)
     with torch.no_grad():
         outputs = model(
@@ -109,8 +114,8 @@ def chat(user_input, history):
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
     # Remove the input prompt from response
-    if response.startswith(user_input):
-        response = response[len(user_input):].strip()
     if not response:
         response = "..."
@@ -118,89 +123,48 @@ def chat(user_input, history):
     # === Step 4: Save memory ===
     store.save(memory)
-    # === Step 5: Format output ===
     stats = memory.get_stats()
     memory_info = (
-        f"**Memory Update**: Loss={loss.item():.4f} | "
-        f"Retention={retention:.2f}x | "
-        f"Updates={stats['updates']} | "
-        f"Avg Loss={stats['avg_loss']:.4f}"
     )
-    # Build response with memory stats
-    bot_message = f"{response}\n\n---\n*{memory_info}*"
-    # Update history with simple tuple format (Gradio 4.x compatible)
-    history = history + [[user_input, bot_message]]
-    return history
-def clear_conversation():
-    """Clear the conversation but keep memory."""
-    return []
 # ========== Gradio Interface ==========
-with gr.Blocks(title="Titans + MIRAS: Self-Modifying Brain") as demo:
-    gr.Markdown("""
-    # 🧠 Titans + MIRAS: A Brain That Changes Itself While Thinking
-    This is a minimal implementation of **Titans** (test-time learning) and **MIRAS** (associative memory).
-    **What makes this special:**
-    - 🔄 **Test-time learning**: The memory updates with every interaction
-    - 🎯 **Retention gate**: Surprising inputs are more memorable
-    - 💾 **Persistent memory**: Remembers across sessions
     **How it works:**
-    1. Your input is processed through distilgpt2
-    2. Hidden states are projected to memory key/value space
-    3. Memory learns via gradient descent (learning rate adjusted by surprise)
-    4. Model generates a response
-    5. Memory is saved to disk
-    *Watch the memory loss decrease as it learns from your conversations!*
-    """)
-    chatbot = gr.Chatbot(
-        label="Conversation",
-        height=400
-    )
-    with gr.Row():
-        msg = gr.Textbox(
-            label="Your Message",
-            placeholder="Type your message here...",
-            scale=4,
-        )
-        submit = gr.Button("Send", scale=1, variant="primary")
-    with gr.Row():
-        clear = gr.Button("Clear Conversation (Keep Memory)")
-    gr.Markdown("""
-    ### 📊 Memory Stats
-    - **Loss**: How well memory predicts values (lower = better)
-    - **Retention**: Learning rate multiplier (higher for surprising inputs)
-    - **Updates**: Total number of memory updates
-    - **Avg Loss**: Average loss across all updates
-    ### 📚 References
-    - **Titans**: [arxiv.org/abs/2501.00663](https://arxiv.org/abs/2501.00663)
-    - **MIRAS**: [arxiv.org/abs/2504.13173](https://arxiv.org/abs/2504.13173)
-    """)
-    # Event handlers
-    msg.submit(chat, [msg, chatbot], [chatbot]).then(
-        lambda: "", None, msg
-    )
-    submit.click(chat, [msg, chatbot], [chatbot]).then(
-        lambda: "", None, msg
-    )
-    clear.click(clear_conversation, None, [chatbot])
-print("🚀 Launching Gradio interface...")
 demo.launch()

 """
 Titans + MIRAS Demo: A Brain That Changes Itself While Thinking
+This application demonstrates test-time learning using:
+- Titans: Test-time training framework
+- MIRAS: Associative memory with retention gate
 """
 import torch
+import torch.nn as nn
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import gradio as gr
 from miras_memory import MIRASMemory
 from projections import KeyProjection, ValueProjection
 from memory_store import MemoryStore
+print("=" * 50)
+print("===== Application Startup at", __import__('datetime').datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
+print("=" * 50)
+print()
 # ========== Configuration ==========
 MODEL_NAME = "distilgpt2"
 HIDDEN_DIM = 768  # distilgpt2 hidden dimension
+MEMORY_DIM = 256  # Memory space dimension
+LEARNING_RATE = 1e-3  # Base learning rate for test-time updates
+MAX_NEW_TOKENS = 50  # Max tokens to generate
 # ========== Initialize Components ==========
 print("🧠 Initializing Titans + MIRAS brain...")
+# Load base language model (frozen)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 tokenizer.pad_token = tokenizer.eos_token
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+model.eval()  # Frozen - no training
+# Create projection layers
 key_proj = KeyProjection(HIDDEN_DIM, MEMORY_DIM)
 value_proj = ValueProjection(HIDDEN_DIM, MEMORY_DIM)
+# Create memory module
+memory = MIRASMemory(memory_dim=MEMORY_DIM, init_scale=0.01)
+# Load persistent memory
 store = MemoryStore(save_dir="memory")
 store.load(memory)
 print("✅ Brain initialized!")
+# ========== Chat Function ==========
+def chat(message, history):
     """
+    Main chat function for gr.ChatInterface.
+    Args:
+        message: str - user's current message
+        history: list of dicts with 'role' and 'content' keys
+    Returns:
+        str - assistant's response with memory stats
     """
+    if not message.strip():
+        return "Please enter a message."
     # === Step 1: Extract hidden states from input ===
+    inputs = tokenizer(message, return_tensors="pt", padding=True)
     with torch.no_grad():
         outputs = model(
     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
     # Remove the input prompt from response
+    if response.startswith(message):
+        response = response[len(message):].strip()
     if not response:
         response = "..."
     # === Step 4: Save memory ===
     store.save(memory)
+    # === Step 5: Format output with memory stats ===
     stats = memory.get_stats()
     memory_info = (
+        f"\n\n---\n"
+        f"**🧠 Memory Update**\n"
+        f"- Loss: {loss.item():.4f} (lower = better prediction)\n"
+        f"- Retention: {retention:.2f}x (surprise factor)\n"
+        f"- Total Updates: {stats['updates']}\n"
+        f"- Avg Loss: {stats['avg_loss']:.4f}"
     )
+    return response + memory_info
 # ========== Gradio Interface ==========
+print("🚀 Launching Gradio interface...")
+demo = gr.ChatInterface(
+    fn=chat,
+    title="🧠 Titans + MIRAS: A Brain That Changes Itself While Thinking",
+    description="""
+    This chatbot uses **test-time learning** - it updates its memory with every message!
     **How it works:**
+    1. Your message is processed through distilgpt2
+    2. Memory predicts what it should remember
+    3. Prediction error (loss) indicates surprise
+    4. Higher surprise → stronger memory formation
+    5. Memory weights update via gradient descent
+    6. Response generated and memory saved to disk
+    **Watch the stats below each response to see the brain learning!**
+    """,
+    examples=[
+        "Hello! What can you do?",
+        "Tell me about test-time learning",
+        "What is 2+2?",
+        "Repeat this exact phrase: The quick brown fox",
+    ],
+    cache_examples=False,
+    theme="soft",
+)
 demo.launch()