Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Files Community

Really-Amazing committed on Mar 15

Commit

6fd1e97

verified ·

1 Parent(s): e116dd8

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -35

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import torch
 import gradio as gr
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
 from nanochat.gpt import GPT, GPTConfig
@@ -7,22 +8,22 @@ from nanochat.gpt import GPT, GPTConfig
 MODEL_PATH = "model_000971.pt"
 print("Waking up the toddler (NanoChat-ClimbMix-D12)...")
 tokenizer = get_tokenizer()
-# EXACT values from your local scripts.chat_web output
-config = GPTConfig(
-    vocab_size=32768,
-    n_layer=12,
-    n_head=6,
-    n_kv_head=6,
-    n_embd=768,
-    sequence_len=2048,
-)
 model = GPT(config)
 print("Loading weights...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
 unwanted_prefix = '_orig_mod.'
 for k in list(state_dict.keys()):
     if k.startswith(unwanted_prefix):
@@ -35,14 +36,7 @@ print("Model ready!")
 engine = Engine(model=model, tokenizer=tokenizer)
-def safe_encode(text):
-    """Helper to ensure we only get the list of token IDs."""
-    encoded = tokenizer.encode(text)
-    # If it's a tuple (ids, mask), just take the ids
-    if isinstance(encoded, tuple):
-        return list(encoded[0])
-    return list(encoded)
 def chat_fn(message, history):
     try:
         prompt_tokens = []
@@ -53,30 +47,20 @@ def chat_fn(message, history):
         prompt_tokens.extend(list(tokenizer.encode(f"<|user|>{message}<|end|><|assistant|>")))
-        # Generate and handle possible tuple return
-        gen_output = engine.generate(
             prompt_tokens,
             max_tokens=512,
             temperature=0.8,
             top_k=50,
         )
-        # Unwrap if it's a tuple (common in batched/speculative forks)
-        if isinstance(gen_output, tuple):
-            new_tokens = gen_output[0]  # usually first is tokens
-            print("Unwrapped tuple from generate:", type(new_tokens))
-        else:
-            new_tokens = gen_output
-        # Convert to list if tensor
         if hasattr(new_tokens, 'tolist'):
             new_tokens = new_tokens.tolist()
-        elif not isinstance(new_tokens, list):
-            new_tokens = list(new_tokens)
         response = tokenizer.decode(new_tokens).strip()
-        # Clean end tag
         for end_tag in ["<|end|>", "<|assistant_end|>", "<|EOS|>"]:
             if end_tag in response:
                 response = response.split(end_tag)[0].strip()
@@ -85,16 +69,15 @@ def chat_fn(message, history):
         return response or "Toddler says: ... 😅"
     except Exception as e:
-        import traceback
-        print(traceback.format_exc())  # log full stack for debug
         return f"Toddler tantrum: {str(e)}"
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
-    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12")
-    gr.Markdown("Architecture verified. Tuple/Generator issues handled.")
     gr.ChatInterface(
         fn=chat_fn,
-        examples=["Tell me a joke", "What is UPI?"],
         title="Chat with the Toddler"
     )

 import torch
 import gradio as gr
+import json  # ← ONLY NEW IMPORT
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
 from nanochat.gpt import GPT, GPTConfig
 MODEL_PATH = "model_000971.pt"
 print("Waking up the toddler (NanoChat-ClimbMix-D12)...")
 tokenizer = get_tokenizer()
+print("Creating GPT model skeleton from meta_000971.json...")
+# === MAIN CHANGE: Load exact config from meta file (same as working space) ===
+with open("meta_000971.json", "r", encoding="utf-8") as f:
+    meta_data = json.load(f)
+config = GPTConfig(**meta_data["model_config"])
 model = GPT(config)
+# =====================================================================
 print("Loading weights...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
 unwanted_prefix = '_orig_mod.'
 for k in list(state_dict.keys()):
     if k.startswith(unwanted_prefix):
 engine = Engine(model=model, tokenizer=tokenizer)
+# Your existing chat_fn (tuple unwrapping simplified; traceback logging removed)
 def chat_fn(message, history):
     try:
         prompt_tokens = []
         prompt_tokens.extend(list(tokenizer.encode(f"<|user|>{message}<|end|><|assistant|>")))
+        new_tokens = engine.generate(
             prompt_tokens,
             max_tokens=512,
             temperature=0.8,
             top_k=50,
         )
+        if isinstance(new_tokens, tuple):
+            new_tokens = new_tokens[0]
         if hasattr(new_tokens, 'tolist'):
             new_tokens = new_tokens.tolist()
         response = tokenizer.decode(new_tokens).strip()
         for end_tag in ["<|end|>", "<|assistant_end|>", "<|EOS|>"]:
             if end_tag in response:
                 response = response.split(end_tag)[0].strip()
         return response or "Toddler says: ... 😅"
     except Exception as e:
         return f"Toddler tantrum: {str(e)}"
+# Rest of your UI (Markdown headers and example prompts updated)
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12 – Confident Toddler")
+    gr.Markdown("Using exact config from meta_000971.json (same as working space)")
     gr.ChatInterface(
         fn=chat_fn,
+        examples=["Tell me a joke", "What is UPI?", "Write hello world Python"],
         title="Chat with the Toddler"
     )