Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Files Community

Really-Amazing committed on Mar 15

Commit

5d24f6a

verified ·

1 Parent(s): 6fd1e97

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -52

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import torch
 import gradio as gr
-import json  # ← ONLY NEW IMPORT
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
 from nanochat.gpt import GPT, GPTConfig
@@ -8,78 +7,98 @@ from nanochat.gpt import GPT, GPTConfig
 MODEL_PATH = "model_000971.pt"
 print("Waking up the toddler (NanoChat-ClimbMix-D12)...")
 tokenizer = get_tokenizer()
-print("Creating GPT model skeleton from meta_000971.json...")
-# === ONLY CHANGE: Load exact config from meta file (same as working space) ===
-with open("meta_000971.json", "r", encoding="utf-8") as f:
-    meta_data = json.load(f)
-config = GPTConfig(**meta_data["model_config"])
 model = GPT(config)
-# =====================================================================
 print("Loading weights...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
-unwanted_prefix = '_orig_mod.'
-for k in list(state_dict.keys()):
-    if k.startswith(unwanted_prefix):
-        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
 model.load_state_dict(state_dict, strict=False)
 model.to("cpu")
 model.eval()
 print("Model ready!")
-engine = Engine(model=model, tokenizer=tokenizer)
-# Your existing chat_fn (kept 100% unchanged)
 def chat_fn(message, history):
     try:
-        prompt_tokens = []
         for user_msg, assistant_msg in history:
-            prompt_tokens.extend(list(tokenizer.encode(f"<|user|>{user_msg}<|end|>")))
             if assistant_msg:
-                prompt_tokens.extend(list(tokenizer.encode(f"<|assistant|>{assistant_msg}<|end|>")))
-        prompt_tokens.extend(list(tokenizer.encode(f"<|user|>{message}<|end|><|assistant|>")))
-        new_tokens = engine.generate(
-            prompt_tokens,
-            max_tokens=512,
-            temperature=0.8,
-            top_k=50,
-        )
-        if isinstance(new_tokens, tuple):
-            new_tokens = new_tokens[0]
-        if hasattr(new_tokens, 'tolist'):
-            new_tokens = new_tokens.tolist()
-        response = tokenizer.decode(new_tokens).strip()
-        for end_tag in ["<|end|>", "<|assistant_end|>", "<|EOS|>"]:
-            if end_tag in response:
-                response = response.split(end_tag)[0].strip()
-                break
-        return response or "Toddler says: ... 😅"
     except Exception as e:
         return f"Toddler tantrum: {str(e)}"
-# Rest of your UI (unchanged)
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
-    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12 – Confident Toddler")
-    gr.Markdown("Using exact config from meta_000971.json (same as working space)")
-    gr.ChatInterface(
-        fn=chat_fn,
-        examples=["Tell me a joke", "What is UPI?", "Write hello world Python"],
-        title="Chat with the Toddler"
-    )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import torch
 import gradio as gr
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
 from nanochat.gpt import GPT, GPTConfig
 MODEL_PATH = "model_000971.pt"
 print("Waking up the toddler (NanoChat-ClimbMix-D12)...")
 tokenizer = get_tokenizer()
+# Resolve the ids of the chat-control tokens once at start-up
+# (tag names aligned with the Saint Iberis working space).
+# The try/except lets the app run with tokenizers that use either tag scheme.
+# NOTE(review): this relies on tokenizer.encode() mapping each tag string to
+# its single special-token id and raising when the tag is unknown. If encode()
+# treats the tag as ordinary text, [0] is only the id of the tag's first piece
+# and the fallback below never runs - confirm against nanochat.tokenizer.
+try:
+    bos_id = tokenizer.encode("<|bos|>")[0]
+    user_start_id = tokenizer.encode("<|user_start|>")[0]
+    user_end_id = tokenizer.encode("<|user_end|>")[0]
+    assistant_start_id = tokenizer.encode("<|assistant_start|>")[0]
+    assistant_end_id = tokenizer.encode("<|assistant_end|>")[0]
+except Exception:
+    # Catch Exception rather than using a bare except, so Ctrl-C and
+    # SystemExit are not swallowed during start-up.
+    # Fallback to standard tags if the specific Saint Iberis ones aren't in your vocab.
+    # This scheme has a single shared end tag, so assistant_end_id equals user_end_id.
+    bos_id = tokenizer.encode("<|endoftext|>")[0]
+    user_start_id = tokenizer.encode("<|user|>")[0]
+    user_end_id = tokenizer.encode("<|end|>")[0]
+    assistant_start_id = tokenizer.encode("<|assistant|>")[0]
+    assistant_end_id = tokenizer.encode("<|end|>")[0]
+print("Creating GPT model skeleton (6-head, 2048 seq)...")
+# The architecture is hard-coded here, so these values must describe the
+# network stored in MODEL_PATH.
+config = GPTConfig(
+    vocab_size=32768,
+    n_layer=12,
+    n_head=6,
+    n_kv_head=6,
+    n_embd=768,
+    sequence_len=2048,
+)
 model = GPT(config)
 print("Loading weights...")
+# SECURITY: weights_only=False allows torch.load to unpickle arbitrary Python
+# objects. Only load checkpoint files that you trust.
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
+# Drop the "_orig_mod." marker from parameter names so the keys match the
+# names of the plain GPT module built above.
+state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
+# strict=False tolerates key mismatches, which would otherwise leave parts of
+# the model randomly initialised without any signal. Report them instead.
+load_result = model.load_state_dict(state_dict, strict=False)
+if load_result.missing_keys or load_result.unexpected_keys:
+    print(
+        "Warning: checkpoint does not fully match the model - "
+        f"missing keys: {list(load_result.missing_keys)}; "
+        f"unexpected keys: {list(load_result.unexpected_keys)}"
+    )
 model.to("cpu")
 model.eval()
 print("Model ready!")
+# We use the model directly to avoid 'Engine' type-hinting issues
 def chat_fn(message, history):
+    """Generate the assistant's reply for one chat turn.
+
+    Args:
+        message: Text of the user's newest message.
+        history: Earlier turns as (user_msg, assistant_msg) pairs. A falsy
+            assistant_msg is skipped.
+
+    Returns:
+        The decoded reply with everything from the first stop tag onward
+        removed, a placeholder string when the reply is empty, or a
+        "Toddler tantrum: ..." string when any exception is raised.
+    """
     try:
+        # 1. Build the prompt: BOS, then every turn wrapped in its start/end ids.
+        tokens = [bos_id]
         for user_msg, assistant_msg in history:
+            tokens.extend([user_start_id] + list(tokenizer.encode(user_msg)) + [user_end_id])
             if assistant_msg:
+                tokens.extend([assistant_start_id] + list(tokenizer.encode(assistant_msg)) + [assistant_end_id])
+        # Current turn, left open after assistant_start so the model writes the reply.
+        tokens.extend([user_start_id] + list(tokenizer.encode(message)) + [user_end_id])
+        tokens.append(assistant_start_id)
+        # 2. Wrap the ids in a (1, seq_len) long tensor before generating.
+        input_ids = torch.tensor([tokens], dtype=torch.long).to("cpu")
+        # 3. Generate and collect the new token ids.
+        # Collection stays inside no_grad: a lazy generator only runs the
+        # model while it is being iterated, so draining it outside this
+        # context would run the forward passes with autograd enabled.
+        # NOTE(review): the keyword names assume this model's generate()
+        # accepts max_new_tokens/temperature/top_k - confirm against nanochat.gpt.
+        with torch.no_grad():
+            output_ids = model.generate(
+                input_ids,
+                max_new_tokens=512,
+                temperature=0.8,
+                top_k=50,
+            )
+            if isinstance(output_ids, torch.Tensor):
+                # Blocking result: take only the newly generated tokens.
+                new_tokens = output_ids[0][input_ids.shape[1]:].tolist()
+            else:
+                # Streaming result: drain the iterator.
+                new_tokens = list(output_ids)
+        # 4. Decode the whole reply in one call. Decoding token by token can
+        # split a multi-byte character across calls and corrupt it, and
+        # repeated string concatenation is quadratic.
+        response = tokenizer.decode(new_tokens).strip()
+        # Clean up stop tags: keep only the text before each tag that appears.
+        for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]:
+            if tag in response:
+                response = response.split(tag)[0].strip()
+        return response or "Toddler is thinking... 😅"
     except Exception as e:
         return f"Toddler tantrum: {str(e)}"
+# Assemble the page: a heading above a chat widget that calls chat_fn.
+demo = gr.Blocks(theme=gr.themes.Soft())
+with demo:
+    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12")
+    gr.ChatInterface(title="Toddler Chat", fn=chat_fn)
+# Start the web server only when this file is executed as a script.
 if __name__ == "__main__":
+    demo.launch(server_port=7860, server_name="0.0.0.0")