Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Community

suraj-self committed on Mar 15

Commit

c430d50

1 Parent(s): c424ad1

update app.py

Browse files

Files changed (1) hide show

app.py +83 -94

app.py CHANGED Viewed

@@ -1,104 +1,93 @@
-import os
 import torch
 import gradio as gr
 from nanochat.gpt import GPT, GPTConfig
-from nanochat.tokenizer import RustBPETokenizer
-# Configuration
-MODEL_PATH = "model_000971.pt"
-CACHE_DIR = os.path.expanduser("~/.cache/nanochat/tokenizer/")
-TOKENIZER_DIR = CACHE_DIR if os.path.exists(CACHE_DIR) else "."
-print(f"--- Waking up the Toddler ---")
-print(f"Loading tokenizer from: {TOKENIZER_DIR}")
-# 1. Load Tokenizer & Map Special Tokens
-tokenizer = RustBPETokenizer.from_directory(TOKENIZER_DIR)
-tokenizer.bos_token_id = tokenizer.enc.encode_single_token("<|bos|>")
-tokenizer.user_start_id = tokenizer.enc.encode_single_token("<|user_start|>")
-tokenizer.user_end_id = tokenizer.enc.encode_single_token("<|user_end|>")
-tokenizer.assistant_start_id = tokenizer.enc.encode_single_token("<|assistant_start|>")
-tokenizer.assistant_end_id = tokenizer.enc.encode_single_token("<|assistant_end|>")
-# 2. Build Model Architecture
-config = GPTConfig(
-    vocab_size=32768,
-    n_layer=12,
-    n_head=6,
-    n_kv_head=6,
-    n_embd=768,
-    sequence_len=2048,
-)
 model = GPT(config)
-# 3. Load Weights
 print("Loading weights...")
-state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
 state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
 model.load_state_dict(state_dict, strict=False)
-model.to("cpu")
 model.eval()
-print("Toddler is awake and ready!")
-def chat_fn(message, history):
-    try:
-        # 1. Build Token List
-        tokens = [tokenizer.bos_token_id]
-        for user_msg, assistant_msg in history:
-            if user_msg:
-                tokens.extend([tokenizer.user_start_id] + tokenizer.encode(user_msg) + [tokenizer.user_end_id])
-            if assistant_msg:
-                tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(assistant_msg) + [tokenizer.assistant_end_id])
-        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id])
-        tokens.append(tokenizer.assistant_start_id)
-        input_ids = torch.tensor([tokens], dtype=torch.long)
-        # 2. Generate (Non-streaming for stability)
-        with torch.no_grad():
-            # In nanochat, generate usually returns the full sequence tensor
-            output_ids = model.generate(
-                input_ids,
-                max_tokens=512,
-                temperature=0.8,
-                top_k=40
-            )
-        # 3. Process Output
-        if isinstance(output_ids, torch.Tensor):
-            # Slicing to get only new tokens
-            new_tokens = output_ids[0][input_ids.shape[1]:]
-            response = tokenizer.decode(new_tokens.tolist())
-        else:
-            # If it's a generator, collect it all into one string
-            response = "".join([tokenizer.decode([t]) for t in output_ids])
-        # 4. Clean up tags
-        for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>", "<|bos|>"]:
-            response = response.split(tag)[0]
-        final_text = response.strip()
-        return final_text if final_text else "..."
-    except Exception as e:
-        print(f"CRITICAL ERROR: {e}")
-        return f"Toddler tantrum: {str(e)}"
-# 5. Launch UI (Cleaned for Gradio 6.0 compatibility)
-with gr.Blocks() as demo:
-    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12")
-    gr.ChatInterface(
-        fn=chat_fn,
-        examples=["Hi Toddler!", "Explain UPI.", "Tell me a joke."]
-    )
-if __name__ == "__main__":
-    # Theme moved here to resolve UserWarning
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        theme=gr.themes.Soft(primary_hue="orange")
-    )

+import json
+import pickle
 import torch
 import gradio as gr
 from nanochat.gpt import GPT, GPTConfig
+print("🚀 Loading NanoChat...")
+# -----------------------
+# Load tokenizer
+# -----------------------
+# SECURITY NOTE: pickle.load runs arbitrary code embedded in the file. Only
+# ship a tokenizer.pkl that was produced by this project's own tooling.
+with open("tokenizer.pkl", "rb") as f:
+    tokenizer = pickle.load(f)
+# The chat function reads the special-token ids below as attributes of the
+# tokenizer. Fill in any that the unpickled object does not already carry.
+# NOTE(review): assumes the object (or its `.enc`) exposes
+# encode_single_token and accepts new attributes -- confirm against the pickle.
+_SPECIAL_TOKEN_ATTRS = {
+    "bos_token_id": "<|bos|>",
+    "user_start_id": "<|user_start|>",
+    "user_end_id": "<|user_end|>",
+    "assistant_start_id": "<|assistant_start|>",
+    "assistant_end_id": "<|assistant_end|>",
+}
+_encoding = getattr(tokenizer, "enc", tokenizer)
+for _attr, _token in _SPECIAL_TOKEN_ATTRS.items():
+    if not hasattr(tokenizer, _attr):
+        setattr(tokenizer, _attr, _encoding.encode_single_token(_token))
+print("Tokenizer loaded")
+# -----------------------
+# Load model config
+# -----------------------
+with open("meta_000971.json", encoding="utf-8") as f:
+    meta = json.load(f)
+# NOTE(review): checkpoint metadata may nest the architecture arguments under
+# a "model_config" key next to training bookkeeping. Use that sub-dict when
+# present; otherwise treat the whole file as the GPTConfig arguments.
+config = GPTConfig(**meta.get("model_config", meta))
+# -----------------------
+# Build model
+# -----------------------
 model = GPT(config)
 print("Loading weights...")
+state_dict = torch.load("model_000971.pt", map_location="cpu")
+# Drop the "_orig_mod." prefix from parameter names so they match the plain
+# (uncompiled) module; presumably the checkpoint came from a compiled model.
 state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
+# strict=False tolerates name mismatches, which can leave layers randomly
+# initialised without any error -- report them instead of hiding them.
+_load_result = model.load_state_dict(state_dict, strict=False)
+_missing = list(getattr(_load_result, "missing_keys", []))
+_unexpected = list(getattr(_load_result, "unexpected_keys", []))
+if _missing or _unexpected:
+    print(f"Warning: checkpoint mismatch - missing keys: {_missing}, unexpected keys: {_unexpected}")
 model.eval()
+print("✅ NanoChat ready")
+# -----------------------
+# Chat function
+# -----------------------
+def _content_text(content):
+    """Return the plain text of one chat message's content.
+
+    Accepts a string, None, a dict with a "text" entry, or a list/tuple of
+    such parts; anything else contributes no text.
+    """
+    if isinstance(content, str):
+        return content
+    if isinstance(content, dict):
+        return _content_text(content.get("text"))
+    if isinstance(content, (list, tuple)):
+        return "".join(_content_text(part) for part in content)
+    return ""
+
+
+def _iter_turns(history):
+    """Yield (role, text) pairs from either supported history layout.
+
+    Handles a list of {"role": ..., "content": ...} dicts as well as a list of
+    (user, assistant) pairs; an empty or None history yields nothing.
+    """
+    for entry in history or []:
+        if isinstance(entry, dict):
+            yield entry.get("role"), _content_text(entry.get("content"))
+        else:
+            user, assistant = entry
+            yield "user", _content_text(user)
+            yield "assistant", _content_text(assistant)
+
+
+def generate_reply(message, history):
+    """Generate the assistant's reply to `message` given the prior `history`.
+
+    Args:
+        message: The new user message (text).
+        history: Earlier turns, either as (user, assistant) pairs or as
+            role/content dicts. Empty or missing turns are skipped.
+
+    Returns:
+        The decoded reply, cut at the first end-of-turn tag and stripped.
+    """
+    tokens = [tokenizer.bos_token_id]
+    for role, text in _iter_turns(history):
+        if not text:
+            # A turn can be empty (e.g. None in a pair); encoding it would fail.
+            continue
+        if role == "user":
+            tokens += [tokenizer.user_start_id] + tokenizer.encode(text) + [tokenizer.user_end_id]
+        elif role == "assistant":
+            tokens += [tokenizer.assistant_start_id] + tokenizer.encode(text) + [tokenizer.assistant_end_id]
+    tokens += [tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id]
+    # Leave the assistant turn open so the model continues from here.
+    tokens.append(tokenizer.assistant_start_id)
+    input_ids = torch.tensor([tokens], dtype=torch.long)
+    with torch.no_grad():
+        output = model.generate(
+            input_ids,
+            max_tokens=256,
+            temperature=0.8,
+            top_k=40
+        )
+        if isinstance(output, torch.Tensor):
+            # Full sequence returned: keep only what follows the prompt.
+            new_tokens = output[0][input_ids.shape[1]:].tolist()
+        else:
+            # NOTE(review): assumes a non-tensor result iterates over newly
+            # generated token ids one at a time -- confirm against GPT.generate.
+            new_tokens = [int(token) for token in output]
+    text = tokenizer.decode(new_tokens)
+    for tag in ["<|assistant_end|>", "<|end|>"]:
+        text = text.split(tag)[0]
+    return text.strip()
+# -----------------------
+# UI
+# -----------------------
+# Chat UI wired to generate_reply; the examples appear as clickable prompts.
+demo = gr.ChatInterface(
+    fn=generate_reply,
+    title="🧸 NanoChat ClimbMix D12",
+    description="Small locally-trained NanoChat model running on HuggingFace Spaces",
+    examples=[
+        "Hi!",
+        "Explain UPI",
+        "Tell me a joke"
+    ],
+)
+# Start the server only when run as a script, so importing this module
+# (e.g. for testing) does not bind port 7860.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)