Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Community

Really-Amazing committed on Mar 15

Commit

72950d2

verified ·

1 Parent(s): 57899cc

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -21

app.py CHANGED Viewed

@@ -6,9 +6,7 @@ from nanochat.tokenizer import RustBPETokenizer
 # Configuration
 MODEL_PATH = "model_000971.pt"
-# The Dockerfile moves files to this specific cache location
 CACHE_DIR = os.path.expanduser("~/.cache/nanochat/tokenizer/")
-# Fallback to current directory if cache doesn't exist (local testing)
 TOKENIZER_DIR = CACHE_DIR if os.path.exists(CACHE_DIR) else "."
 print(f"--- Waking up the Toddler ---")
@@ -17,7 +15,6 @@ print(f"Loading tokenizer from: {TOKENIZER_DIR}")
 # 1. Load Tokenizer & Map Special Tokens
 tokenizer = RustBPETokenizer.from_directory(TOKENIZER_DIR)
-# These must match your training vocab
 tokenizer.bos_token_id = tokenizer.enc.encode_single_token("<|bos|>")
 tokenizer.user_start_id = tokenizer.enc.encode_single_token("<|user_start|>")
 tokenizer.user_end_id = tokenizer.enc.encode_single_token("<|user_end|>")
@@ -36,7 +33,7 @@ config = GPTConfig(
 model = GPT(config)
-# 3. Load Weights (with _orig_mod. cleaning)
 print("Loading weights...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
 state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
@@ -47,10 +44,11 @@ print("Toddler is awake and ready!")
 def chat_fn(message, history):
     try:
-        # Build Chat History
         tokens = [tokenizer.bos_token_id]
         for user_msg, assistant_msg in history:
-            tokens.extend([tokenizer.user_start_id] + tokenizer.encode(user_msg) + [tokenizer.user_end_id])
             if assistant_msg:
                 tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(assistant_msg) + [tokenizer.assistant_end_id])
@@ -60,9 +58,7 @@ def chat_fn(message, history):
         input_ids = torch.tensor([tokens], dtype=torch.long)
-        # 4. Generate with Streaming Logic
-        # Note: In nanochat.gpt, generate is typically an autoregressive loop.
-        # If your version returns a generator, we iterate. If a tensor, we slice.
         with torch.no_grad():
             output_ids = model.generate(
                 input_ids,
@@ -71,41 +67,42 @@ def chat_fn(message, history):
                 top_k=40
             )
-        # Handle Tensor vs Generator output
         if isinstance(output_ids, torch.Tensor):
-            # Just take the new parts
             new_tokens = output_ids[0][input_ids.shape[1]:]
             response = tokenizer.decode(new_tokens.tolist())
         else:
-            # It's a generator yielding token by token
             response = ""
             for token in output_ids:
                 decoded = tokenizer.decode([token])
                 if "<|assistant_end|>" in decoded:
                     break
                 response += decoded
-                yield response # Yield for streaming UI effect
-        # Final cleanup for non-streaming return
         for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]:
             response = response.split(tag)[0]
         return response.strip()
     except Exception as e:
-        # Crucial for QA: see the actual error in Space logs
         print(f"ERROR: {e}")
         return f"Toddler tantrum: {str(e)}"
-# 5. Launch UI
-with gr.Blocks(theme=gr.themes.Default(primary_hue="orange")) as demo:
     gr.Markdown("# 🧸 NanoChat-ClimbMix-D12")
-    gr.Markdown("A custom-trained small language model running on your CPU.")
     gr.ChatInterface(
         fn=chat_fn,
-        type="messages", # Updated for latest Gradio versions
-        examples=["Hi Toddler!", "How does UPI work?", "Tell me a story."]
     )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 # Configuration
 MODEL_PATH = "model_000971.pt"
 CACHE_DIR = os.path.expanduser("~/.cache/nanochat/tokenizer/")
 TOKENIZER_DIR = CACHE_DIR if os.path.exists(CACHE_DIR) else "."
 print(f"--- Waking up the Toddler ---")
 # 1. Load Tokenizer & Map Special Tokens
 tokenizer = RustBPETokenizer.from_directory(TOKENIZER_DIR)
 tokenizer.bos_token_id = tokenizer.enc.encode_single_token("<|bos|>")
 tokenizer.user_start_id = tokenizer.enc.encode_single_token("<|user_start|>")
 tokenizer.user_end_id = tokenizer.enc.encode_single_token("<|user_end|>")
 model = GPT(config)
+# 3. Load Weights
 print("Loading weights...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
 state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
 def chat_fn(message, history):
     try:
+        # Build Chat History (Handling standard Gradio list-of-lists format)
         tokens = [tokenizer.bos_token_id]
         for user_msg, assistant_msg in history:
+            if user_msg:
+                tokens.extend([tokenizer.user_start_id] + tokenizer.encode(user_msg) + [tokenizer.user_end_id])
             if assistant_msg:
                 tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(assistant_msg) + [tokenizer.assistant_end_id])
         input_ids = torch.tensor([tokens], dtype=torch.long)
+        # 4. Generate
         with torch.no_grad():
             output_ids = model.generate(
                 input_ids,
                 top_k=40
             )
+        # Handle output
         if isinstance(output_ids, torch.Tensor):
             new_tokens = output_ids[0][input_ids.shape[1]:]
             response = tokenizer.decode(new_tokens.tolist())
         else:
+            # Generator logic
             response = ""
             for token in output_ids:
                 decoded = tokenizer.decode([token])
                 if "<|assistant_end|>" in decoded:
                     break
                 response += decoded
+                yield response
+        # Final cleanup
         for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]:
             response = response.split(tag)[0]
         return response.strip()
     except Exception as e:
         print(f"ERROR: {e}")
         return f"Toddler tantrum: {str(e)}"
+# 5. Launch UI (Cleaned for Gradio 6.0 compatibility)
+with gr.Blocks() as demo:
     gr.Markdown("# 🧸 NanoChat-ClimbMix-D12")
     gr.ChatInterface(
         fn=chat_fn,
+        examples=["Hi Toddler!", "Explain UPI.", "Tell me a joke."]
     )
 if __name__ == "__main__":
+    # Theme is passed to launch() instead of gr.Blocks() to resolve the UserWarning
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        theme=gr.themes.Soft(primary_hue="orange")
+    )