Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Files Community

suraj-self committed on Mar 15

Commit

e6eeb28

1 Parent(s): db8631a

updated

Browse files

Files changed (1) hide show

app.py +40 -54

app.py CHANGED Viewed

@@ -4,44 +4,18 @@ import gradio as gr
 from nanochat.gpt import GPT, GPTConfig
 from nanochat.tokenizer import RustBPETokenizer
-# Aggressive Path Finding
-# Since you have files in the root, we check '.' first
-possible_paths = [
-    ".",
-    "/app",
-    os.path.expanduser("~/.cache/nanochat/tokenizer/")
-]
-TOKENIZER_DIR = None
-for p in possible_paths:
-    if os.path.exists(os.path.join(p, "token_bytes.pt")):
-        TOKENIZER_DIR = p
-        break
-if not TOKENIZER_DIR:
-    # If still not found, we use root as a fallback
-    TOKENIZER_DIR = "."
 print(f"--- System Initialization ---")
-print(f"Loading tokenizer from: {os.path.abspath(TOKENIZER_DIR)}")
-# Load Tokenizer
 tokenizer = RustBPETokenizer.from_directory(TOKENIZER_DIR)
-# Map IDs (These MUST exist in your vocabulary)
-try:
-    tokenizer.bos_token_id = tokenizer.enc.encode_single_token("<|bos|>")
-    tokenizer.user_start_id = tokenizer.enc.encode_single_token("<|user_start|>")
-    tokenizer.user_end_id = tokenizer.enc.encode_single_token("<|user_end|>")
-    tokenizer.assistant_start_id = tokenizer.enc.encode_single_token("<|assistant_start|>")
-    tokenizer.assistant_end_id = tokenizer.enc.encode_single_token("<|assistant_end|>")
-except Exception as e:
-    print(f"Warning: Special tokens not found in vocab. Error: {e}")
-    # Fallback to standard GPT-2 tokens if yours are missing
-    tokenizer.bos_token_id = 50256
-    tokenizer.user_start_id = 50257
-    tokenizer.user_end_id = 50258
-    tokenizer.assistant_start_id = 50259
 # Model Setup
 config = GPTConfig(
@@ -53,7 +27,6 @@ config = GPTConfig(
 )
 model = GPT(config)
 print("Loading model weights...")
 state_dict = torch.load("model_000971.pt", map_location="cpu")
 state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
@@ -61,37 +34,50 @@ model.load_state_dict(state_dict, strict=False)
 model.eval()
 def predict(message, history):
     tokens = [tokenizer.bos_token_id]
     for human, assistant in history:
-        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(human) + [tokenizer.user_end_id])
         if assistant:
             tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(assistant) + [tokenizer.assistant_end_id])
     tokens.extend([tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id])
     tokens.append(tokenizer.assistant_start_id)
-    input_ids = torch.tensor([tokens], dtype=torch.long)
     with torch.no_grad():
-        output = model.generate(input_ids, max_tokens=512, temperature=0.8)
-        # Generator vs Tensor handling
-        if isinstance(output, torch.Tensor):
-            new_tokens = output[0][input_ids.shape[1]:]
-            response = tokenizer.decode(new_tokens.tolist())
-            for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]:
-                response = response.split(tag)[0]
-            yield response.strip()
-        else:
-            generated_text = ""
-            for token in output:
-                token_id = token if isinstance(token, int) else token.item()
-                char = tokenizer.decode([token_id])
-                if "<|assistant_end|>" in char: break
-                generated_text += char
-                yield generated_text.strip()
-demo = gr.ChatInterface(fn=predict, title="🧸 NanoChat-D12")
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 from nanochat.gpt import GPT, GPTConfig
 from nanochat.tokenizer import RustBPETokenizer
+# Files are in the root of the space
+TOKENIZER_DIR = "."
 print(f"--- System Initialization ---")
 tokenizer = RustBPETokenizer.from_directory(TOKENIZER_DIR)
+# Map Special Tokens
+tokenizer.bos_token_id = tokenizer.enc.encode_single_token("<|bos|>")
+tokenizer.user_start_id = tokenizer.enc.encode_single_token("<|user_start|>")
+tokenizer.user_end_id = tokenizer.enc.encode_single_token("<|user_end|>")
+tokenizer.assistant_start_id = tokenizer.enc.encode_single_token("<|assistant_start|>")
+tokenizer.assistant_end_id = tokenizer.enc.encode_single_token("<|assistant_end|>")
 # Model Setup
 config = GPTConfig(
 )
 model = GPT(config)
 print("Loading model weights...")
 state_dict = torch.load("model_000971.pt", map_location="cpu")
 state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
 model.eval()
 def _message_text(content):
     """Return the plain text of one chat message's content.
 
     A plain string is returned unchanged. A list/tuple of content blocks
     contributes the string under each block's "text" key. Anything else
     (files, components, None) yields "".
     NOTE(review): the list-of-blocks shape is assumed from newer Gradio
     message formats -- confirm against the installed Gradio version.
     """
     if isinstance(content, str):
         return content
     if isinstance(content, (list, tuple)):
         return "".join(
             part["text"]
             for part in content
             if isinstance(part, dict) and isinstance(part.get("text"), str)
         )
     return ""
 
 
 def _iter_turns(history):
     """Yield ("user" | "assistant", text) for every non-empty past turn.
 
     Accepts both history layouts a Gradio ChatInterface may hand over:
     pairs of (human, assistant), and dicts with "role"/"content" keys.
     Empty turns and roles other than user/assistant are skipped.
     """
     for entry in history or []:
         if isinstance(entry, dict):
             role = entry.get("role")
             text = _message_text(entry.get("content"))
             if role in ("user", "assistant") and text:
                 yield role, text
         else:
             # Pair-style history: same truthiness checks as before.
             human, assistant = entry
             if human:
                 yield "user", human
             if assistant:
                 yield "assistant", assistant
 
 
 def predict(message, history):
     """Stream the assistant's reply to ``message`` given the chat ``history``.
 
     Args:
         message: The new user message (text).
         history: Previous turns, either (human, assistant) pairs or
             role/content message dicts.
 
     Yields:
         The reply decoded so far (whitespace-stripped), once per generated
         token, so the UI can render it incrementally.
     """
     # Build the prompt as a flat list of token ids:
     # <bos> [user/assistant turns...] <user_start> message <user_end> <assistant_start>
     tokens = [tokenizer.bos_token_id]
     for role, text in _iter_turns(history):
         if role == "user":
             tokens.append(tokenizer.user_start_id)
             tokens.extend(tokenizer.encode(text))
             tokens.append(tokenizer.user_end_id)
         else:
             tokens.append(tokenizer.assistant_start_id)
             tokens.extend(tokenizer.encode(text))
             tokens.append(tokenizer.assistant_end_id)
     tokens.append(tokenizer.user_start_id)
     tokens.extend(tokenizer.encode(message))
     tokens.append(tokenizer.user_end_id)
     tokens.append(tokenizer.assistant_start_id)
     generated_ids = []
     with torch.no_grad():
         # The prompt is passed as a plain list (not a tensor); the model's
         # generate() is iterated as a stream of tokens.
         stream = model.generate(
             tokens,
             max_tokens=512,
             temperature=0.8,
             top_k=40,
         )
         for token in stream:
             # A token may arrive as an int or as a single-element tensor.
             token_id = token if isinstance(token, int) else token.item()
             # Stop on the end-of-turn id itself rather than matching its
             # decoded text.
             if token_id == tokenizer.assistant_end_id:
                 break
             generated_ids.append(token_id)
             # Decode the whole reply each step: decoding token-by-token
             # corrupts characters whose UTF-8 bytes span several tokens.
             yield tokenizer.decode(generated_ids).strip()
+# Launching with Gradio 6.0 compatibility
+demo = gr.ChatInterface(
+    fn=predict,
+    title="🧸 NanoChat-D12",
+    description="Running on CPU. Optimized for Saint Iberis weights."
+)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)