Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Community

suraj-self committed on Mar 15

Commit

1efbc24

1 Parent(s): a5f28af

updated

Browse files

Files changed (1) hide show

app.py +14 -29

app.py CHANGED Viewed

@@ -35,39 +35,24 @@ model.eval()
 def predict(message, history):
     try:
-        # 1. Prepare token list
         tokens = [tokenizer.bos_token_id]
-        # FIX: Explicitly extract 'content' string from Gradio objects
-        for entry in history:
-            if isinstance(entry, dict):
-                # Gradio 5/6 format: {"role": "user", "content": "..."}
-                role = entry.get("role")
-                content = entry.get("content", "")
-                if role == "user":
-                    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(str(content)) + [tokenizer.user_end_id])
-                elif role == "assistant":
-                    tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(str(content)) + [tokenizer.assistant_end_id])
-            elif isinstance(entry, (list, tuple)):
-                # Legacy format: [user_msg, assistant_msg]
-                user_content, assistant_content = entry[0], entry[1]
-                if user_content:
-                    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(str(user_content)) + [tokenizer.user_end_id])
-                if assistant_content:
-                    tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(str(assistant_content)) + [tokenizer.assistant_end_id])
-        # 2. Add current user prompt
-        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(str(message)) + [tokenizer.user_end_id])
         tokens.append(tokenizer.assistant_start_id)
-        # 3. Streaming Generation
         with torch.no_grad():
-            # Pass as list to satisfy the nanochat assertion check
             output = model.generate(
                 tokens,
                 max_tokens=512,
-                temperature=0.8,
                 top_k=40
             )
@@ -76,17 +61,17 @@ def predict(message, history):
                 token_id = token if isinstance(token, int) else token.item()
                 char = tokenizer.decode([token_id])
-                # Stop tags to prevent the model from talking to itself
                 if any(tag in char for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]):
                     break
                 generated_text += char
                 yield generated_text.strip()
     except Exception as e:
-        # Log the exact error to the console for QA debugging
-        print(f"Error details: {str(e)}")
-        yield f"Toddler tantrum: {str(e)}"
 # Launching with Gradio 6.0 compatibility
 demo = gr.ChatInterface(

 def predict(message, history):
     try:
+        # 1. Stateless Prompt Construction
+        # We completely ignore 'history' to prevent the model from repeating old answers.
         tokens = [tokenizer.bos_token_id]
+        # We only encode the CURRENT message
+        user_content = str(message).strip()
+        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(user_content) + [tokenizer.user_end_id])
+        # Add the signal for the assistant to start talking
         tokens.append(tokenizer.assistant_start_id)
+        # 2. Streaming Generation
         with torch.no_grad():
+            # Pass as a Python list to satisfy the nanochat engine assertion
             output = model.generate(
                 tokens,
                 max_tokens=512,
+                temperature=0.8, # You can try 0.7 for more factual answers
                 top_k=40
             )
                 token_id = token if isinstance(token, int) else token.item()
                 char = tokenizer.decode([token_id])
+                # Check for stop tags in the character stream
                 if any(tag in char for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]):
                     break
                 generated_text += char
+                # Yielding the text as it generates for that "real-time" feel
                 yield generated_text.strip()
     except Exception as e:
+        print(f"Stateless Predict Error: {str(e)}")
+        yield f"Toddler tantrum (Stateless): {str(e)}"
 # Launching with Gradio 6.0 compatibility
 demo = gr.ChatInterface(