Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Files Community

suraj-self committed on Mar 15

Commit

a5f28af

1 Parent(s): 6383c22

updated

Browse files

Files changed (1) hide show

app.py +18 -17

app.py CHANGED Viewed

@@ -38,32 +38,32 @@ def predict(message, history):
         # 1. Prepare token list
         tokens = [tokenizer.bos_token_id]
-        # FIX: Robust history handling for Gradio 5/6
         for entry in history:
-            # Handle list of dicts format: {"role": "user", "content": "..."}
             if isinstance(entry, dict):
                 role = entry.get("role")
                 content = entry.get("content", "")
                 if role == "user":
-                    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(content) + [tokenizer.user_end_id])
                 elif role == "assistant":
-                    tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(content) + [tokenizer.assistant_end_id])
-            # Handle old list of lists format: [user_msg, assistant_msg]
             elif isinstance(entry, (list, tuple)):
-                human, assistant = entry[0], entry[1]
-                if human:
-                    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(human) + [tokenizer.user_end_id])
-                if assistant:
-                    tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(assistant) + [tokenizer.assistant_end_id])
-        # Add current user prompt
-        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id])
         tokens.append(tokenizer.assistant_start_id)
-        # 2. Streaming Generation
         with torch.no_grad():
-            # Pass as list to satisfy the nanochat assertion
             output = model.generate(
                 tokens,
                 max_tokens=512,
@@ -76,15 +76,16 @@ def predict(message, history):
                 token_id = token if isinstance(token, int) else token.item()
                 char = tokenizer.decode([token_id])
-                # Stop if we hit the assistant end tag
-                if "<|assistant_end|>" in char or "<|end|>" in char:
                     break
                 generated_text += char
                 yield generated_text.strip()
     except Exception as e:
-        print(f"CRITICAL ERROR: {e}")
         yield f"Toddler tantrum: {str(e)}"
 # Launching with Gradio 6.0 compatibility

         # 1. Prepare token list
         tokens = [tokenizer.bos_token_id]
+        # FIX: Explicitly extract 'content' string from Gradio objects
         for entry in history:
             if isinstance(entry, dict):
+                # Gradio 5/6 format: {"role": "user", "content": "..."}
                 role = entry.get("role")
                 content = entry.get("content", "")
                 if role == "user":
+                    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(str(content)) + [tokenizer.user_end_id])
                 elif role == "assistant":
+                    tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(str(content)) + [tokenizer.assistant_end_id])
             elif isinstance(entry, (list, tuple)):
+                # Legacy format: [user_msg, assistant_msg]
+                user_content, assistant_content = entry[0], entry[1]
+                if user_content:
+                    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(str(user_content)) + [tokenizer.user_end_id])
+                if assistant_content:
+                    tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(str(assistant_content)) + [tokenizer.assistant_end_id])
+        # 2. Add current user prompt
+        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(str(message)) + [tokenizer.user_end_id])
         tokens.append(tokenizer.assistant_start_id)
+        # 3. Streaming Generation
         with torch.no_grad():
+            # Pass as list to satisfy the nanochat assertion check
             output = model.generate(
                 tokens,
                 max_tokens=512,
                 token_id = token if isinstance(token, int) else token.item()
                 char = tokenizer.decode([token_id])
+                # Stop tags to prevent the model from talking to itself
+                if any(tag in char for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]):
                     break
                 generated_text += char
                 yield generated_text.strip()
     except Exception as e:
+        # Log the exact error to the console for QA debugging
+        print(f"Error details: {str(e)}")
         yield f"Toddler tantrum: {str(e)}"
 # Launching with Gradio 6.0 compatibility