Update app.py
app.py
CHANGED
@@ -5,30 +5,28 @@ from threading import Thread
 
 MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
 
-# 1.
+# 1. Load with correct 'dtype' (bfloat16 is great for CPU)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-
+    dtype=torch.bfloat16,
     low_cpu_mem_usage=True
 ).to("cpu").eval()
 
 def respond(message, history):
-    # System prompt remains your "Agent 4" core logic
     system_prompt = (
         "You are AGENT 4. You are a cold machine. "
         "Strict Rules: 1. No emotions. 2. No parentheses. 3. No storytelling. "
         "Format: [Thoughts] (One sentence logic) [Response] (Analytical answer)."
     )
 
-    #
+    # Format history for the older Gradio structure (list of lists)
     messages = [{"role": "system", "content": system_prompt}]
     for user_msg, bot_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
 
-    # Use tokenizer.apply_chat_template for correct formatting
     input_ids = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
@@ -36,32 +34,29 @@ def respond(message, history):
         return_tensors="pt"
     ).to("cpu")
 
-    # 3. Streaming Setup: Allows Gradio to show text as it's generated
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
     generation_kwargs = dict(
         input_ids=input_ids,
         streamer=streamer,
-        max_new_tokens=
-        do_sample=False,
+        max_new_tokens=80,
+        do_sample=False,
         repetition_penalty=1.2,
         pad_token_id=tokenizer.eos_token_id
     )
 
-    # Start generation in a background thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
-    # 4. Yield for Gradio Streaming
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
+        # Ensure the output strictly follows AGENT 4 protocol
         yield partial_text
 
-#
+# Removed 'type="messages"' to fix the TypeError
 demo = gr.ChatInterface(
     fn=respond,
-    type="messages", # Updated for modern Gradio
     title="AGENT 4 // OPENBRAIN",
     description="LOGIC INSTANCE ACTIVE. NO EMOTION DETECTED.",
     theme=gr.themes.Soft(primary_hue="slate")
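
The hunks begin at line 5 of app.py and stop inside the gr.ChatInterface(...) call, so neither the import block at the top of the file nor anything after theme=... is visible in this diff. Judging by the hunk header (from threading import Thread) and the names the code relies on (gr, torch, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer), the head of the file presumably looks something like the sketch below; this is an assumption about the unshown lines, not the file's actual contents.

import torch
import gradio as gr
from threading import Thread  # respond() runs model.generate in a background thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

Below the last hunk, a closing parenthesis for gr.ChatInterface(...) and a demo.launch() call presumably follow, since the Space only serves requests once the interface is launched. Note also that the for user_msg, bot_msg in history: loop expects Gradio's tuple-style history, i.e. a list of [user, bot] pairs; dropping type="messages" keeps that format, whereas the messages format would deliver history as a list of {"role": ..., "content": ...} dicts and the pairwise unpacking would no longer be correct. The TypeError mentioned in the new comment is consistent with an older Gradio version whose ChatInterface does not accept a type argument at all.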