lap096 committed on
Commit
06df239
Β·
verified Β·
1 Parent(s): 6e46416

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -2,9 +2,10 @@ import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
- MODEL_NAME = "distilgpt2" # tiny model, works on free CPU Spaces
 
6
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
7
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
8
 
9
  SYSTEM_PROMPT = """
10
  You are agent_1, a single AI system.
@@ -20,36 +21,42 @@ Agent_1:
20
  ###
21
  """
22
 
23
- def respond(user_input):
24
- # Keep prompt short to avoid memory issues
25
  prompt = SYSTEM_PROMPT + f"User: {user_input}\nAgent_1:\n"
26
 
27
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=200)
28
 
29
  with torch.no_grad():
30
  outputs = model.generate(
31
  **inputs,
32
- max_new_tokens=50, # short output to prevent crashes
33
  do_sample=True,
34
  temperature=0.7,
35
  pad_token_id=tokenizer.eos_token_id
36
  )
37
 
38
- text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
39
 
40
- # Parse thoughts and response
41
- if "[Response]" in text:
42
- parts = text.split("[Response]")
43
  thoughts = parts[0].strip()
44
- response = parts[1].strip()
45
  else:
46
- thoughts = "[Thoughts] Thinking..."
47
- response = text.strip()
48
 
49
  return f"{thoughts}\n[Response] {response}"
50
 
51
- gr.ChatInterface(
 
52
  fn=respond,
53
  title="agent_1 Hugging Face Space",
54
  description="Tiny AI with simulated internal thoughts. Fully local, runs on free CPU."
55
- ).launch()
 
 
 
 
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
# Load model and tokenizer once at import time.
# distilgpt2 is small enough to run on a free CPU Space.
MODEL_NAME = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# low_cpu_mem_usage reduces peak RAM while the weights are loaded.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
9
 
10
  SYSTEM_PROMPT = """
11
  You are agent_1, a single AI system.
 
21
  ###
22
  """
23
 
24
def respond(user_input, history):
    """Generate one chat turn and format it as thoughts plus a response.

    Args:
        user_input: The latest user message from the chat box.
        history: Prior turns supplied by gr.ChatInterface. Unused — each
            turn is answered statelessly from SYSTEM_PROMPT alone.

    Returns:
        A single string of the form "<thoughts>\n[Response] <response>".
    """
    # Construct the specific prompt for this turn.
    prompt = SYSTEM_PROMPT + f"User: {user_input}\nAgent_1:\n"

    # Truncate long prompts so the tiny model stays within a safe context size.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=60,  # short generations keep free-CPU latency tolerable
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Slicing the token ids is exact;
    # the previous approach (slicing the decoded string by the length of the
    # re-decoded prompt) breaks whenever detokenization does not reproduce the
    # prompt text byte-for-byte.
    prompt_len = inputs["input_ids"].shape[1]
    new_text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # Parse the completion into thoughts and response; anything after a
    # hallucinated follow-up "User:" turn is discarded.
    if "[Response]" in new_text:
        thoughts, _, rest = new_text.partition("[Response]")
        thoughts = thoughts.strip()
        response = rest.split("User:")[0].strip()
    else:
        thoughts = "[Thoughts] Processing..."
        response = new_text.split("User:")[0].strip()

    return f"{thoughts}\n[Response] {response}"
53
 
54
# Launch the interface.
# NOTE(review): keep the top-level name `demo` — Hugging Face Spaces appears
# to serve a module-level `demo` object; confirm against the Space's SDK
# configuration before renaming.
demo = gr.ChatInterface(
    fn=respond,
    title="agent_1 Hugging Face Space",
    description="Tiny AI with simulated internal thoughts. Fully local, runs on free CPU."
)

# Only launch when executed directly (not when imported by the Spaces runtime).
if __name__ == "__main__":
    demo.launch()