Spaces:

reactallegany
/

promptlab

Runtime error

App Files Files Community

bditto committed on Apr 16, 2025

Commit

b5786cf

verified ·

1 Parent(s): 2f24c08

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -27

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import random
 # Configuration 🛠️
-model_name = "microsoft/phi-3-mini-4k-instruct"  # Smaller model for memory constraints
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load model with optimizations
@@ -19,55 +19,47 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Safety tools 🛡️
 SAFE_RESPONSES = [
     "Let's focus on positive tech projects! 🌱",
-    "How about designing an eco-friendly robot? 🤖",
-    "Let's explore renewable energy solutions! ☀️"
 ]
 def generate_response(message, history):
-    # Simple safety check
     if any(word in message.lower() for word in ["violence", "hate", "gun"]):
         return random.choice(SAFE_RESPONSES)
-    # Format prompt
-    prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>"
-    # Tokenize input
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     # Generate response
     outputs = model.generate(
-        inputs.input_ids,
         max_new_tokens=256,
         temperature=0.7,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
     )
-    # Decode and return
-    return tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-# Create Gradio interface
 demo = gr.ChatInterface(
-    fn=generate_response,
     examples=[
         "How to make a solar-powered robot?",
         "Python code for air quality sensor"
     ],
     title="🤖 REACT Ethical AI Lab",
-    description="Safe AI project assistant for students"
-)
-# Explicit API setup
-api = gr.mount_gradio_app(
-    app=demo.app,
-    blocks=demo,
-    path="/api"
 )
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        enable_queue=True,
-        share=False
     )

 import random
 # Configuration 🛠️
+model_name = "microsoft/phi-3-mini-4k-instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load model with optimizations
 # Safety tools 🛡️
 SAFE_RESPONSES = [
     "Let's focus on positive tech projects! 🌱",
+    "How about designing an eco-friendly robot? 🤖"
 ]
 def generate_response(message, history):
+    # Convert history to new message format
+    messages = [{"role": "user", "content": msg} for msg, _ in history]
+    messages += [{"role": "assistant", "content": res} for _, res in history]
+    # Safety check
     if any(word in message.lower() for word in ["violence", "hate", "gun"]):
         return random.choice(SAFE_RESPONSES)
     # Generate response
+    inputs = tokenizer.apply_chat_template(
+        [{"role": "user", "content": message}],
+        return_tensors="pt"
+    ).to(model.device)
     outputs = model.generate(
+        inputs,
         max_new_tokens=256,
         temperature=0.7,
+        do_sample=True
     )
+    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
+# Build the chat UI: generate_response is called for every submitted message.
+# NOTE(review): no `type=` argument is passed here, so the shape of the
+# `history` value handed to generate_response follows the installed Gradio
+# default — confirm it matches what generate_response expects.
 demo = gr.ChatInterface(
+    generate_response,
     examples=[
         "How to make a solar-powered robot?",
         "Python code for air quality sensor"
     ],
     title="🤖 REACT Ethical AI Lab",
+    chatbot=gr.Chatbot(height=500, likeable=True)
 )
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
+        share=False  # enable_queue is no longer passed to launch()
     )