AI_assaistant / app.py
BluSerK's picture
Update app.py
72e59db verified
import gradio as gr
import requests
import json
def chat_with_ai(message):
    """Send a user message to the remote chat API and return its reply as text.

    The message is POSTed as JSON with a brevity instruction appended and a
    ``max_tokens`` cap of 100. Network and response problems are never
    raised to the caller; each failure is reported as a human-readable
    string so the UI always has something to display.

    Args:
        message: The text typed by the user.

    Returns:
        The value of the ``"answer"`` field of the JSON response, or an
        explanatory error string when the input is blank, the request fails
        or times out, the server replies with an HTTP error status, or the
        body is not a JSON object containing ``"answer"``.
    """
    # Do not spend a slow (up to 60s) round trip on a blank prompt.
    if not message or not message.strip():
        return "Please enter a question."

    # ⚠️ PASTE YOUR NEW AWS PUBLIC IP HERE
    url = "http://16.16.28.165/chat"
    payload = {
        "message": message + " Answer in 1-2 sentences.",  # Forces a fast response
        "max_tokens": 100,
    }

    try:
        response = requests.post(url, json=payload, timeout=60)
        # Surface 4xx/5xx replies instead of parsing an error page as an answer.
        response.raise_for_status()
    except requests.exceptions.Timeout:
        return "The AWS server is taking too long (over 60s). Try a shorter question."
    except requests.exceptions.HTTPError as e:
        return f"Error: AWS API returned an HTTP error: {e}"
    except Exception as e:
        return f"Error connecting to AWS API: {str(e)}"

    # Parse separately so a malformed body is not reported as a connection error.
    try:
        data = response.json()
    except ValueError:
        return "Error: AWS API returned a response that is not valid JSON."

    # A JSON array/string/number has no "answer" key to read.
    if not isinstance(data, dict):
        return "Error: No answer in response."
    return data.get("answer", "Error: No answer in response.")
# Look and feel: Gradio's built-in "Soft" theme with a blue/gray palette.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
)
# Clickable example prompts shown under the input box.
# The examples table expects one row per example and one column per input
# component, so each prompt string is wrapped in its own single-item list.
sample_prompts = [
    [prompt_text]
    for prompt_text in (
        "Where is my order? Can you track package #89432?",
        "How do I return an item that arrived damaged?",
        "When will the wireless headphones be back in stock?",
        "I was overcharged for my last purchase. How do I get a refund?",
        "Can I change the shipping address for an order I just placed?",
    )
]
# Single-input, single-output web UI wired to chat_with_ai.
demo = gr.Interface(
    fn=chat_with_ai,
    inputs=gr.Textbox(
        placeholder="Ask a technical question...",
        label="User Message",
    ),
    outputs=gr.Textbox(label="AI Response"),
    # One clickable example per row of sample_prompts.
    examples=sample_prompts,
    theme=theme,
    title="AI Engineering Project: Serverless LLM",
    # Markdown shown under the title.
    description="""
### Technical Overview:
* **Model:** Fine-tuned Qwen-2.5 (3B) via QLoRA.
* **Optimization:** GGUF 4-bit quantization.
* **Infrastructure:** Deployed on AWS ECS Fargate (4 vCPU) for cost-efficiency.
* **Backend:** FastAPI with strict JSON schema enforcement.
*Note: This model runs on serverless CPU infrastructure to optimize hosting costs, so inference may take 10-15 seconds.*
""",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()