Spaces:
Paused
Paused
| import gradio as gr | |
| import requests | |
| import json | |
def chat_with_ai(message):
    """Send a user message to the remote chat API and return its answer.

    The message is suffixed with an instruction asking for a 1-2 sentence
    reply and POSTed as JSON (keys ``"message"`` and ``"max_tokens"``) to the
    ``/chat`` endpoint configured below.

    Args:
        message: The user's question as entered in the UI textbox.

    Returns:
        The value of the ``"answer"`` field of the JSON response, or a
        human-readable error string when the input is blank, the request
        times out, the request fails (including HTTP error statuses), or the
        response body is not a JSON object containing ``"answer"``.
    """
    # ⚠️ PASTE YOUR NEW AWS PUBLIC IP HERE
    url = "http://16.16.28.165/chat"

    # Skip the (slow) network round-trip when there is nothing to ask.
    if not message or not message.strip():
        return "Please enter a question."

    payload = {
        "message": message + " Answer in 1-2 sentences.",  # Forces a fast response
        "max_tokens": 100,
    }

    # Transport phase: only the request itself lives in this try block so
    # that parsing problems are not misreported as connection problems.
    try:
        response = requests.post(url, json=payload, timeout=60)
        # Turn 4xx/5xx replies into exceptions instead of parsing them as
        # if they were successful answers.
        response.raise_for_status()
    except requests.exceptions.Timeout:
        return "The AWS server is taking too long (over 60s). Try a shorter question."
    except Exception as e:
        return f"Error connecting to AWS API: {e}"

    # Parsing phase: the server answered successfully; validate the body.
    try:
        data = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        return "Error: AWS API returned a non-JSON response."

    if not isinstance(data, dict):
        return "Error: No answer in response."
    return data.get("answer", "Error: No answer in response.")
# Look and feel: Gradio's "Soft" theme with a blue primary and gray secondary hue.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
)

# Example questions offered to the user as one-click inputs.
_EXAMPLE_QUESTIONS = (
    "Where is my order? Can you track package #89432?",
    "How do I return an item that arrived damaged?",
    "When will the wireless headphones be back in stock?",
    "I was overcharged for my last purchase. How do I get a refund?",
    "Can I change the shipping address for an order I just placed?",
)

# Each example is wrapped in its own list: one inner list holds the inputs
# for one example, and this interface has a single input.
sample_prompts = [[question] for question in _EXAMPLE_QUESTIONS]
# Input and output widgets for the single-question / single-answer interface.
_message_box = gr.Textbox(
    placeholder="Ask a technical question...",
    label="User Message",
)
_answer_box = gr.Textbox(label="AI Response")

# Markdown shown under the title, summarizing how the backend is built.
_overview_markdown = """
### Technical Overview:
* **Model:** Fine-tuned Qwen-2.5 (3B) via QLoRA.
* **Optimization:** GGUF 4-bit quantization.
* **Infrastructure:** Deployed on AWS ECS Fargate (4 vCPU) for cost-efficiency.
* **Backend:** FastAPI with strict JSON schema enforcement.
*Note: This model runs on serverless CPU infrastructure to optimize hosting costs, so inference may take 10-15 seconds.*
"""

# Wire the chat function, widgets, theme and examples into one Gradio app.
demo = gr.Interface(
    fn=chat_with_ai,
    inputs=_message_box,
    outputs=_answer_box,
    title="AI Engineering Project: Serverless LLM",
    description=_overview_markdown,
    theme=theme,
    examples=sample_prompts,  # This automatically creates the clickable buttons
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()