Spaces:

BluSerK
/

AI_assaistant

Paused

App Files Files Community

AI_assaistant / app.py

BluSerK

Update app.py

72e59db verified 28 days ago

raw

history blame contribute delete

1.97 kB

	import gradio as gr
	import requests
	import json

	def chat_with_ai(message):
	    """Send a user message to the remote chat API and return its answer.

	    Args:
	        message: Text typed by the user. ``None``, empty, or
	            whitespace-only input is rejected locally without contacting
	            the server.

	    Returns:
	        The value of the ``"answer"`` field of the API's JSON reply, or a
	        human-readable error string when the input is empty, the request
	        fails, the server answers with an HTTP error status, or the reply
	        is not a JSON object containing a non-null ``"answer"``.
	        Request and parsing failures are reported as strings, not raised,
	        so the UI always has something to display.
	    """
	    # ⚠️ PASTE YOUR NEW AWS PUBLIC IP HERE
	    url = "http://16.16.28.165/chat"
	    # Single source of truth for the timeout used in the call and the message.
	    timeout_seconds = 60

	    # Nothing to ask: skip the slow round trip to the server.
	    if not message or not message.strip():
	        return "Please enter a message."

	    payload = {
	        "message": message + " Answer in 1-2 sentences.",  # Forces a fast response
	        "max_tokens": 100,
	    }

	    # Keep the try body to the network call only, so parsing problems
	    # below are not misreported as connection failures.
	    try:
	        response = requests.post(url, json=payload, timeout=timeout_seconds)
	    except requests.exceptions.Timeout:
	        return f"The AWS server is taking too long (over {timeout_seconds}s). Try a shorter question."
	    except requests.exceptions.RequestException as e:
	        return f"Error connecting to AWS API: {str(e)}"

	    # Surface 4xx/5xx replies explicitly instead of reporting "no answer".
	    if not response.ok:
	        return f"Error: AWS API returned HTTP {response.status_code}."

	    try:
	        data = response.json()
	    except ValueError:
	        # requests' JSON decode errors derive from ValueError.
	        return "Error: AWS API returned a response that is not valid JSON."

	    # The body may be valid JSON without being an object, or may carry
	    # "answer": null; both count as a missing answer.
	    answer = data.get("answer") if isinstance(data, dict) else None
	    if answer is None:
	        return "Error: No answer in response."
	    return answer

	# Look and feel of the UI: Gradio's "Soft" theme with a blue primary hue
	# and a gray secondary hue.
	theme = gr.themes.Soft(
	    secondary_hue="gray",
	    primary_hue="blue",
	)

	# Clickable example prompts offered to the user.
	# The interface takes one inner list per example (the inputs for that
	# example), so every prompt is wrapped in its own single-item list.
	sample_prompts = [
	    [prompt]
	    for prompt in (
	        "Where is my order? Can you track package #89432?",
	        "How do I return an item that arrived damaged?",
	        "When will the wireless headphones be back in stock?",
	        "I was overcharged for my last purchase. How do I get a refund?",
	        "Can I change the shipping address for an order I just placed?",
	    )
	]

	# Build the web UI: one text input routed through chat_with_ai to one
	# text output, with a project summary shown above the form.
	demo = gr.Interface(
	    # Page header and summary.
	    title="AI Engineering Project: Serverless LLM",
	    description="""
	### Technical Overview:
	* Model: Fine-tuned Qwen-2.5 (3B) via QLoRA.
	* Optimization: GGUF 4-bit quantization.
	* Infrastructure: Deployed on AWS ECS Fargate (4 vCPU) for cost-efficiency.
	* Backend: FastAPI with strict JSON schema enforcement.

	Note: This model runs on serverless CPU infrastructure to optimize hosting costs, so inference may take 10-15 seconds.
	""",
	    theme=theme,
	    # Handler and the components it is wired to.
	    fn=chat_with_ai,
	    inputs=gr.Textbox(
	        label="User Message",
	        placeholder="Ask a technical question...",
	    ),
	    outputs=gr.Textbox(label="AI Response"),
	    # Passing examples makes the sample prompts clickable in the UI.
	    examples=sample_prompts,
	)

	# Start the Gradio app only when this file is run as a script; importing
	# the module just builds `demo` without launching it.
	if __name__ == "__main__":
	    demo.launch()