# CHAT_AI / app.py — Qwen 2.5 0.5B Arabic chat API (Hugging Face Space, upload 85802db)
from fastapi import FastAPI, Request
from transformers import pipeline
import torch
import uvicorn
import os
app = FastAPI(title="Qwen 0.5B AI Chat API")

# Pick the runtime hardware. Free Spaces run on CPU, but prefer GPU (with
# bfloat16 to halve memory) whenever CUDA is available.
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.bfloat16
else:
    device = "cpu"
    dtype = torch.float32

print(f"Loading Qwen 2.5 0.5B model on {device}...")
try:
    pipe = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        torch_dtype=dtype,
        device_map="auto",
    )
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    # Endpoints check for None and return a friendly error instead of
    # crashing the whole Space at import time.
    pipe = None
@app.get("/")
def read_root():
    """Health-check endpoint: report API status, model name, and device."""
    info = {
        "status": "Chat API is active",
        "model": "Qwen 0.5B",
        "device": device,
    }
    return info
@app.post("/analyze")
async def chat_endpoint(request: Request):
    """Generate an Arabic assistant reply for the request's prompt.

    Accepts a JSON body {"prompt": "..."} (preferred) or form data with a
    "prompt" field as a fallback. Returns {"analysis": ..., "success": True}
    on success, or {"error": ...} when the model is unavailable, the prompt
    is missing, or generation fails.
    """
    if pipe is None:
        return {"error": "Model not loaded properly. Check logs."}

    prompt = ""
    try:
        # Primary: accept a JSON payload.
        data = await request.json()
        prompt = data.get("prompt", "")
    except Exception:
        # Fallback: some clients send form-encoded data instead.
        try:
            form = await request.form()
            prompt = form.get("prompt", "")
        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
            # are not swallowed; still deliberately best-effort here — the
            # empty-prompt check below reports the failure to the caller.
            pass

    if not prompt:
        return {"error": "لا يوجد نص في الرسالة."}

    # Fixed persona: friendly Arabic-speaking medical assistant.
    system_prompt = "أنت مساعد ذكاء اصطناعي طبيب وودود. أجب باللغة العربية بوضوح وإيجاز."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    try:
        # Passing the message list lets the pipeline apply the model's
        # chat template directly.
        output = pipe(
            messages,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        # generated_text holds the whole conversation (system, user,
        # assistant); the assistant's reply is the last entry.
        result = output[0]["generated_text"][-1]["content"]
        return {
            "analysis": result,
            "success": True,
        }
    except Exception as e:
        return {"error": f"Failed to generate response: {str(e)}"}
if __name__ == "__main__":
    # Honor a PORT override from the hosting platform; default to 7860,
    # the standard Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))