from fastapi import FastAPI, Request
from transformers import pipeline
import torch
import uvicorn
import os
app = FastAPI(title="Qwen 0.5B AI Chat API")

# Probe the hardware once at startup; a free Space normally exposes only a CPU,
# in which case we fall back to float32 (bfloat16 is only worthwhile on GPU).
has_cuda = torch.cuda.is_available()
device = "cuda" if has_cuda else "cpu"
dtype = torch.bfloat16 if has_cuda else torch.float32

print(f"Loading Qwen 2.5 0.5B model on {device}...")
try:
    pipe = pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        torch_dtype=dtype,
        device_map="auto",
    )
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    # Keep the server alive even if the model fails to load; the endpoints
    # check `pipe is None` and report the failure instead of crashing.
    pipe = None
@app.get("/")
def read_root():
    """Health-check endpoint reporting API status, model name, and device."""
    info = {
        "status": "Chat API is active",
        "model": "Qwen 0.5B",
        "device": device,
    }
    return info
@app.post("/analyze")
async def chat_endpoint(request: Request):
    """Generate an Arabic chat reply for the prompt in the request body.

    Accepts a JSON payload ``{"prompt": "..."}`` (primary) or form data with
    a ``prompt`` field (fallback). Returns ``{"analysis": ..., "success": True}``
    on success, or ``{"error": ...}`` when the model is unavailable, the
    prompt is missing, or generation fails.
    """
    if pipe is None:
        return {"error": "Model not loaded properly. Check logs."}

    prompt = ""
    try:
        # Primary: accept a JSON payload.
        data = await request.json()
        prompt = data.get("prompt", "")
    except Exception:
        # Fallback: try to read form data in case the client posted a form.
        try:
            form = await request.form()
            prompt = form.get("prompt", "")
        except Exception:
            # FIX: was a bare `except:`, which also swallows SystemExit and
            # KeyboardInterrupt; narrow to Exception and fall through to the
            # "no prompt" error below.
            pass

    if not prompt:
        return {"error": "لا يوجد نص في الرسالة."}

    # System prompt fixes the assistant's persona and forces Arabic replies.
    system_prompt = "أنت مساعد ذكاء اصطناعي طبيب وودود. أجب باللغة العربية بوضوح وإيجاز."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    try:
        # Run inference; the pipeline applies the chat template to `messages`.
        output = pipe(
            messages,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        # `generated_text` holds the full conversation (system, user,
        # assistant); the assistant's reply is the last message.
        result = output[0]["generated_text"][-1]["content"]
        return {
            "analysis": result,
            "success": True,
        }
    except Exception as e:
        return {"error": f"Failed to generate response: {str(e)}"}
if __name__ == "__main__":
    # Port 7860 is the port Hugging Face Spaces expects the app to listen on.
    uvicorn.run(app, host="0.0.0.0", port=7860)