Spaces:

Hassan73
/

CHAT_AI

Sleeping

File size: 2,482 Bytes

import asyncio
import concurrent.futures
import os

import torch
import uvicorn
from fastapi import FastAPI, Request
from transformers import pipeline

app = FastAPI(title="Qwen 0.5B AI Chat API")

# Use CUDA with bfloat16 when a GPU is visible; otherwise fall back to CPU with
# float32 (the free Space tier is CPU-only).
if torch.cuda.is_available():
    device, dtype = "cuda", torch.bfloat16
else:
    device, dtype = "cpu", torch.float32

print(f"Loading Qwen 2.5 0.5B model on {device}...")

# Load the model once at import time. On failure `pipe` is left as None so the
# endpoints can report the problem instead of the whole app failing to start.
try:
    pipe = pipeline(
        "text-generation",
        device_map="auto",
        torch_dtype=dtype,
        model="Qwen/Qwen2.5-0.5B-Instruct",
    )
    print("Model loaded successfully!")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    pipe = None

@app.get("/")
def read_root():
    """Return a small status payload: liveness text, model label and device."""
    payload = {"status": "Chat API is active", "model": "Qwen 0.5B"}
    # `device` is the module-level value chosen when the model was loaded.
    payload["device"] = device
    return payload

# Generation is a long, blocking call. Running it on a dedicated single-worker
# pool keeps the event loop responsive while still executing one generation at
# a time, exactly as happened when the call ran inline in the coroutine.
_INFERENCE_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=1)


async def _extract_prompt(request: Request) -> object:
    """Best-effort read of the ``prompt`` field from a JSON or form body.

    A JSON object body is preferred. If the body is not valid JSON, or is JSON
    but not an object, the form body is tried instead. The value is returned
    unvalidated (it may be missing, ``None`` or a non-string); the caller is
    responsible for checking it. Returns ``""`` when nothing could be read.
    """
    try:
        data = await request.json()
    except Exception:
        # Not JSON (or unreadable): deliberately fall through to form parsing.
        data = None

    if isinstance(data, dict):
        return data.get("prompt", "")

    try:
        form = await request.form()
    except Exception:
        # Form parsing is only a fallback; treat any failure as "no prompt".
        # `except Exception` (not a bare except) lets task cancellation through.
        return ""
    return form.get("prompt", "")


def _generate_reply(messages):
    """Run the blocking text-generation call and return the reply text.

    Raises whatever the pipeline raises; the caller converts it to an error
    payload.
    """
    output = pipe(
        messages,
        max_new_tokens=400,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    # The generated_text list holds the whole conversation (system, user,
    # assistant); the last entry is the newly generated assistant message.
    return output[0]["generated_text"][-1]["content"]


@app.post("/analyze")
async def chat_endpoint(request: Request):
    """Answer a user prompt with the chat model.

    Accepts a JSON object (or, as a fallback, a form body) containing a
    ``prompt`` field.

    Returns:
        ``{"analysis": <reply>, "success": True}`` on success, or
        ``{"error": <message>}`` when the model is unavailable, the prompt is
        missing/blank/not a string, or generation fails.
    """
    if pipe is None:
        return {"error": "Model not loaded properly. Check logs."}

    prompt = await _extract_prompt(request)

    # Reject missing, non-string and whitespace-only prompts up front rather
    # than letting them fail inside the model with an opaque error.
    if not isinstance(prompt, str) or not prompt.strip():
        return {"error": "لا يوجد نص في الرسالة."}

    # Set the personality and language for the model
    system_prompt = "أنت مساعد ذكاء اصطناعي طبيب وودود. أجب باللغة العربية بوضوح وإيجاز."

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    try:
        # Off-load the blocking inference so other requests (e.g. the status
        # route) are still served while a reply is being generated.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            _INFERENCE_EXECUTOR, _generate_reply, messages
        )
    except Exception as e:
        return {"error": f"Failed to generate response: {str(e)}"}

    return {
        "analysis": result,
        "success": True,
    }

# Script entry point: serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    uvicorn.run(app=app, port=7860, host="0.0.0.0")