from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import httpx
import json
import asyncio

app = FastAPI()

# Ollama endpoint address inside the container.
OLLAMA_URL = "http://localhost:11434/api/chat"


@app.post("/v1/chat/completions")
async def chat(req: Request):
    """OpenAI-style chat-completions endpoint that proxies to a local Ollama
    server and streams each content fragment back to the client as plain text.

    Expects a JSON body with a ``messages`` list (OpenAI chat format).
    Returns a ``text/plain`` StreamingResponse of raw content fragments
    (NOTE: not SSE / OpenAI chunk format — callers must read it as plain text).
    """
    body = await req.json()
    messages = body.get("messages", [])

    # Payload for Ollama's /api/chat streaming API.
    payload = {
        "model": "llama3.2:1b",
        "messages": messages,
        "stream": True,
        "options": {
            "temperature": 0.8,
            "top_p": 0.9,
        },
    }

    async def event_stream():
        # timeout=None: model generation can take arbitrarily long, so we
        # must not let the HTTP client time the stream out mid-response.
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                # Surface upstream failures instead of silently ending the
                # stream (the outer HTTP status is already 200 by this point,
                # so an in-band error message is the best we can do).
                if response.status_code != 200:
                    detail = await response.aread()
                    yield (
                        f"[upstream error {response.status_code}] "
                        f"{detail.decode(errors='replace')}"
                    )
                    return
                # Ollama emits one JSON object per line; relay each content
                # fragment to the client as soon as it arrives.
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed lines rather than aborting the stream.
                        continue
                    if "message" in chunk and "content" in chunk["message"]:
                        yield chunk["message"]["content"]
                    # Final chunk carries done=True; stop reading.
                    if chunk.get("done"):
                        break

    return StreamingResponse(event_stream(), media_type="text/plain")