from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import httpx
import json
import asyncio

app = FastAPI()

# Ollama endpoint address inside the container.
OLLAMA_URL = "http://localhost:11434/api/chat"


@app.post("/v1/chat/completions")
async def chat(req: Request):
    """OpenAI-style chat-completions endpoint that proxies to a local Ollama
    server and streams each content fragment back to the client as plain text.

    Expects a JSON body with a ``messages`` list (OpenAI chat format).
    Returns a ``text/plain`` StreamingResponse of raw content fragments
    (NOTE: not SSE / OpenAI chunk format — callers must read it as plain text).
    """
    body = await req.json()
    messages = body.get("messages", [])

    # Payload for Ollama's /api/chat streaming API.
    payload = {
        "model": "llama3.2:1b",
        "messages": messages,
        "stream": True,
        "options": {
            "temperature": 0.8,
            "top_p": 0.9,
        },
    }

    async def event_stream():
        # timeout=None: model generation can take arbitrarily long, so we
        # must not let the HTTP client time the stream out mid-response.
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                # Surface upstream failures instead of silently ending the
                # stream (the outer HTTP status is already 200 by this point,
                # so an in-band error message is the best we can do).
                if response.status_code != 200:
                    detail = await response.aread()
                    yield (
                        f"[upstream error {response.status_code}] "
                        f"{detail.decode(errors='replace')}"
                    )
                    return
                # Ollama emits one JSON object per line; relay each content
                # fragment to the client as soon as it arrives.
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed lines rather than aborting the stream.
                        continue
                    if "message" in chunk and "content" in chunk["message"]:
                        yield chunk["message"]["content"]
                    # Final chunk carries done=True; stop reading.
                    if chunk.get("done"):
                        break

    return StreamingResponse(event_stream(), media_type="text/plain")