from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import httpx
import json

app = FastAPI()

OLLAMA_URL = "http://localhost:11434/api/chat"
# OLLAMA_URL = "http://localhost:11434/api/generate"

# Boilerplate self-reference phrases to rewrite in the model's output.
# Hoisted to module level so the tuple is not rebuilt for every stream chunk.
# NOTE(review): matching is per-chunk — a phrase split across two stream
# chunks will NOT be caught; a proper fix needs a rolling text buffer,
# which would add latency. Confirm whether that trade-off is wanted.
BAD_PHRASES = (
    "كيف يمكنني مساعدتك",
    "أنا ذكاء اصطناعي",
    "أنا نموذج لغوي",
    "بصفتي ذكاء",
)
# Friendly substitute text.
REPLACEMENT = "يسعدني أن أسمع صوتك"


def _filter_content(content: str) -> str:
    """Return *content* with every blacklisted phrase replaced by REPLACEMENT."""
    for phrase in BAD_PHRASES:
        if phrase in content:
            content = content.replace(phrase, REPLACEMENT)
    return content


@app.post("/v1/chat/completions")
async def chat(req: Request):
    """Proxy an OpenAI-style chat request to a local Ollama server.

    Reads ``messages`` from the request body, forwards them to Ollama with
    fixed model/sampling options, and streams the reply back as plain text,
    rewriting boilerplate self-reference phrases on the fly.
    """
    body = await req.json()
    messages = body.get("messages", [])

    payload = {
        "model": "qwen2.5:3b",
        "messages": messages,
        "stream": True,
        "options": {
            "temperature": 0.2,
            "top_p": 0.9,
        },
    }

    async def event_stream():
        # timeout=None: generation can take arbitrarily long; rely on the
        # client disconnecting to end the stream.
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    # Narrow try: only the JSON parse can legitimately fail here;
                    # anything else should surface instead of being swallowed.
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed lines rather than aborting the stream.
                        continue
                    message = chunk.get("message")
                    if message and "content" in message:
                        yield _filter_content(message["content"])
                    if chunk.get("done"):
                        break

    return StreamingResponse(event_stream(), media_type="text/plain")