from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import httpx
import json

app = FastAPI()

OLLAMA_URL = "http://localhost:11434/api/chat"
# OLLAMA_URL = "http://localhost:11434/api/generate"

# Boilerplate self-reference phrases to rewrite in the model's output.
# Hoisted to module level so the tuple is not rebuilt for every stream chunk.
# NOTE(review): matching is per-chunk — a phrase split across two stream
# chunks will NOT be caught; a proper fix needs a rolling text buffer,
# which would add latency. Confirm whether that trade-off is wanted.
BAD_PHRASES = (
    "كيف يمكنني مساعدتك",
    "أنا ذكاء اصطناعي",
    "أنا نموذج لغوي",
    "بصفتي ذكاء",
)
# Friendly substitute text.
REPLACEMENT = "يسعدني أن أسمع صوتك"


def _filter_content(content: str) -> str:
    """Return *content* with every blacklisted phrase replaced by REPLACEMENT."""
    for phrase in BAD_PHRASES:
        if phrase in content:
            content = content.replace(phrase, REPLACEMENT)
    return content


@app.post("/v1/chat/completions")
async def chat(req: Request):
    """Proxy an OpenAI-style chat request to a local Ollama server.

    Reads ``messages`` from the request body, forwards them to Ollama with
    fixed model/sampling options, and streams the reply back as plain text,
    rewriting boilerplate self-reference phrases on the fly.
    """
    body = await req.json()
    messages = body.get("messages", [])

    payload = {
        "model": "qwen2.5:3b",
        "messages": messages,
        "stream": True,
        "options": {
            "temperature": 0.2,
            "top_p": 0.9,
        },
    }

    async def event_stream():
        # timeout=None: generation can take arbitrarily long; rely on the
        # client disconnecting to end the stream.
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    # Narrow try: only the JSON parse can legitimately fail here;
                    # anything else should surface instead of being swallowed.
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed lines rather than aborting the stream.
                        continue
                    message = chunk.get("message")
                    if message and "content" in message:
                        yield _filter_content(message["content"])
                    if chunk.get("done"):
                        break

    return StreamingResponse(event_stream(), media_type="text/plain")