import threading

from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from llama_cpp import Llama

app = FastAPI()

# Fetch the GGUF model file from Hugging Face automatically and load it.
llm = Llama.from_pretrained(
    filename="Llama-3-8B-Lexi-Uncensored.Q4_K_M.gguf",
    repo_id="Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF",
    n_threads=2,  # kept low so the free-tier server is not overloaded
    n_ctx=2048,
)

@app.get("/")
def home():
    """Return a fixed status message for the root path."""
    status = {"message": "EVA AI Server is Running!"}
    return status

# Only one generation may use the shared model at a time: requests are handed
# to worker threads below, and they must not drive the same model concurrently.
_llm_lock = threading.Lock()


def _generate_reply(prompt: str) -> str:
    """Run one blocking chat completion for *prompt* and return the reply text."""
    with _llm_lock:
        # create_chat_completion formats the conversation with the model's own
        # chat template instead of hand-written ChatML markers, which are not
        # the turn delimiters of this Llama 3 based model.
        result = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
            # Llama 3 end-of-turn marker, passed explicitly in case the GGUF
            # metadata does not list it as a stop token.
            stop=["<|eot_id|>"],
        )
    return result["choices"][0]["message"]["content"]


@app.post("/chat")
async def chat(data: dict):
    """Generate a single assistant reply for the user's prompt.

    Args:
        data: JSON object from the request body; its "prompt" key holds the
            user's message.

    Returns:
        A dict of the form {"response": <generated text>}.

    Raises:
        HTTPException: 422 when "prompt" is missing, empty, or not a string.
    """
    prompt = data.get("prompt", "")
    if not isinstance(prompt, str) or not prompt.strip():
        raise HTTPException(
            status_code=422,
            detail="'prompt' must be a non-empty string",
        )

    # Inference is blocking, CPU-bound work; run it in a worker thread so the
    # event loop keeps serving other requests while the model generates.
    reply = await run_in_threadpool(_generate_reply, prompt)
    return {"response": reply}