from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI()

# Load the model from the Hugging Face Hub (downloads/caches the GGUF file).
# n_ctx=2048 caps the context window; prompts + completion must fit within it.
llm = Llama.from_pretrained(
    repo_id="othmanezaid77/my-eva-model",
    filename="Llama-3.1-8B-Instruct.gguf",
    n_ctx=2048,
)


@app.post("/chat")
def chat(data: dict):
    """Generate a single-turn chat completion for the posted prompt.

    Request body: JSON object with a "prompt" key (defaults to "" if absent).
    Response: {"response": <generated text>}.

    NOTE: declared as a plain `def` (not `async def`) on purpose — llm() is a
    blocking, long-running inference call, and a sync handler is executed in
    FastAPI's threadpool instead of blocking the asyncio event loop.
    """
    prompt = data.get("prompt", "")
    # Wrap the user text in the Llama 3.1 instruct chat template so the model
    # sees the turn structure it was fine-tuned on.
    full_prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    # Stop at the end-of-turn token so the model doesn't hallucinate a new turn;
    # max_tokens bounds the completion length.
    output = llm(full_prompt, max_tokens=500, stop=["<|eot_id|>"])
    return {"response": output['choices'][0]['text']}