File size: 599 Bytes
94bb3e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI()

# Load the model weights once at import time so every request reuses the
# same in-memory instance. `from_pretrained` pulls the GGUF file from the
# Hugging Face Hub (presumably cached locally after the first run — verify
# cache behavior for your deployment). n_ctx=2048 fixes the context window.
llm = Llama.from_pretrained(
    repo_id="othmanezaid77/my-eva-model", 
    filename="Llama-3.1-8B-Instruct.gguf",
    n_ctx=2048
)

@app.post("/chat")
def chat(data: dict):
    """Run one user turn through the model and return the completion.

    Request body: JSON object with a "prompt" key (defaults to "" if absent).
    Response: {"response": <generated text>}.

    NOTE: declared as a plain ``def`` (not ``async def``) on purpose.
    ``llm(...)`` is a blocking, CPU-bound call; with ``async def`` it would
    run on the event loop and freeze the whole server for the duration of
    every generation. As a sync handler, FastAPI runs it in its worker
    threadpool, so other requests keep being served. The HTTP interface is
    unchanged.
    """
    prompt = data.get("prompt", "")
    # Llama 3.1 Instruct chat template: one user turn, then hand the text
    # off to the assistant role; generation stops at the end-of-turn token.
    full_prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    output = llm(full_prompt, max_tokens=500, stop=["<|eot_id|>"])
    return {"response": output['choices'][0]['text']}