import os
import gc
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Fully permissive CORS policy: every origin, method and header is accepted and
# credentials are allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# broad policy -- confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Process-wide cache of the single loaded model. `current_id` is the
# "repo_id/filename" key of whatever `model` currently holds ("" when empty).
model, current_id = None, ""
# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    # Hub repository and file inside it; together they identify the model.
    repo_id: str
    filename: str

    # User message that is placed after the system prompt.
    prompt: str

    # Optional generation settings with their defaults.
    system_prompt: str = "You are a helpful assistant."
    max_tokens: int = 512
    temperature: float = 0.7
# Landing page: opening the service URL in a browser should show this payload.
@app.get("/")
async def health():
    # Static status document; no model state is read or modified here.
    payload = {
        "status": "online",
        "message": "API is running. Use POST /chat to interact.",
    }
    return payload
@app.post("/chat")
async def chat(request: ChatRequest):
    """Run one completion with the requested model, loading it on demand.

    The module keeps a single model in memory, keyed by
    ``"<repo_id>/<filename>"``. When the request names a different model (or
    none is loaded yet) the file is fetched with ``hf_hub_download``, the
    previous model is released and a new ``Llama`` instance is created.

    Args:
        request: Parsed request body; see ``ChatRequest`` for the fields.

    Returns:
        A dict with ``"response"`` (the stripped completion text) and
        ``"model"`` (the ``"<repo_id>/<filename>"`` key that produced it).

    Raises:
        HTTPException: Status 500 with the error text as ``detail`` when the
            download, model load or generation fails.
    """
    global model, current_id
    new_id = f"{request.repo_id}/{request.filename}"
    try:
        # NOTE(review): the download, model load and completion below are
        # synchronous calls inside an async handler, so other requests
        # presumably wait while they run -- confirm this is acceptable.
        if model is None or current_id != new_id:
            # Resolve the file first: if the repo or filename is wrong, the
            # model that is already loaded stays available.
            path = hf_hub_download(
                repo_id=request.repo_id,
                filename=request.filename,
            )
            if model is not None:
                # Rebind instead of `del model`: deleting the global would
                # leave the name undefined if the load below fails, and every
                # later request would then die on `model is None`.
                model = None
                current_id = ""
                # Free the old model before allocating the new one.
                gc.collect()
            model = Llama(
                model_path=path,
                n_ctx=2048,
                n_threads=os.cpu_count() or 4,
                n_gpu_layers=0,
                verbose=False,
            )
            current_id = new_id

        full_prompt = (
            f"System: {request.system_prompt}\nUser: {request.prompt}\nAssistant:"
        )
        output = model.create_completion(
            prompt=full_prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            # Stop as soon as the model starts writing another turn.
            stop=["User:", "System:", "</s>"],
        )
        return {
            "response": output["choices"][0]["text"].strip(),
            "model": current_id,
        }
    except Exception as e:
        # Boundary handler: report any failure as a 500, keeping the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
# Script entry point: serve the app directly with uvicorn.
if __name__ == "__main__":
    import uvicorn

    # Hugging Face always uses port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )