xtime-api / app.py
Rid3's picture
Update app.py
206ca90 verified
import os
import gc
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# ASGI application object; the route decorators further down register on it.
app = FastAPI()
# CORS policy: any origin, any method and any header are accepted, and
# credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins with allow_credentials=True — confirm whether credentials
# are actually needed by the clients of this API.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Process-wide cache maintained by the /chat handler.
# `model` holds the currently loaded Llama instance (None until one is loaded).
model = None
# "<repo_id>/<filename>" key identifying the model currently held in `model`.
current_id = ""
class ChatRequest(BaseModel):
    """Request body accepted by ``POST /chat``.

    ``repo_id``, ``filename`` and ``prompt`` are required; the remaining
    fields fall back to the defaults declared below when omitted.
    """

    # Repository id passed to hf_hub_download to locate the model file.
    repo_id: str
    # File name inside that repository, also passed to hf_hub_download.
    filename: str
    # User message; placed after "User:" in the prompt sent to the model.
    prompt: str
    # Instruction text; placed after "System:" in the prompt sent to the model.
    system_prompt: str = "You are a helpful assistant."
    # Forwarded to the completion call as `max_tokens`.
    max_tokens: int = 512
    # Forwarded to the completion call as `temperature`.
    temperature: float = 0.7
# Landing page: opening the service URL in a browser should show this payload.
@app.get("/")
async def health():
    """Return a static status payload confirming that the API is reachable."""
    status_payload = {
        "status": "online",
        "message": "API is running. Use POST /chat to interact.",
    }
    return status_payload
@app.post("/chat")
async def chat(request: ChatRequest):
    """Generate a completion for ``request.prompt`` with the requested model.

    The model identified by ``repo_id``/``filename`` is fetched with
    ``hf_hub_download`` and loaded on first use. It stays cached in the
    module globals ``model``/``current_id`` until a request names a
    different model, at which point the old one is released and the new one
    is loaded.

    Args:
        request: Validated request body (model location, prompts and
            sampling settings).

    Returns:
        A dict with the stripped completion text under ``"response"`` and
        the ``"<repo_id>/<filename>"`` key of the model used under
        ``"model"``.

    Raises:
        HTTPException: Status 500 carrying ``str(error)`` for any failure
            while downloading, loading or running the model.
    """
    global model, current_id
    new_id = f"{request.repo_id}/{request.filename}"
    try:
        if model is None or current_id != new_id:
            if model is not None:
                # Rebind to None rather than `del model`: deleting the global
                # name left it undefined whenever the download or load below
                # failed, so every later request crashed with NameError at
                # the `model is None` check until the process was restarted.
                model = None
                current_id = ""
                # Reclaim the old model's memory before loading the next one.
                gc.collect()
            # NOTE(review): the calls below are not awaited, so they run on
            # the event loop and other requests wait until they finish.
            path = hf_hub_download(
                repo_id=request.repo_id,
                filename=request.filename,
            )
            model = Llama(
                model_path=path,
                n_ctx=2048,
                n_threads=os.cpu_count() or 4,
                n_gpu_layers=0,
                verbose=False,
            )
            # Only record the id once the load has succeeded.
            current_id = new_id
        full_prompt = (
            f"System: {request.system_prompt}\n"
            f"User: {request.prompt}\n"
            "Assistant:"
        )
        output = model.create_completion(
            prompt=full_prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            # Stop as soon as the model starts writing another turn.
            stop=["User:", "System:", "</s>"],
        )
        return {
            "response": output["choices"][0]["text"].strip(),
            "model": current_id,
        }
    except Exception as e:
        # Endpoint boundary: report every failure as a 500 and keep the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    import uvicorn

    # Hugging Face always uses port 7860, so the server binds to it on all
    # interfaces.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)