# Hugging Face Space: Rid3/xtime-api (status: Sleeping)
# File size: 2,209 bytes

import os
import gc
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Application object; the route handlers in this file are registered on it.
app = FastAPI()

# CORS is fully open: any origin, method and header is accepted, and
# credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)

# Process-wide model cache. `model` holds the currently loaded Llama instance
# (or None before the first load); `current_id` is the "<repo_id>/<filename>"
# key identifying what `model` was loaded from.
model, current_id = None, ""

# Request body for POST /chat.
# (Documented with `#` comments rather than a docstring so the generated
# schema description is left untouched.)
class ChatRequest(BaseModel):
    # Passed as `repo_id` to hf_hub_download to select the model repository.
    repo_id: str
    # Passed as `filename` to hf_hub_download to select the model file.
    filename: str
    # User message; inserted after "User:" in the prompt template.
    prompt: str
    # Instruction text; inserted after "System:" in the prompt template.
    system_prompt: str = "You are a helpful assistant."
    # Forwarded as `max_tokens` to create_completion.
    max_tokens: int = 512
    # Forwarded as `temperature` to create_completion.
    temperature: float = 0.7

# Landing page / health check: opening the service URL in a browser should
# show this JSON payload.
@app.get("/")
async def health():
    payload = {
        "status": "online",
        "message": "API is running. Use POST /chat to interact.",
    }
    return payload

# POST /chat -- run a single text completion.
#
# The model named by `request.repo_id` / `request.filename` is fetched with
# hf_hub_download and loaded into the module-level `model` cache; it is reused
# on later requests until a different repo/file pair is requested.
#
# Returns a dict with:
#   "response": the completion text of the first choice, stripped
#   "model":    the "<repo_id>/<filename>" id of the model that answered
#
# Raises HTTPException(500) with the underlying error text as `detail` for any
# failure during download, load or generation.
#
# NOTE(review): hf_hub_download, Llama(...) and create_completion are all
# synchronous calls made directly inside this async handler, so the event loop
# does not serve other requests while they run.
@app.post("/chat")
async def chat(request: ChatRequest):
    global model, current_id
    new_id = f"{request.repo_id}/{request.filename}"

    try:
        if model is None or current_id != new_id:
            if model is not None:
                # Rebind to None instead of `del model`: deleting a global
                # unbinds the name, so if the download/load below failed,
                # every later request would hit NameError on `model is None`.
                model = None
                current_id = ""
                # Collect now so the old model's memory can be reclaimed
                # before the next one is loaded.
                gc.collect()

            path = hf_hub_download(repo_id=request.repo_id, filename=request.filename)
            model = Llama(
                model_path=path,
                n_ctx=2048,
                n_threads=os.cpu_count() or 4,  # cpu_count() may return None
                n_gpu_layers=0,  # no layers offloaded to a GPU
                verbose=False,
            )
            # Only record the id once the load has actually succeeded.
            current_id = new_id

        # Plain-text chat template; the stop strings below keep the model
        # from continuing into a new "User:" / "System:" turn.
        full_prompt = f"System: {request.system_prompt}\nUser: {request.prompt}\nAssistant:"
        output = model.create_completion(
            prompt=full_prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            stop=["User:", "System:", "</s>"],
        )

        return {
            "response": output["choices"][0]["text"].strip(),
            "model": current_id,
        }
    except Exception as e:
        # Top-level boundary: surface any failure as HTTP 500, keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e

if __name__ == "__main__":
    import uvicorn

    # Hugging Face always uses port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )