File size: 2,209 Bytes
462abf2
 
a94984f
8bf4672
a94984f
 
0e057d9
155bed5
3808e95
a94984f
9560ef7
 
8bf4672
9560ef7
 
 
 
 
3808e95
 
a94984f
0e057d9
3808e95
 
 
0e057d9
 
 
a94984f
206ca90
3808e95
 
206ca90
5c6c743
 
 
3808e95
 
 
5c6c743
3808e95
 
 
 
 
 
 
 
206ca90
3808e95
206ca90
3808e95
 
 
 
 
 
 
462abf2
 
3808e95
5c6c743
599a0f5
462abf2
0e057d9
3808e95
462abf2
5c6c743
3808e95
0e057d9
 
 
206ca90
0e057d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import gc
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

app = FastAPI()

# Wide-open CORS so that browser front-ends served from any origin can call
# this API. Credentials stay disabled: the FastAPI CORS documentation states
# that wildcard origins/methods/headers must not be combined with
# allow_credentials=True, and none of the handlers visible in this module
# read cookies or authentication headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Process-wide cache of the single loaded model. `model` holds the Llama
# instance (None until the first /chat request loads one) and `current_id`
# holds the "<repo_id>/<filename>" key it was loaded from.
model = None
current_id = ""

class ChatRequest(BaseModel):
    """Request body accepted by ``POST /chat``.

    ``repo_id`` and ``filename`` select the model file to fetch with
    ``hf_hub_download``; the remaining fields shape the prompt and the
    completion call.
    """

    # Hugging Face Hub repository the model file is downloaded from.
    repo_id: str
    # File inside that repository; its local path is passed to Llama as model_path.
    filename: str
    # User message, inserted after "User:" in the assembled prompt.
    prompt: str
    # Instruction text, inserted after "System:" in the assembled prompt.
    system_prompt: str = "You are a helpful assistant."
    # Forwarded unchanged as max_tokens to create_completion.
    max_tokens: int = 512
    # Forwarded unchanged as temperature to create_completion.
    temperature: float = 0.7

# Root endpoint. Opening the service URL in a browser should show this response:
@app.get("/")
async def health():
    """Report that the service is up and point callers at ``POST /chat``."""
    status_payload = {
        "status": "online",
        "message": "API is running. Use POST /chat to interact.",
    }
    return status_payload

@app.post("/chat")
async def chat(request: ChatRequest):
    """Run one completion against the requested model, loading it on demand.

    The model identified by ``request.repo_id`` / ``request.filename`` is kept
    in the module-level ``model`` / ``current_id`` globals and reused until a
    request names a different one, at which point the old model is released
    and the new one is downloaded and loaded.

    Args:
        request: Model selection, prompt text and sampling settings.

    Returns:
        A dict with ``"response"`` (the stripped completion text) and
        ``"model"`` (the ``"<repo_id>/<filename>"`` key of the model used).

    Raises:
        HTTPException: status 500 carrying the error text when the download,
            the model load or the completion fails.

    Note:
        The download, load and completion calls are synchronous and run
        directly inside this coroutine, so requests are handled one at a time.
    """
    global model, current_id
    new_id = f"{request.repo_id}/{request.filename}"

    try:
        if model is None or current_id != new_id:
            if model is not None:
                # Rebind instead of `del model`: with the `global` declaration
                # above, `del` would remove the module-level name itself, and a
                # failed download/load below would leave every later request
                # failing with NameError at the `model is None` check.
                model = None
                current_id = ""
                # Collect before loading so the old model's memory can be
                # reclaimed ahead of the next large allocation.
                gc.collect()

            path = hf_hub_download(repo_id=request.repo_id, filename=request.filename)
            model = Llama(
                model_path=path,
                n_ctx=2048,
                n_threads=os.cpu_count() or 4,
                n_gpu_layers=0,
                verbose=False,
            )
            # Only record the id once the load has succeeded.
            current_id = new_id

        # Plain-text transcript prompt; generation is stopped when the model
        # starts writing the next "User:" / "System:" turn.
        full_prompt = f"System: {request.system_prompt}\nUser: {request.prompt}\nAssistant:"
        output = model.create_completion(
            prompt=full_prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            stop=["User:", "System:", "</s>"],
        )

        return {
            "response": output["choices"][0]["text"].strip(),
            "model": current_id,
        }
    except Exception as e:
        # Endpoint boundary: surface any failure as a 500 and keep the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e

if __name__ == "__main__":
    # uvicorn is imported only when the module is executed as a script.
    import uvicorn

    # Hugging Face always uses port 7860.
    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **bind_options)