File size: 3,667 Bytes
37de7ca
0745c6f
37de7ca
ac7a783
 
37de7ca
 
 
 
 
ac7a783
 
9d01ea4
82ad52b
0745c6f
37de7ca
 
 
 
ac7a783
9d01ea4
 
ac7a783
 
0745c6f
 
 
 
ac7a783
 
 
0745c6f
 
ac7a783
 
 
 
37de7ca
 
 
 
 
 
 
 
 
ac7a783
37de7ca
9d01ea4
ac7a783
 
0745c6f
ac7a783
0745c6f
 
37de7ca
8067761
 
 
 
 
 
9d01ea4
8067761
 
 
 
 
 
 
 
 
 
 
 
 
ac7a783
8067761
 
 
9d01ea4
8067761
 
 
 
 
 
 
 
 
 
 
 
9d01ea4
ac7a783
 
 
 
8067761
ac7a783
0745c6f
 
 
 
 
8067761
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
OpenWolf text Space — llama-cpp-python (GGUF / MiniCPM-V-4.6-Thinking)
"""
import os, time, threading, uuid
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

# FastAPI application object; the route decorators in this file register handlers on it.
app = FastAPI(title="OpenWolf Text")

# Shared module-level state.
_ready = False  # flipped to True by _load_model() right after the Llama instance is assigned
_llm = None  # the loaded llama_cpp.Llama instance; stays None until loading succeeds
_llm_lock = threading.Lock()  # held around every create_chat_completion() call so only one runs at a time
_tasks = {}  # task_id -> {"status": ..., "result": ...}; written by the start endpoints and _do_task()
# Path of the GGUF model file; _load_model() refuses to load if it does not exist.
MODEL_PATH = "/app/models/MiniCPM-V-4_6-Thinking-Q6_K.gguf"
# Passed to Llama(...) as `mmproj` — presumably the multimodal projector weights (name-based; confirm).
MMPROJ_PATH = "/app/models/mmproj-model-f16.gguf"


@app.on_event("startup")
async def startup():
    """Start model loading on a daemon thread when the application starts.

    The handler returns as soon as the thread has been started; it does not
    wait for ``_load_model`` to finish.
    """
    loader = threading.Thread(target=_load_model, daemon=True)
    loader.start()


def _load_model():
    """Load the GGUF model into the module-level ``_llm`` and mark it ready.

    Started on a background thread by ``startup``. On success ``_llm`` holds
    the ``Llama`` instance and ``_ready`` is set to True. On any failure
    (missing model file, import error, constructor error) a message is
    printed and both globals keep their previous values.

    Returns:
        None.
    """
    global _llm, _ready
    if not os.path.exists(MODEL_PATH):
        print(f"[models] 模型文件不存在: {MODEL_PATH}")
        return
    print("[models] 加载模型...")
    t0 = time.time()
    try:
        # Imported inside the try so that a missing or broken llama_cpp
        # install is reported through the same failure message below instead
        # of escaping as an unhandled exception in this thread.
        from llama_cpp import Llama

        # NOTE(review): `mmproj` is forwarded as a plain keyword argument;
        # confirm the installed llama-cpp-python build actually consumes it.
        _llm = Llama(model_path=MODEL_PATH, mmproj=MMPROJ_PATH,
                     n_ctx=2048, n_threads=2, n_gpu_layers=0, verbose=False)
        # Set the flag only after _llm is assigned, so readers that see
        # _ready == True also see the instance.
        _ready = True
        print(f"[models] 加载完成 ({time.time()-t0:.1f}s)")
    except Exception as e:
        print(f"[models] 加载失败: {e}")


@app.get("/health")
async def health():
    """Report service liveness and whether the model has finished loading.

    Returns:
        A dict with a constant ``"status": "ok"`` and the current value of
        the module-level ``_ready`` flag under ``"ready"``.
    """
    report = {"status": "ok"}
    report["ready"] = _ready
    return report


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Run one chat completion and return it in a minimal choices/message shape.

    Request JSON fields read:
        messages: passed through to ``create_chat_completion`` (default ``[]``).
        max_tokens: converted with ``int`` (default 512).
        temperature: converted with ``float`` (default 0.3).

    Returns:
        ``{"choices": [{"message": {"content": <stripped text>}}]}`` on
        success; a 503 JSON error while the model is not loaded; a 400 JSON
        error when the body is not a JSON object or the numeric fields
        cannot be converted.
    """
    import asyncio

    if not _ready or _llm is None:
        return JSONResponse({"error": "模型加载中"}, status_code=503)

    try:
        body = await request.json()
        if not isinstance(body, dict):
            raise ValueError("request body must be a JSON object")
        messages = body.get("messages", [])
        max_tokens = int(body.get("max_tokens", 512))
        temp = float(body.get("temperature", 0.3))
    except (ValueError, TypeError) as e:
        # Malformed JSON and bad numeric fields are client errors, not 500s.
        return JSONResponse({"error": f"invalid request: {e}"}, status_code=400)

    def _infer():
        # Same lock as the background tasks: one inference at a time.
        with _llm_lock:
            return _llm.create_chat_completion(
                messages=messages, max_tokens=max_tokens, temperature=temp
            )

    # The completion call and the lock acquisition are both blocking; running
    # them on the default executor keeps the event loop free to serve other
    # requests (e.g. /health and the task check endpoints) in the meantime.
    out = await asyncio.get_running_loop().run_in_executor(None, _infer)
    content = out["choices"][0]["message"]["content"]
    # Guard against a missing/None content so .strip() cannot raise.
    return {"choices": [{"message": {"content": (content or "").strip()}}]}


@app.post("/task/start")
async def task_start(request: Request):
    """Register a new background task and start it on a daemon thread.

    The parsed JSON body is handed unchanged to ``_do_task``. The task is
    recorded in ``_tasks`` as ``"running"`` before the thread starts.

    Returns:
        ``{"ok": True, "task_id": <12-character hex id>}``.
    """
    payload = await request.json()
    new_id = uuid.uuid4().hex[:12]
    _tasks[new_id] = dict(status="running", result=None)
    worker = threading.Thread(target=_do_task, args=(new_id, payload), daemon=True)
    worker.start()
    return {"ok": True, "task_id": new_id}


@app.get("/task/check/{task_id}")
async def task_check(task_id: str):
    """Look up the state of a task created by ``/task/start``.

    Returns:
        ``{"status": "error", "result": "not found"}`` for an unknown id,
        ``{"status": "running"}`` while the task is still running, and
        otherwise the stored status together with its result.
    """
    entry = _tasks.get(task_id)
    if not entry:
        return {"status": "error", "result": "not found"}
    state = entry["status"]
    if state == "running":
        return {"status": "running"}
    return {"status": state, "result": entry["result"]}


@app.post("/analyze-doc/start")
@app.post("/analyze-text/start")
async def analyze_start(request: Request):
    """Register a new analysis task and start it on a daemon thread.

    Both the ``/analyze-doc/start`` and ``/analyze-text/start`` routes map to
    this handler. The parsed JSON body is handed unchanged to ``_do_task``.

    Returns:
        ``{"ok": True, "task_id": <12-character hex id>}``.
    """
    payload = await request.json()
    new_id = uuid.uuid4().hex[:12]
    _tasks[new_id] = dict(status="running", result=None)
    worker = threading.Thread(target=_do_task, args=(new_id, payload), daemon=True)
    worker.start()
    return {"ok": True, "task_id": new_id}


@app.get("/analyze-doc/check/{task_id}")
@app.get("/analyze-text/check/{task_id}")
async def analyze_check(task_id: str):
    """Look up the state of a task created by one of the analyze start routes.

    Both the ``/analyze-doc/check/{task_id}`` and
    ``/analyze-text/check/{task_id}`` routes map to this handler.

    Returns:
        ``{"status": "error", "result": "not found"}`` for an unknown id,
        ``{"status": "running"}`` while the task is still running, and
        otherwise the stored status together with its result.
    """
    entry = _tasks.get(task_id)
    if not entry:
        return {"status": "error", "result": "not found"}
    state = entry["status"]
    if state == "running":
        return {"status": "running"}
    return {"status": state, "result": entry["result"]}


def _do_task(task_id, body):
    """Run one completion for a background task and record the outcome.

    The prompt is taken from the first key present in ``body`` among
    ``"task"``, ``"text"`` and ``"question"`` (empty string if none), sent as
    a single user message, and the generated text is stored in
    ``_tasks[task_id]``.

    Args:
        task_id: Key under which the outcome is written in ``_tasks``.
        body: Parsed JSON request body; expected to be a dict.

    Returns:
        None. The outcome is stored as ``{"status": "done", "result": text}``
        or ``{"status": "error", "result": message}``.
    """
    # Everything runs inside the try so that any failure, including a body
    # that is not a JSON object, ends in an "error" entry instead of leaving
    # the task stuck in "running" forever.
    try:
        if _llm is None:
            _tasks[task_id] = {"status": "error", "result": "模型未就绪"}
            return
        if not isinstance(body, dict):
            raise TypeError("request body must be a JSON object")
        text = body.get("task", body.get("text", body.get("question", "")))
        # Same lock as the chat endpoint: one inference at a time.
        with _llm_lock:
            out = _llm.create_chat_completion(
                messages=[{"role": "user", "content": text}],
                max_tokens=2048, temperature=0.3,
            )
        _tasks[task_id] = {"status": "done", "result": out["choices"][0]["message"]["content"]}
    except Exception as e:
        _tasks[task_id] = {"status": "error", "result": str(e)}