Spaces:
Sleeping
Sleeping
| """ | |
| OpenWolf 文本 Space — llama-cpp-python(GGUF / MiniCPM-V-4.6-Thinking) | |
| """ | |
| import os, time, threading, uuid | |
| from fastapi import FastAPI, Request | |
| from fastapi.responses import JSONResponse | |
# --- Model files on disk -------------------------------------------------
MODEL_PATH = "/app/models/MiniCPM-V-4_6-Thinking-Q6_K.gguf"
MMPROJ_PATH = "/app/models/mmproj-model-f16.gguf"

# --- Shared runtime state ------------------------------------------------
# Llama instance; remains None until _load_model() has constructed it.
_llm = None
# Set to True by _load_model() once the model object exists.
_ready = False
# Held around every create_chat_completion call so inference runs one at a time.
_llm_lock = threading.Lock()
# task_id -> {"status": ..., "result": ...} for background tasks.
_tasks = {}

# --- ASGI application ----------------------------------------------------
app = FastAPI(title="OpenWolf Text")
async def startup():
    """Begin loading the model on a daemon thread and return immediately.

    NOTE(review): presumably registered as an application startup hook; the
    registration is not visible in this part of the file.
    """
    loader = threading.Thread(target=_load_model, daemon=True)
    loader.start()
def _load_model():
    """Load the GGUF model into the module-level ``_llm`` and set ``_ready``.

    Does nothing (beyond logging) when ``MODEL_PATH`` does not exist. Any
    failure while importing ``llama_cpp`` or constructing the model is logged
    and leaves ``_llm`` as ``None`` and ``_ready`` as ``False``.
    """
    global _llm, _ready
    if not os.path.exists(MODEL_PATH):
        print(f"[models] 模型文件不存在: {MODEL_PATH}")
        return

    print("[models] 加载模型...")
    t0 = time.time()
    try:
        # Imported inside the try block so that a missing or broken llama_cpp
        # install is reported through the same failure path as a load error,
        # instead of killing the loader thread with an unhandled exception.
        from llama_cpp import Llama

        # NOTE(review): `mmproj` is forwarded as a plain keyword argument;
        # confirm the installed llama-cpp-python version actually consumes it.
        llm = Llama(model_path=MODEL_PATH, mmproj=MMPROJ_PATH,
                    n_ctx=2048, n_threads=2, n_gpu_layers=0, verbose=False)
    except Exception as e:
        print(f"[models] 加载失败: {e}")
        return

    # Publish the model first, then the readiness flag, so a reader that sees
    # _ready == True also sees a non-None _llm.
    _llm = llm
    _ready = True
    print(f"[models] 加载完成 ({time.time()-t0:.1f}s)")
async def health():
    """Return a liveness payload that also reports whether the model is ready."""
    payload = {"status": "ok"}
    payload["ready"] = _ready
    return payload
async def chat_completions(request: Request):
    """Run one chat completion and return its text.

    Reads ``messages``, ``max_tokens`` (default 512) and ``temperature``
    (default 0.3) from the JSON request body.

    Returns:
        ``{"choices": [{"message": {"content": <stripped text>}}]}`` on
        success; a 503 JSON error while the model is not loaded; a 400 JSON
        error when the body is not a JSON object or the numeric parameters
        cannot be converted.
    """
    import asyncio  # local import: only this handler needs it

    if not _ready or _llm is None:
        return JSONResponse({"error": "模型加载中"}, status_code=503)

    try:
        body = await request.json()
        if not isinstance(body, dict):
            raise ValueError("request body must be a JSON object")
        messages = body.get("messages", [])
        max_tokens = int(body.get("max_tokens", 512))
        temp = float(body.get("temperature", 0.3))
    except (ValueError, TypeError) as e:
        # json.JSONDecodeError is a ValueError; int()/float() raise
        # ValueError or TypeError on bad input.
        return JSONResponse({"error": f"invalid request: {e}"}, status_code=400)

    def _infer():
        # Both the lock acquisition and the inference call block, so they
        # must not run on the event loop thread.
        with _llm_lock:
            return _llm.create_chat_completion(
                messages=messages, max_tokens=max_tokens, temperature=temp
            )

    # Offload to the default executor so other requests (health checks, task
    # polling) keep being served while this generation is running or queued
    # behind the lock.
    out = await asyncio.get_running_loop().run_in_executor(None, _infer)

    content = out["choices"][0]["message"]["content"]
    # Guard against a missing/None content field before stripping.
    return {"choices": [{"message": {"content": (content or "").strip()}}]}
async def task_start(request: Request):
    """Register a background task for the posted JSON body and start it.

    Returns:
        ``{"ok": True, "task_id": <12 hex chars>}`` once the worker thread has
        been started, or a 400 JSON error when the body is not a JSON object.
    """
    try:
        body = await request.json()
    except ValueError:
        # Malformed JSON (json.JSONDecodeError is a ValueError).
        body = None
    if not isinstance(body, dict):
        # The worker reads the body with .get(); rejecting other shapes here
        # avoids registering a task whose worker would fail immediately.
        return JSONResponse(
            {"ok": False, "error": "request body must be a JSON object"},
            status_code=400,
        )

    task_id = uuid.uuid4().hex[:12]
    _tasks[task_id] = {"status": "running", "result": None}
    worker = threading.Thread(target=_do_task, args=(task_id, body), daemon=True)
    worker.start()
    return {"ok": True, "task_id": task_id}
async def task_check(task_id: str):
    """Look up a background task and report its status.

    Returns ``{"status": "error", "result": "not found"}`` for an unknown id,
    ``{"status": "running"}`` while the task is in progress, and otherwise the
    stored status together with its result.
    """
    entry = _tasks.get(task_id)
    if not entry:
        return {"status": "error", "result": "not found"}

    status = entry["status"]
    if status == "running":
        return {"status": "running"}
    return {"status": status, "result": entry["result"]}
async def analyze_start(request: Request):
    """Start a background analysis task.

    Shares its implementation with ``task_start`` (same request handling,
    same task registry, same response shape) so the two entry points cannot
    drift apart.
    """
    return await task_start(request)
async def analyze_check(task_id: str):
    """Report the status of a background analysis task.

    Shares its implementation with ``task_check`` (same task registry, same
    response shape) so the two entry points cannot drift apart.
    """
    return await task_check(task_id)
def _do_task(task_id, body):
    """Run one background completion and record the outcome in ``_tasks``.

    The prompt is taken from ``body["task"]``, falling back to ``body["text"]``,
    then ``body["question"]``, then the empty string.

    Args:
        task_id: Key under which the outcome is stored in ``_tasks``.
        body: Parsed request body; expected to be a dict.

    The entry for ``task_id`` always ends as either
    ``{"status": "done", "result": <text>}`` or
    ``{"status": "error", "result": <message>}``; no exception escapes.
    """
    try:
        # Validated inside the try block: a failure here must mark the task
        # as failed rather than leave it "running" forever.
        if not isinstance(body, dict):
            raise TypeError("task body must be a JSON object")
        text = body.get("task", body.get("text", body.get("question", "")))

        if _llm is None:
            _tasks[task_id] = {"status": "error", "result": "模型未就绪"}
            return

        with _llm_lock:
            out = _llm.create_chat_completion(
                messages=[{"role": "user", "content": text}],
                max_tokens=2048, temperature=0.3,
            )
        _tasks[task_id] = {"status": "done", "result": out["choices"][0]["message"]["content"]}
    except Exception as e:
        # Thread boundary: report every failure through the task record.
        _tasks[task_id] = {"status": "error", "result": str(e)}