# (removed: non-Python page residue from the scrape — Hugging Face Space listing
#  header, commit-hash gutter, and line-number gutter; not part of the program)
# app.py
import os
import sys
import subprocess
from typing import List, Dict, Any, Optional
# --- 0. Inline module installation ---
# WARNING: this can fail in hosted environments without pip permissions;
# a requirements.txt is the recommended alternative.
def install_required_modules():
    """Install/upgrade every third-party package this app needs via pip.

    Terminates the process with exit code 1 when installation fails.
    """
    packages = (
        "fastapi",
        "uvicorn",
        "pydantic",
        "huggingface-hub",
        "llama-cpp-python",
    )
    pip_command = [sys.executable, "-m", "pip", "install", *packages, "--upgrade"]
    print("--- 嘗試動態安裝/升級必要的 Python 模組 ---")
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        print(f"**致命錯誤**:模組安裝失敗。錯誤訊息: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"**致命錯誤**:發生未知錯誤。錯誤訊息: {e}")
        sys.exit(1)
    else:
        print("所有模組安裝/更新成功。")
install_required_modules()
# --- 1. Module imports (must run after the install step above) ---
try:
    # FastAPI / web-serving stack
    from pydantic import BaseModel, Field
    from fastapi import FastAPI, HTTPException
    from fastapi.responses import JSONResponse, HTMLResponse
    from fastapi.middleware.cors import CORSMiddleware
    import uvicorn
    # Model download helper
    from huggingface_hub import hf_hub_download
    # Llama.cpp bindings
    from llama_cpp import Llama
except ImportError as e:
    print(f"**致命錯誤**:模組引入失敗。錯誤: {e}")
    sys.exit(1)
# --- 2. Model configuration and initialization ---
MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"  # GGUF quantized weights file to fetch
MODEL_REPO = "Qwen/Qwen3-0.6B-GGUF"  # Hugging Face repository id
LLAMA_INSTANCE: Optional[Llama] = None  # Global Llama instance, set lazily by initialize_llm()
def initialize_llm():
    """Download the GGUF model and create the global Llama instance.

    Idempotent: returns immediately when LLAMA_INSTANCE is already set.

    Raises:
        RuntimeError: if the model download or the Llama construction fails.
    """
    global LLAMA_INSTANCE
    if LLAMA_INSTANCE is not None:
        return
    print(f"--- 1. 開始下載模型 {MODEL_NAME} ---")
    try:
        model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
    except Exception as e:
        # Chain the cause so the original download error is preserved.
        raise RuntimeError(f"無法下載模型: {e}") from e
    print("--- 2. 初始化 Llama.cpp 實例 ---")
    # BUG FIX: os.cpu_count() may return None, making the original
    # `os.cpu_count() // 2` raise TypeError. Fall back to one thread.
    n_threads = (os.cpu_count() or 2) // 2 or 1
    try:
        LLAMA_INSTANCE = Llama(
            model_path=model_path,
            n_ctx=4096,        # context window size
            n_batch=512,
            n_threads=n_threads,
            n_gpu_layers=0,    # CPU-only inference
            verbose=False,
        )
        print("Llama.cpp 模型加載成功。")
    except Exception as e:
        raise RuntimeError(f"Llama 實例初始化失敗: {e}") from e
# --- 3. FastAPI setup and middleware ---
app = FastAPI(
    title="LLM 推論 API (Llama.cpp)",
    description="直接使用 Llama.cpp 進行推論的 API 服務。"
)
# Wide-open CORS: any origin, method, and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- 4. Pydantic request model (minimal variant only) ---
class InferenceRequestMinimal(BaseModel):
    """Minimal inference request payload: accepts only the question text."""
    question: str = Field(..., description="使用者輸入的問題或提示。")
# --- 5. Core inference function (non-streaming) ---
def get_inference_response(
    messages: List[Dict[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float = 0.7,
    top_p: float = 0.95,
) -> str:
    """Run one non-streaming chat completion on the global Llama instance.

    Prepends `system_message` as a system turn, forwards the sampling
    parameters, and returns the assistant's text. Raises HTTPException(503)
    when the model is not loaded or the backend call fails; returns a
    warning string when the completion comes back empty.
    """
    if LLAMA_INSTANCE is None:
        raise HTTPException(status_code=503, detail="LLM 服務尚未初始化。")
    chat_messages = [{"role": "system", "content": system_message}, *messages]
    try:
        result = LLAMA_INSTANCE.create_chat_completion(
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        # Walk choices -> message -> content, stopping at the first missing/empty step.
        choices = result.get('choices')
        message = choices[0].get('message') if choices else None
        content = message.get('content') if message else None
        if content:
            return content
        return "⚠️ LLM 服務回傳空內容。"
    except Exception as e:
        print(f"[Error] LLM Inference failed: {e}")
        raise HTTPException(
            status_code=503,
            detail=f"LLM Server Response Error: {e}"
        )
# --- 6. FastAPI routes: / (health check / home page) ---
# NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
# lifespan handlers — confirm the installed version before migrating.
@app.on_event("startup")
async def startup_event():
    """Run model initialization when FastAPI starts up."""
    try:
        initialize_llm()
    except Exception as e:
        print(f"應用程式啟動失敗: {e}")
        # On failure LLAMA_INSTANCE stays None, so inference raises a 503.
@app.get("/", summary="首頁/健康檢查")
async def root():
    """Health-check landing page reporting whether the LLM is available."""
    if LLAMA_INSTANCE:
        status = "running"
    else:
        status = "starting/failed (LLM unavailable)"
    page = f"<html><body><h1>LLM API Status: {status}</h1></body></html>"
    return HTMLResponse(content=page, status_code=200)
# --- 7. FastAPI route: /infer4 (minimal variant) ---
@app.post("/infer4", summary="執行 LLM 推論 (v4: 極簡輸入/僅回傳 response 欄位)")
async def infer4_endpoint(request: InferenceRequestMinimal):
    """Minimal inference endpoint: reads `question`, returns a `response` field."""
    system_message = "You are a friendly and concise assistant."
    token_limit = 4096
    try:
        answer = get_inference_response(
            messages=[{"role": "user", "content": request.question}],
            system_message=system_message,
            max_tokens=token_limit,
        )
        return JSONResponse(content={"response": answer})
    except HTTPException:
        # Let deliberate HTTP errors (e.g. 503 from the inference layer) pass through.
        raise
    except Exception as e:
        print(f"[Fatal Error] During API call: {e}")
        raise HTTPException(
            status_code=500,
            detail="Internal Server Error."
        )
# --- 8. Application entry point ---
if __name__ == "__main__":
    print("FastAPI 服務正在啟動...")
    # "app:app" assumes this file is saved as app.py (per the header comment).
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)