Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,8 @@ def install_required_modules():
|
|
| 13 |
"uvicorn",
|
| 14 |
"pydantic",
|
| 15 |
"huggingface-hub",
|
| 16 |
-
"llama-cpp-python"
|
|
|
|
| 17 |
]
|
| 18 |
|
| 19 |
# ----------------------------------------------------
|
|
@@ -73,6 +74,10 @@ try:
|
|
| 73 |
|
| 74 |
# 引入 Llama.cpp 模組
|
| 75 |
from llama_cpp import Llama, llama_print_system_info # 增加 system info 檢查
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
except ImportError as e:
|
| 77 |
print(f"**致命錯誤**:模組引入失敗。錯誤: {e}")
|
| 78 |
sys.exit(1)
|
|
@@ -86,6 +91,9 @@ MODEL_NAME = "Qwen3-0.6B-IQ4_XS.gguf"
|
|
| 86 |
MODEL_REPO = "unsloth/Qwen3-0.6B-GGUF"
|
| 87 |
LLAMA_INSTANCE: Optional[Llama] = None # 全域 Llama 實例
|
| 88 |
|
|
|
|
|
|
|
|
|
|
| 89 |
def initialize_llm():
|
| 90 |
"""下載模型並初始化 Llama 實例"""
|
| 91 |
global LLAMA_INSTANCE
|
|
@@ -228,8 +236,46 @@ async def infer4_endpoint(request: InferenceRequestMinimal):
|
|
| 228 |
detail="Internal Server Error."
|
| 229 |
)
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
# ---
|
| 233 |
|
| 234 |
if __name__ == "__main__":
|
| 235 |
print("FastAPI 服務正在啟動...")
|
|
|
|
| 13 |
"uvicorn",
|
| 14 |
"pydantic",
|
| 15 |
"huggingface-hub",
|
| 16 |
+
"llama-cpp-python",
|
| 17 |
+
"gradio_client" # <-- 新增 gradio_client
|
| 18 |
]
|
| 19 |
|
| 20 |
# ----------------------------------------------------
|
|
|
|
| 74 |
|
| 75 |
# 引入 Llama.cpp 模組
|
| 76 |
from llama_cpp import Llama, llama_print_system_info # 增加 system info 檢查
|
| 77 |
+
|
| 78 |
+
# 引入 gradio_client 模組
|
| 79 |
+
from gradio_client import Client
|
| 80 |
+
|
| 81 |
except ImportError as e:
|
| 82 |
print(f"**致命錯誤**:模組引入失敗。錯誤: {e}")
|
| 83 |
sys.exit(1)
|
|
|
|
| 91 |
MODEL_REPO = "unsloth/Qwen3-0.6B-GGUF"
|
| 92 |
LLAMA_INSTANCE: Optional[Llama] = None # 全域 Llama 實例
|
| 93 |
|
| 94 |
+
# Gradio Client 設定變數
|
| 95 |
+
AMD_SPACE_ID = "amd/gpt-oss-120b-chatbot" # <-- 新增 Gradio Space ID 變數
|
| 96 |
+
|
| 97 |
def initialize_llm():
|
| 98 |
"""下載模型並初始化 Llama 實例"""
|
| 99 |
global LLAMA_INSTANCE
|
|
|
|
| 236 |
detail="Internal Server Error."
|
| 237 |
)
|
| 238 |
|
| 239 |
+
|
| 240 |
+
# --- 8. FastAPI 路由: /infer_amd (使用 Gradio Client) ---
|
| 241 |
+
|
| 242 |
+
def _query_amd_space(question: str):
    """Send one question to the Space named by AMD_SPACE_ID and return the raw result.

    This is a plain blocking function: both constructing the Gradio ``Client``
    and calling ``predict`` perform network I/O, so it must be run off the
    event loop (see ``infer_amd_endpoint``).

    Args:
        question: The user message forwarded as ``message`` to the ``/chat`` API.

    Returns:
        Whatever ``client.predict`` returns for ``/chat``; the caller validates
        that it is a string.
    """
    # A fresh client is created per call, matching the original behaviour.
    client = Client(AMD_SPACE_ID)
    return client.predict(
        message=question,
        system_prompt="You are a helpful assistant.",
        temperature=0.7,
        api_name="/chat",
    )


@app.post("/infer_amd", summary="使用 Gradio Client 呼叫外部 AMD LLM Space")
async def infer_amd_endpoint(request: InferenceRequestMinimal):
    """Forward ``request.question`` to the external Space's ``/chat`` API.

    Uses ``gradio_client`` against the Space identified by ``AMD_SPACE_ID``.
    The request model is the same ``InferenceRequestMinimal`` used by
    ``/infer4``; the reply is a JSON object ``{"response": <str>}``.

    Args:
        request: Request body; only ``request.question`` is read here.

    Returns:
        JSONResponse with the Space's string answer under the ``"response"`` key.

    Raises:
        HTTPException: 503 when the Space call fails for any reason or when
            it returns something other than a string.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    try:
        # The Gradio client is synchronous and network-bound. Running it
        # directly inside this coroutine would block the event loop and stall
        # every other in-flight request, so hand it to the default executor.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None, _query_amd_space, request.question
        )

        # Guard: anything other than a plain string is treated as a failure
        # of the external service (reported as 503 below, as before).
        if not isinstance(result, str):
            raise ValueError("外部 API 回傳格式非預期的字串。")

        # Respond in the {"response": ...} shape.
        return JSONResponse(content={"response": result})

    except Exception as e:
        # Boundary handler: any failure while talking to the external Space
        # is logged and surfaced to the caller as 503 Service Unavailable.
        print(f"[Fatal Error] Gradio Client API call failed: {e}")
        raise HTTPException(
            status_code=503,
            detail=f"External AMD LLM Service Error: {e}"
        ) from e
|
| 276 |
+
|
| 277 |
|
| 278 |
+
# --- 9. 啟動應用程式 ---
|
| 279 |
|
| 280 |
if __name__ == "__main__":
|
| 281 |
print("FastAPI 服務正在啟動...")
|