Spaces:

hsuwill000
/

ESP01LLMSample

Sleeping

App Files Files Community

hsuwill000 committed on 14 days ago

Commit

bbd2fc4

verified ·

1 Parent(s): 7eb80ae

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -2

app.py CHANGED Viewed

@@ -13,7 +13,8 @@ def install_required_modules():
         "uvicorn",
         "pydantic",
         "huggingface-hub",
-        "llama-cpp-python"
     ]
     # ----------------------------------------------------
@@ -73,6 +74,10 @@ try:
     # 引入 Llama.cpp 模組
     from llama_cpp import Llama, llama_print_system_info # 增加 system info 檢查
 except ImportError as e:
     print(f"**致命錯誤**：模組引入失敗。錯誤: {e}")
     sys.exit(1)
@@ -86,6 +91,9 @@ MODEL_NAME = "Qwen3-0.6B-IQ4_XS.gguf"
 MODEL_REPO = "unsloth/Qwen3-0.6B-GGUF"
 LLAMA_INSTANCE: Optional[Llama] = None # 全域 Llama 實例
 def initialize_llm():
     """下載模型並初始化 Llama 實例"""
     global LLAMA_INSTANCE
@@ -228,8 +236,46 @@ async def infer4_endpoint(request: InferenceRequestMinimal):
             detail="Internal Server Error."
         )
-# --- 8. 啟動應用程式 ---
 if __name__ == "__main__":
     print("FastAPI 服務正在啟動...")

         "uvicorn",
         "pydantic",
         "huggingface-hub",
+        "llama-cpp-python",
+        "gradio_client" # <-- 新增 gradio_client
     ]
     # ----------------------------------------------------
     # 引入 Llama.cpp 模組
     from llama_cpp import Llama, llama_print_system_info # 增加 system info 檢查
+    # 引入 gradio_client 模組
+    from gradio_client import Client
 except ImportError as e:
     print(f"**致命錯誤**：模組引入失敗。錯誤: {e}")
     sys.exit(1)
 MODEL_REPO = "unsloth/Qwen3-0.6B-GGUF"
 LLAMA_INSTANCE: Optional[Llama] = None # 全域 Llama 實例
+# Gradio Client 設定變數
+AMD_SPACE_ID = "amd/gpt-oss-120b-chatbot" # <-- 新增 Gradio Space ID 變數
 def initialize_llm():
     """下載模型並初始化 Llama 實例"""
     global LLAMA_INSTANCE
             detail="Internal Server Error."
         )
+# --- 8. FastAPI 路由: /infer_amd (使用 Gradio Client) ---
+@app.post("/infer_amd", summary="使用 Gradio Client 呼叫外部 AMD LLM Space")
+async def infer_amd_endpoint(request: InferenceRequestMinimal):
+    """
+    使用 gradio_client 呼叫 AMD_SPACE_ID 所指定的 Space 的 /chat API。
+    輸入/輸出格式與 /infer4 相同。
+    """
+    try:
+        # 初始化 Gradio Client，使用定義在全域的 AMD_SPACE_ID
+        client = Client(AMD_SPACE_ID)
+        # 呼叫 Space API
+        result = client.predict(
+            message=request.question, # 使用請求中的 question
+            system_prompt="You are a helpful assistant.",
+            temperature=0.7,
+            api_name="/chat"
+        )
+        # 處理結果並以 /infer4 格式回傳
+        if isinstance(result, str):
+             return JSONResponse(content={
+                "response": result
+             })
+        else:
+             # 如果回傳不是字串，拋出內部錯誤
+             raise ValueError("外部 API 回傳格式非預期的字串。")
+    except Exception as e:
+        print(f"[Fatal Error] Gradio Client API call failed: {e}")
+        # 針對外部 API 錯誤，回傳 503 服務不可用
+        raise HTTPException(
+            status_code=503,
+            detail=f"External AMD LLM Service Error: {e}"
+        )
+# --- 9. 啟動應用程式 ---
 if __name__ == "__main__":
     print("FastAPI 服務正在啟動...")