Update app.py
app.py
CHANGED
@@ -5,7 +5,8 @@ import sys
 import subprocess
 from typing import List, Dict, Any, Optional
 
-# --- 0. Inline module installation
+# --- 0. Inline module installation ---
+# Warning: this may fail in many hosted environments due to insufficient permissions. Prefer requirements.txt.
 
 def install_required_modules():
     """Install all required Python modules with pip at runtime."""
@@ -14,31 +15,28 @@ def install_required_modules():
         "uvicorn",
         "pydantic",
         "huggingface-hub",
-        "llama-cpp-python"
+        "llama-cpp-python"
     ]
 
     print("--- Attempting to install/upgrade the required Python modules ---")
 
     try:
-        # Run the pip install command
-        # Use sys.executable to make sure the current Python interpreter is used
         subprocess.check_call([
             sys.executable,
             "-m",
             "pip",
             "install",
-            *required_packages,
+            *required_packages,
             "--upgrade"
         ])
        print("All modules installed/updated successfully.")
    except subprocess.CalledProcessError as e:
-        print(f"
+        print(f"**FATAL ERROR**: module installation failed. Error: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"**FATAL ERROR**: an unknown error occurred. Error: {e}")
        sys.exit(1)
 
-# Run the installation
 install_required_modules()
 
 
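The warning added above points to the conventional alternative: on Hugging Face Spaces, dependencies listed in a requirements.txt are installed at build time, which makes the runtime pip call unnecessary. A minimal sketch of that file, assuming the elided head of required_packages also names fastapi:

    # requirements.txt (sketch; pin versions as needed)
    fastapi          # assumed: the start of required_packages is outside the diff
    uvicorn
    pydantic
    huggingface-hub
    llama-cpp-python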
@@ -58,7 +56,7 @@ try:
     # Import the Llama.cpp module
     from llama_cpp import Llama
 except ImportError as e:
-    print(f"
+    print(f"**FATAL ERROR**: module import failed. Error: {e}")
     sys.exit(1)
 
 
@@ -66,7 +64,7 @@ except ImportError as e:
 
 MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"
 MODEL_REPO = "Qwen/Qwen3-0.6B-GGUF"
-LLAMA_INSTANCE: Optional[Llama] = None  #
+LLAMA_INSTANCE: Optional[Llama] = None  # global Llama instance
 
 def initialize_llm():
     """Download the model and initialize the Llama instance."""
@@ -78,9 +76,7 @@ def initialize_llm():
     print(f"--- 1. Starting download of model {MODEL_NAME} ---")
     try:
         model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
-        print(f"Model downloaded; path: {model_path}")
     except Exception as e:
-        print(f"**FATAL ERROR**: unable to download the model. Error: {e}")
         raise RuntimeError(f"Unable to download the model: {e}")
 
     print("--- 2. Initializing the Llama.cpp instance ---")
@@ -90,12 +86,11 @@
             n_ctx=4096,
             n_batch=512,
             n_threads=os.cpu_count() // 2 or 1,
-            n_gpu_layers=0,
+            n_gpu_layers=0,
             verbose=False
         )
         print("Llama.cpp model loaded successfully.")
     except Exception as e:
-        print(f"**FATAL ERROR**: Llama.cpp instance initialization failed. Error: {e}")
         raise RuntimeError(f"Llama instance initialization failed: {e}")
 
 
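The Llama(...) constructor call itself sits just above this hunk and is not shown. Given the visible keyword arguments, the full call presumably resembles the following sketch; model_path is the value returned by hf_hub_download above, and the exact shape is an assumption:

    # Sketch of the elided constructor call, inferred from the visible arguments.
    LLAMA_INSTANCE = Llama(
        model_path=model_path,               # file downloaded via hf_hub_download
        n_ctx=4096,                          # context window size
        n_batch=512,                         # prompt-processing batch size
        n_threads=os.cpu_count() // 2 or 1,  # falls back to 1 on single-core machines
        n_gpu_layers=0,                      # CPU-only: no layers offloaded to a GPU
        verbose=False,
    )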
@@ -115,16 +110,7 @@ app.add_middleware(
 )
 
 
-# --- 4. Pydantic request models ---
-
-class InferenceRequest(BaseModel):
-    """Data structure for an inference request, based on the OpenAI Chat Completion format."""
-    messages: List[Dict[str, str]]
-    system_message: str = "You are a friendly assistant."
-    max_tokens: int = 4096
-    temperature: float = 0.7
-    top_p: float = 0.95
-    extra_params: Optional[Dict[str, Any]] = {}
+# --- 4. Pydantic request model (minimal version only) ---
 
 class InferenceRequestMinimal(BaseModel):
     """Data structure for a minimal inference request; accepts only the question."""
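The fields of InferenceRequestMinimal fall outside the visible hunks; its docstring says it accepts only the question, so the model presumably reduces to something like this (the field name is hypothetical):

    class InferenceRequestMinimal(BaseModel):
        """Data structure for a minimal inference request; accepts only the question."""
        question: str  # hypothetical name; the real definition is outside the diff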
@@ -139,7 +125,6 @@ def get_inference_response(
     max_tokens: int,
     temperature: float = 0.7,
     top_p: float = 0.95,
-    extra_params: Dict[str, Any] = {}
 ) -> str:
     """Call the Llama.cpp instance and return a single text response."""
 
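The body of get_inference_response is also outside the diffed region. With extra_params removed, a plausible mapping of the remaining parameters onto llama-cpp-python's create_chat_completion API is sketched below; the 503 guard mirrors the startup comment further down, but every detail here is an assumption rather than the file's actual code:

    # Hypothetical body; only the signature appears in the diff.
    def get_inference_response(
        messages: List[Dict[str, str]],
        system_message: str,
        max_tokens: int,
        temperature: float = 0.7,
        top_p: float = 0.95,
    ) -> str:
        """Call the Llama.cpp instance and return a single text response."""
        if LLAMA_INSTANCE is None:
            # Model never loaded (see startup_event below)
            raise HTTPException(status_code=503, detail="LLM is not initialized.")
        result = LLAMA_INSTANCE.create_chat_completion(
            messages=[{"role": "system", "content": system_message}, *messages],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return result["choices"][0]["message"]["content"]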
@@ -171,7 +156,7 @@
 )
 
 
-# --- 6. FastAPI route: health check / home page ---
+# --- 6. FastAPI route: / (health check / home page) ---
 
 @app.on_event("startup")
 async def startup_event():
@@ -180,7 +165,7 @@ async def startup_event():
         initialize_llm()
     except Exception as e:
         print(f"Application startup failed: {e}")
-        #
+        # If initialization fails, the LLM instance stays None and inference raises a 503 error
 
 @app.get("/", summary="Home page / health check")
 async def root():
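A quick smoke test for the health-check route; port 7860 matches the uvicorn.run call removed at the bottom of this diff, and requests is just one example of an HTTP client:

    import requests

    resp = requests.get("http://localhost:7860/")
    print(resp.status_code)  # 200 once the app is up
    print(resp.text)         # HTML page reading "LLM API Status: ..."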
@@ -188,34 +173,7 @@ async def root():
     return HTMLResponse(content=f"<html><body><h1>LLM API Status: {status}</h1></body></html>", status_code=200)
 
 
-# --- 7. FastAPI route:
-
-@app.post("/infer", summary="Run LLM inference (v1)")
-async def infer_endpoint(request: InferenceRequest):
-    try:
-        content = get_inference_response(
-            messages=request.messages,
-            system_message=request.system_message,
-            max_tokens=request.max_tokens,
-            temperature=request.temperature,
-            top_p=request.top_p,
-            extra_params=request.extra_params
-        )
-        return JSONResponse(content={
-            "status": "success",
-            "response": content
-        })
-    except HTTPException as http_ex:
-        raise http_ex
-    except Exception as e:
-        print(f"[Fatal Error] During API call: {e}")
-        raise HTTPException(
-            status_code=500,
-            detail="Internal Server Error."
-        )
-
-
-# --- 8. FastAPI route: inference endpoint v4 (minimal version, matching your existing /infer4) ---
+# --- 7. FastAPI route: /infer4 (minimal version) ---
 
 @app.post("/infer4", summary="Run LLM inference (v4: minimal input / returns only the response field)")
 async def infer4_endpoint(request: InferenceRequestMinimal):
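With /infer (v1) removed, /infer4 is the only inference route left. A client call could look like the sketch below; the single field name is a guess, since InferenceRequestMinimal is defined outside the visible hunks:

    import requests

    resp = requests.post(
        "http://localhost:7860/infer4",
        # "question" is hypothetical; check InferenceRequestMinimal for the real field
        json={"question": "Briefly introduce llama.cpp."},
    )
    print(resp.json()["response"])  # per the summary, only a response field comes back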
@@ -240,14 +198,4 @@ async def infer4_endpoint(request: InferenceRequestMinimal):
     except Exception as e:
         print(f"[Fatal Error] During API call: {e}")
         raise HTTPException(
-            status_code=500,
-            detail="Internal Server Error."
-        )
-
-
-# --- 9. Start the application ---
-
-if __name__ == "__main__":
-    print("FastAPI service is starting...")
-    # In a Gradio Space, without other configuration, this is likely your application's entry point
-    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
+            status_code=500,
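Removing the __main__ block means app.py no longer starts its own server, so the Space's runtime (or an external launcher) presumably starts it instead, equivalent to the deleted call:

    # Mirror of the removed entry point; run from a launcher script, or use
    # `uvicorn app:app --host 0.0.0.0 --port 7860` on the command line.
    import uvicorn

    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)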