hsuwill000 committed on
Commit
08ac672
·
verified ·
1 Parent(s): 214e263

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -65
app.py CHANGED
@@ -5,7 +5,8 @@ import sys
5
  import subprocess
6
  from typing import List, Dict, Any, Optional
7
 
8
- # --- 0. 內嵌模組安裝 (強制在程式碼內安裝所有依賴) ---
 
9
 
10
  def install_required_modules():
11
  """使用 pip 在運行時安裝所有必要的 Python 模組。"""
@@ -14,31 +15,28 @@ def install_required_modules():
14
  "uvicorn",
15
  "pydantic",
16
  "huggingface-hub",
17
- "llama-cpp-python" # 這個通常需要較長的時間來編譯
18
  ]
19
 
20
  print("--- 嘗試動態安裝/升級必要的 Python 模組 ---")
21
 
22
  try:
23
- # 執行 pip install 命令
24
- # 使用 sys.executable 確保使用當前的 Python 解譯器
25
  subprocess.check_call([
26
  sys.executable,
27
  "-m",
28
  "pip",
29
  "install",
30
- *required_packages, # 展開列表中的所有套件名
31
  "--upgrade"
32
  ])
33
  print("所有模組安裝/更新成功。")
34
  except subprocess.CalledProcessError as e:
35
- print(f"**致命錯誤**:模組安裝失敗。請檢查環境權限或系統依賴 (尤其是 llama-cpp-python)。錯誤訊息: {e}")
36
  sys.exit(1)
37
  except Exception as e:
38
  print(f"**致命錯誤**:發生未知錯誤。錯誤訊息: {e}")
39
  sys.exit(1)
40
 
41
- # 執行安裝
42
  install_required_modules()
43
 
44
 
@@ -58,7 +56,7 @@ try:
58
  # 引入 Llama.cpp 模組
59
  from llama_cpp import Llama
60
  except ImportError as e:
61
- print(f"**致命錯誤**:模組引入失敗,即使嘗試安裝也失敗。錯誤: {e}")
62
  sys.exit(1)
63
 
64
 
@@ -66,7 +64,7 @@ except ImportError as e:
66
 
67
  MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"
68
  MODEL_REPO = "Qwen/Qwen3-0.6B-GGUF"
69
- LLAMA_INSTANCE: Optional[Llama] = None # 定義全域 Llama 實例變數
70
 
71
  def initialize_llm():
72
  """下載模型並初始化 Llama 實例"""
@@ -78,9 +76,7 @@ def initialize_llm():
78
  print(f"--- 1. 開始下載模型 {MODEL_NAME} ---")
79
  try:
80
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
81
- print(f"模型下載完成,路徑: {model_path}")
82
  except Exception as e:
83
- print(f"**致命錯誤**:無法下載模型。錯誤訊息: {e}")
84
  raise RuntimeError(f"無法下載模型: {e}")
85
 
86
  print("--- 2. 初始化 Llama.cpp 實例 ---")
@@ -90,12 +86,11 @@ def initialize_llm():
90
  n_ctx=4096,
91
  n_batch=512,
92
  n_threads=os.cpu_count() // 2 or 1,
93
- n_gpu_layers=0, # CPU 推論 (可根據環境調整)
94
  verbose=False
95
  )
96
  print("Llama.cpp 模型加載成功。")
97
  except Exception as e:
98
- print(f"**致命錯誤**:Llama.cpp 實例初始化失敗。錯誤訊息: {e}")
99
  raise RuntimeError(f"Llama 實例初始化失敗: {e}")
100
 
101
 
@@ -115,16 +110,7 @@ app.add_middleware(
115
  )
116
 
117
 
118
- # --- 4. Pydantic 請求模型 ---
119
-
120
- class InferenceRequest(BaseModel):
121
- """推論請求的資料結構,基於 OpenAI Chat Completion 格式。"""
122
- messages: List[Dict[str, str]]
123
- system_message: str = "You are a friendly assistant."
124
- max_tokens: int = 4096
125
- temperature: float = 0.7
126
- top_p: float = 0.95
127
- extra_params: Optional[Dict[str, Any]] = {}
128
 
129
  class InferenceRequestMinimal(BaseModel):
130
  """極簡推論請求的資料結構,僅接收問題。"""
@@ -139,7 +125,6 @@ def get_inference_response(
139
  max_tokens: int,
140
  temperature: float = 0.7,
141
  top_p: float = 0.95,
142
- extra_params: Dict[str, Any] = {}
143
  ) -> str:
144
  """呼叫 Llama.cpp 實例並返回單一文字回應。"""
145
 
@@ -171,7 +156,7 @@ def get_inference_response(
171
  )
172
 
173
 
174
- # --- 6. FastAPI 路由: 健康檢查/首頁 ---
175
 
176
  @app.on_event("startup")
177
  async def startup_event():
@@ -180,7 +165,7 @@ async def startup_event():
180
  initialize_llm()
181
  except Exception as e:
182
  print(f"應用程式啟動失敗: {e}")
183
- # 允許應用程式啟動,但 LLM 服務將會處於不可用狀態 (會拋出 503)
184
 
185
  @app.get("/", summary="首頁/健康檢查")
186
  async def root():
@@ -188,34 +173,7 @@ async def root():
188
  return HTMLResponse(content=f"<html><body><h1>LLM API Status: {status}</h1></body></html>", status_code=200)
189
 
190
 
191
- # --- 7. FastAPI 路由: 推論端點 v1 (複雜版,與您原有的 /infer 對應) ---
192
-
193
- @app.post("/infer", summary="執行 LLM 推論 (v1)")
194
- async def infer_endpoint(request: InferenceRequest):
195
- try:
196
- content = get_inference_response(
197
- messages=request.messages,
198
- system_message=request.system_message,
199
- max_tokens=request.max_tokens,
200
- temperature=request.temperature,
201
- top_p=request.top_p,
202
- extra_params=request.extra_params
203
- )
204
- return JSONResponse(content={
205
- "status": "success",
206
- "response": content
207
- })
208
- except HTTPException as http_ex:
209
- raise http_ex
210
- except Exception as e:
211
- print(f"[Fatal Error] During API call: {e}")
212
- raise HTTPException(
213
- status_code=500,
214
- detail="Internal Server Error."
215
- )
216
-
217
-
218
- # --- 8. FastAPI 路由: 推論端點 v4 (極簡版,與您原有的 /infer4 對應) ---
219
 
220
  @app.post("/infer4", summary="執行 LLM 推論 (v4: 極簡輸入/僅回傳 response 欄位)")
221
  async def infer4_endpoint(request: InferenceRequestMinimal):
@@ -240,14 +198,4 @@ async def infer4_endpoint(request: InferenceRequestMinimal):
240
  except Exception as e:
241
  print(f"[Fatal Error] During API call: {e}")
242
  raise HTTPException(
243
- status_code=500,
244
- detail="Internal Server Error."
245
- )
246
-
247
-
248
- # --- 9. 啟動應用程式 ---
249
-
250
- if __name__ == "__main__":
251
- print("FastAPI 服務正在啟動...")
252
- # 在 Gradio Space 中,如果沒有其他設定,這裡可能是您的應用程式入口
253
- uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
5
  import subprocess
6
  from typing import List, Dict, Any, Optional
7
 
8
+ # --- 0. 內嵌模組安裝 ---
9
+ # 警告: 這在許多託管環境中可能因權限不足而失敗。建議使用 requirements.txt。
10
 
11
  def install_required_modules():
12
  """使用 pip 在運行時安裝所有必要的 Python 模組。"""
 
15
  "uvicorn",
16
  "pydantic",
17
  "huggingface-hub",
18
+ "llama-cpp-python"
19
  ]
20
 
21
  print("--- 嘗試動態安裝/升級必要的 Python 模組 ---")
22
 
23
  try:
 
 
24
  subprocess.check_call([
25
  sys.executable,
26
  "-m",
27
  "pip",
28
  "install",
29
+ *required_packages,
30
  "--upgrade"
31
  ])
32
  print("所有模組安裝/更新成功。")
33
  except subprocess.CalledProcessError as e:
34
+ print(f"**致命錯誤**:模組安裝失敗。錯誤訊息: {e}")
35
  sys.exit(1)
36
  except Exception as e:
37
  print(f"**致命錯誤**:發生未知錯誤。錯誤訊息: {e}")
38
  sys.exit(1)
39
 
 
40
  install_required_modules()
41
 
42
 
 
56
  # 引入 Llama.cpp 模組
57
  from llama_cpp import Llama
58
  except ImportError as e:
59
+ print(f"**致命錯誤**:模組引入失敗。錯誤: {e}")
60
  sys.exit(1)
61
 
62
 
 
64
 
65
  MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"
66
  MODEL_REPO = "Qwen/Qwen3-0.6B-GGUF"
67
+ LLAMA_INSTANCE: Optional[Llama] = None # 全域 Llama 實例
68
 
69
  def initialize_llm():
70
  """下載模型並初始化 Llama 實例"""
 
76
  print(f"--- 1. 開始下載模型 {MODEL_NAME} ---")
77
  try:
78
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
 
79
  except Exception as e:
 
80
  raise RuntimeError(f"無法下載模型: {e}")
81
 
82
  print("--- 2. 初始化 Llama.cpp 實例 ---")
 
86
  n_ctx=4096,
87
  n_batch=512,
88
  n_threads=os.cpu_count() // 2 or 1,
89
+ n_gpu_layers=0,
90
  verbose=False
91
  )
92
  print("Llama.cpp 模型加載成功。")
93
  except Exception as e:
 
94
  raise RuntimeError(f"Llama 實例初始化失敗: {e}")
95
 
96
 
 
110
  )
111
 
112
 
113
+ # --- 4. Pydantic 請求模型 (僅保留極簡版) ---
 
 
 
 
 
 
 
 
 
114
 
115
  class InferenceRequestMinimal(BaseModel):
116
  """極簡推論請求的資料結構,僅接收問題。"""
 
125
  max_tokens: int,
126
  temperature: float = 0.7,
127
  top_p: float = 0.95,
 
128
  ) -> str:
129
  """呼叫 Llama.cpp 實例並返回單一文字回應。"""
130
 
 
156
  )
157
 
158
 
159
+ # --- 6. FastAPI 路由: / (健康檢查/首頁) ---
160
 
161
  @app.on_event("startup")
162
  async def startup_event():
 
165
  initialize_llm()
166
  except Exception as e:
167
  print(f"應用程式啟動失敗: {e}")
168
+ # 如果初始化失敗,LLM 實例為 None,推論會拋出 503 錯誤
169
 
170
  @app.get("/", summary="首頁/健康檢查")
171
  async def root():
 
173
  return HTMLResponse(content=f"<html><body><h1>LLM API Status: {status}</h1></body></html>", status_code=200)
174
 
175
 
176
+ # --- 7. FastAPI 路由: /infer4 (極簡版) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  @app.post("/infer4", summary="執行 LLM 推論 (v4: 極簡輸入/僅回傳 response 欄位)")
179
  async def infer4_endpoint(request: InferenceRequestMinimal):
 
198
  except Exception as e:
199
  print(f"[Fatal Error] During API call: {e}")
200
  raise HTTPException(
201
+ status_code=500,