Spaces:

hsuwill000
/

ESP01LLMSample

Sleeping

App Files Files Community

hsuwill000 committed on Dec 1, 2025

Commit

df53ff4

verified ·

1 Parent(s): defa84e

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -39

app.py CHANGED Viewed

@@ -1,12 +1,47 @@
 # app.py
 import os
 import gradio as gr
 from typing import List, Dict
 from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-# --- 1. 模型設定與下載 ---
 # 您指定的模型資訊
 MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"
@@ -16,39 +51,36 @@ MODEL_REPO = "Qwen/Qwen3-0.6B-GGUF"
 DEFAULT_SYSTEM_MESSAGE = "You are a friendly and helpful assistant. Please answer the user's questions concisely and accurately."
 # 步驟 1: 下載 GGUF 模型
-# 模型會被下載到 ~/.cache/huggingface/hub/ 或指定的快取目錄
 try:
     print(f"嘗試從 {MODEL_REPO} 下載 {MODEL_NAME}...")
     model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
     print(f"模型下載完成，路徑: {model_path}")
 except Exception as e:
-    print(f"**錯誤**：無法下載模型。請檢查網路連線或模型名稱/權限。錯誤訊息: {e}")
-    # 在 Gradio Space 中，如果模型無法下載，應用程式會無法啟動。
-    # 這裡可以選擇性地退出或使用本地路徑作為備用（如果存在）。
-    exit(1)
-# --- 2. Llama.cpp 初始化 ---
 # 步驟 2: 初始化 Llama.cpp 實例
-# n_gpu_layers=0 表示不使用 GPU (CPU 推論)，如果環境支援 CUDA/cuBLAS，可以設定為 >0
 try:
     print("正在初始化 Llama.cpp 實例...")
     llm = Llama(
         model_path=model_path,
         n_ctx=4096,  # 上下文長度
         n_batch=512, # 批次大小
-        n_threads=os.cpu_count() // 2 or 1, # 使用一半的 CPU 核心
         n_gpu_layers=0, # CPU 推論
         verbose=False # 關閉內部日誌輸出
     )
     print("Llama.cpp 模型加載成功。")
 except Exception as e:
     print(f"**錯誤**：Llama.cpp 實例初始化失敗。錯誤訊息: {e}")
-    exit(1)
-# --- 3. 推論核心函式 ---
 def llama_inference(
     message: str,
@@ -60,32 +92,25 @@ def llama_inference(
 ) -> str:
     """
     使用 Llama.cpp 實例執行推論並返回回應。
-    :param message: 當前的使用者輸入。
-    :param chat_history: Gradio 傳遞的聊天歷史記錄 (list of [user, bot] pairs)。
-    :return: LLM 的回應文字。
     """
-    # 將 Gradio 的聊天歷史轉換為 Llama.cpp/OpenAI 格式的 messages 列表
     messages = [{"role": "system", "content": system_message}]
     for human, assistant in chat_history:
-        # 歷史對話
         messages.append({"role": "user", "content": human})
         messages.append({"role": "assistant", "content": assistant})
-    # 當前訊息
     messages.append({"role": "user", "content": message})
     try:
-        # 呼叫 Llama.cpp 的 create_chat_completion 介面 (與 OpenAI 格式相容)
         response = llm.create_chat_completion(
             messages=messages,
             max_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-            # stream=False 是預設值
         )
         # 解析回應
@@ -100,23 +125,16 @@ def llama_inference(
         return f"❌ 伺服器錯誤 (Llama.cpp 推論失敗): {e}"
-# --- 4. Gradio 介面設定 ---
-# 定義 Gradio 聊天函式 (用於更新介面)
 def chat_interface(message: str, history: List[List[str]]):
     """Gradio 介面調用函式。"""
-    # 這裡可以固定或從另一個輸入元件獲取參數，為了簡化，使用硬編碼值
     response = llama_inference(
         message=message,
         chat_history=history,
-        system_message=DEFAULT_SYSTEM_MESSAGE,
-        max_tokens=4096,
-        temperature=0.7,
-        top_p=0.95
     )
-    # Gradio 聊天介面要求回傳回應文字
     return response
@@ -129,30 +147,22 @@ with gr.Blocks(title="Qwen3-0.6B-GGUF 聊天機器人") as demo:
         """
     )
-    # 聊天元件
     chatbot = gr.Chatbot(
         label="聊天記錄",
         height=500
     )
-    # 聊天輸入元件
     chat_input = gr.Textbox(
         show_label=False,
         placeholder="請輸入你的問題...",
         container=False
     )
-    # 綁定聊天邏輯
-    # submit 觸發事件：
-    # - fn: 要執行的 Python 函式 (chat_interface)
-    # - inputs: 函式接收的輸入 ([Textbox 的內容, Chatbot 的歷史])
-    # - outputs: 函式輸出的結果 (Chatbot 的新歷史)
     chat_input.submit(
         fn=chat_interface,
         inputs=[chat_input, chatbot],
         outputs=chatbot
     ).then(
-        # 清空輸入框
         fn=lambda: "",
         inputs=None,
         outputs=chat_input,
@@ -161,6 +171,4 @@ with gr.Blocks(title="Qwen3-0.6B-GGUF 聊天機器人") as demo:
 # 啟動應用程式
 if __name__ == "__main__":
-    # 在 Gradio Space 中，會使用 gunicorn 或類似服務來運行，但如果要在本地測試，可以使用以下命令：
-    # python app.py
     demo.launch(server_name="0.0.0.0", server_port=7860)

 # app.py
 import os
+import sys
+import subprocess
 import gradio as gr
 from typing import List, Dict
 from huggingface_hub import hf_hub_download
+# --- 0. 內嵌安裝 llama-cpp-python ---
+# 警告：這是一個非標準且可能失敗的解決方案。
+# 建議在 Gradio Space 中使用 requirements.txt 來安裝依賴。
+try:
+    print("--- 嘗試動態安裝 llama-cpp-python ---")
+    # 執行 pip install 命令
+    # 使用 sys.executable 確保使用當前的 Python 解譯器
+    subprocess.check_call([
+        sys.executable,
+        "-m",
+        "pip",
+        "install",
+        "llama-cpp-python",
+        "--upgrade" # 確保是最新版本
+    ])
+    print("llama-cpp-python 安裝/更新成功。")
+except subprocess.CalledProcessError as e:
+    print(f"**致命錯誤**：llama-cpp-python 安裝失敗。請檢查環境權限或系統依賴。錯誤訊息: {e}")
+    # 由於安裝失敗，我們不能繼續執行
+    sys.exit(1)
+except Exception as e:
+    print(f"**致命錯誤**：發生未知錯誤。錯誤訊息: {e}")
+    sys.exit(1)
+# --- 1. 引入 llama_cpp ---
+# 必須在嘗試安裝之後才能引入
+try:
+    from llama_cpp import Llama
+except ImportError:
+    print("**致命錯誤**：即使嘗試安裝，仍然無法引入 llama_cpp。請檢查 pip 安裝日誌。")
+    sys.exit(1)
+# --- 2. 模型設定與下載 ---
 # 您指定的模型資訊
 MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"
 DEFAULT_SYSTEM_MESSAGE = "You are a friendly and helpful assistant. Please answer the user's questions concisely and accurately."
 # 步驟 1: 下載 GGUF 模型
 try:
     print(f"嘗試從 {MODEL_REPO} 下載 {MODEL_NAME}...")
     model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
     print(f"模型下載完成，路徑: {model_path}")
 except Exception as e:
+    print(f"**錯誤**：無法下載模型。錯誤訊息: {e}")
+    sys.exit(1) # 無法下載模型則退出
+# --- 3. Llama.cpp 初始化 ---
 # 步驟 2: 初始化 Llama.cpp 實例
 try:
     print("正在初始化 Llama.cpp 實例...")
     llm = Llama(
         model_path=model_path,
         n_ctx=4096,  # 上下文長度
         n_batch=512, # 批次大小
+        # 為了 Gradio Space 穩定性，使用少量 CPU 核心
+        n_threads=os.cpu_count() // 2 or 1,
         n_gpu_layers=0, # CPU 推論
         verbose=False # 關閉內部日誌輸出
     )
     print("Llama.cpp 模型加載成功。")
 except Exception as e:
     print(f"**錯誤**：Llama.cpp 實例初始化失敗。錯誤訊息: {e}")
+    sys.exit(1)
+# --- 4. 推論核心函式 ---
 def llama_inference(
     message: str,
 ) -> str:
     """
     使用 Llama.cpp 實例執行推論並返回回應。
     """
+    # 格式化訊息列表，包含系統提示和聊天歷史
     messages = [{"role": "system", "content": system_message}]
     for human, assistant in chat_history:
         messages.append({"role": "user", "content": human})
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
     try:
+        # 呼叫 Llama.cpp 的 create_chat_completion 介面
         response = llm.create_chat_completion(
             messages=messages,
             max_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
         )
         # 解析回應
         return f"❌ 伺服器錯誤 (Llama.cpp 推論失敗): {e}"
+# --- 5. Gradio 介面設定 ---
 def chat_interface(message: str, history: List[List[str]]):
     """Gradio 介面調用函式。"""
     response = llama_inference(
         message=message,
         chat_history=history,
     )
     return response
         """
     )
     chatbot = gr.Chatbot(
         label="聊天記錄",
         height=500
     )
     chat_input = gr.Textbox(
         show_label=False,
         placeholder="請輸入你的問題...",
         container=False
     )
     chat_input.submit(
         fn=chat_interface,
         inputs=[chat_input, chatbot],
         outputs=chatbot
     ).then(
         fn=lambda: "",
         inputs=None,
         outputs=chat_input,
 # 啟動應用程式
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)