Spaces:

hsuwill000
/

ESP01LLMSample

Sleeping

App Files Files Community

ESP01LLMSample / app.py

hsuwill000

Update app.py

df53ff4 verified 5 months ago

raw

history blame

5.1 kB

	# app.py

	import os
	import sys
	import subprocess
	import gradio as gr
	from typing import List, Dict
	from huggingface_hub import hf_hub_download

	# --- 0/1. Make llama_cpp importable, then import it ---
	# HACK: installing a dependency at runtime is non-standard and may fail;
	# listing llama-cpp-python in requirements.txt is the recommended approach
	# for a Gradio Space. pip is therefore only used as a fallback when the
	# package cannot be imported, instead of running `pip install --upgrade`
	# unconditionally on every start-up.
	try:
	    from llama_cpp import Llama
	    print("llama-cpp-python 已安裝，略過動態安裝。")
	except ImportError:
	    import importlib

	    try:
	        print("--- 嘗試動態安裝 llama-cpp-python ---")
	        # sys.executable guarantees pip runs under the current interpreter.
	        subprocess.check_call([
	            sys.executable,
	            "-m",
	            "pip",
	            "install",
	            "llama-cpp-python",
	            "--upgrade",  # install the newest release
	        ])
	        print("llama-cpp-python 安裝/更新成功。")
	    except subprocess.CalledProcessError as e:
	        print(f"致命錯誤：llama-cpp-python 安裝失敗。請檢查環境權限或系統依賴。錯誤訊息: {e}")
	        # Nothing below can work without the package, so stop here.
	        sys.exit(1)
	    except Exception as e:
	        print(f"致命錯誤：發生未知錯誤。錯誤訊息: {e}")
	        sys.exit(1)

	    # The package was installed while this process was running; drop the
	    # import system's cached directory listings so the retry can see it.
	    importlib.invalidate_caches()
	    try:
	        from llama_cpp import Llama
	    except ImportError:
	        print("致命錯誤：即使嘗試安裝，仍然無法引入 llama_cpp。請檢查 pip 安裝日誌。")
	        sys.exit(1)


	# --- 2. Model configuration and download ---

	# Hub repository and GGUF file name of the model to run.
	MODEL_REPO = "Qwen/Qwen3-0.6B-GGUF"
	MODEL_NAME = "Qwen3-0.6B-Q8_0.gguf"

	# Fixed system prompt, used as the default for llama_inference().
	DEFAULT_SYSTEM_MESSAGE = (
	    "You are a friendly and helpful assistant. "
	    "Please answer the user's questions concisely and accurately."
	)

	# Step 1: download the GGUF model; the resulting path is handed to Llama().
	try:
	    print(f"嘗試從 {MODEL_REPO} 下載 {MODEL_NAME}...")
	    model_path = hf_hub_download(
	        filename=MODEL_NAME,
	        repo_id=MODEL_REPO,
	    )
	    print(f"模型下載完成，路徑: {model_path}")
	except Exception as download_error:
	    print(f"錯誤：無法下載模型。錯誤訊息: {download_error}")
	    # Without a model file there is nothing to serve.
	    sys.exit(1)


	# --- 3. Llama.cpp initialisation ---

	# Step 2: create the module-level Llama.cpp instance used by llama_inference().
	try:
	    print("正在初始化 Llama.cpp 實例...")
	    llm = Llama(
	        model_path=model_path,
	        n_ctx=4096,  # context length
	        n_batch=512,  # batch size
	        # Use only half of the CPU cores for stability on a Gradio Space.
	        # os.cpu_count() may return None when the count cannot be
	        # determined; treat that as one core instead of failing on
	        # `None // 2`, and never go below one thread.
	        n_threads=max(1, (os.cpu_count() or 1) // 2),
	        n_gpu_layers=0,  # CPU inference
	        verbose=False,  # silence llama.cpp's internal logging
	    )
	    print("Llama.cpp 模型加載成功。")
	except Exception as e:
	    print(f"錯誤：Llama.cpp 實例初始化失敗。錯誤訊息: {e}")
	    sys.exit(1)


	# --- 4. 推論核心函式 ---

	def llama_inference(
	    message: str,
	    chat_history: List[List[str]],
	    system_message: str = DEFAULT_SYSTEM_MESSAGE,
	    max_tokens: int = 4096,
	    temperature: float = 0.7,
	    top_p: float = 0.95
	) -> str:
	    """Run one chat completion on the module-level ``llm`` and return the reply.

	    The prompt is the system message, then the previous turns, then
	    ``message`` as the final user turn.

	    Args:
	        message: The new user message.
	        chat_history: Previous turns. Each entry is either a
	            ``[user_text, assistant_text]`` pair or a dict with ``role`` and
	            ``content`` keys. ``None`` (or an empty list) means no history;
	            a ``None`` half of a pair is skipped.
	        system_message: Content of the leading system message.
	        max_tokens: Passed through to ``llm.create_chat_completion``.
	        temperature: Passed through to ``llm.create_chat_completion``.
	        top_p: Passed through to ``llm.create_chat_completion``.

	    Returns:
	        The reply text. A warning string is returned when the response holds
	        no usable content, and an error string when the completion call
	        raises; this function itself does not propagate those exceptions.
	    """
	    empty_reply = "⚠️ LLM 服務回傳空內容。"

	    # Build the message list: system prompt, prior turns, new user message.
	    messages = [{"role": "system", "content": system_message}]

	    for turn in chat_history or []:
	        if isinstance(turn, dict):
	            # Entry already in role/content form: forward the text as-is.
	            role = turn.get("role")
	            content = turn.get("content")
	            if role and isinstance(content, str):
	                messages.append({"role": role, "content": content})
	            continue

	        human, assistant = turn
	        # A turn may be half-filled (e.g. no assistant reply yet); sending
	        # None as message content would be invalid, so skip that half.
	        if human is not None:
	            messages.append({"role": "user", "content": human})
	        if assistant is not None:
	            messages.append({"role": "assistant", "content": assistant})

	    messages.append({"role": "user", "content": message})

	    try:
	        response = llm.create_chat_completion(
	            messages=messages,
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	        )

	        choices = response.get('choices') or []
	        if not choices:
	            return empty_reply

	        reply = (choices[0].get('message') or {}).get('content')
	        # `or` also covers a present-but-None/empty content value, which a
	        # dict.get default would not, so the return value is always a str.
	        return reply or empty_reply

	    except Exception as e:
	        # Surface the failure in the chat instead of crashing the UI callback.
	        print(f"[Error] Llama Inference failed: {e}")
	        return f"❌ 伺服器錯誤 (Llama.cpp 推論失敗): {e}"


	# --- 5. Gradio 介面設定 ---

	def chat_interface(message: str, history: List[List[str]]):
	    """Gradio-facing entry point: answer ``message`` given ``history``.

	    Delegates to ``llama_inference`` with its remaining parameters left at
	    their defaults and returns that function's result unchanged.
	    """
	    return llama_inference(message=message, chat_history=history)


	# Build the Gradio interface
	with gr.Blocks(title="Qwen3-0.6B-GGUF 聊天機器人") as demo:
	    gr.Markdown(
	        f"""
	# Qwen3-0.6B-GGUF 聊天機器人
	使用 llama-cpp-python 模組運行 {MODEL_NAME} 模型。
	"""
	    )

	    chatbot = gr.Chatbot(
	        label="聊天記錄",
	        height=500
	    )

	    chat_input = gr.Textbox(
	        show_label=False,
	        placeholder="請輸入你的問題...",
	        container=False
	    )

	    def _respond(message, history):
	        """Return the chat history extended with the new exchange.

	        The Chatbot output must receive the whole conversation, not just the
	        latest reply string, so the new [user, assistant] pair is appended
	        to a copy of the incoming history.
	        """
	        # NOTE(review): pairs match the List[List[str]] history shape that
	        # chat_interface is annotated with; if the Chatbot is configured for
	        # role/content dict messages, append dicts here instead - confirm
	        # against the installed Gradio version.
	        turns = list(history or [])
	        reply = chat_interface(message, turns)
	        return turns + [[message, reply]]

	    # On submit: update the chat log first, then clear the input box.
	    chat_input.submit(
	        fn=_respond,
	        inputs=[chat_input, chatbot],
	        outputs=chatbot
	    ).then(
	        fn=lambda: "",
	        inputs=None,
	        outputs=chat_input,
	        queue=False
	    )

	# Start the application when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(
	        server_port=7860,
	        server_name="0.0.0.0",
	    )