Spaces:

Marcus719
/

ID2223_Lab2

Sleeping

App Files Community

Maoxt committed on Dec 3, 2025

Commit

643dcb5

verified ·

1 Parent(s): 4345ede

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -87

app.py CHANGED Viewed

@@ -4,22 +4,21 @@ import time
 from huggingface_hub import snapshot_download
 import gradio as gr
-# 尝试导入 llama_cpp，如果失败则在 UI 中提示
 try:
     from llama_cpp import Llama
 except Exception as e:
     Llama = None
     Llama_import_error = e
-# ---------- 配置区域 ----------
-# ★★★ 请在这里修改为你的模型仓库 ★★★
-MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
-# 指定只下载 q4_k_m 文件，防止下载多余文件爆盘
 GGUF_FILENAME = "unsloth.Q4_K_M.gguf"
-DEFAULT_N_CTX = 2048  # 上下文长度
-DEFAULT_MAX_TOKENS = 256 # 默认生成长度
-DEFAULT_N_THREADS = 2 # 免费 CPU 建议设为 2
 # ------------------------------
 def log(msg: str):
@@ -27,125 +26,119 @@ def log(msg: str):
 def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
     if Llama is None:
-        raise RuntimeError(f"llama-cpp-python 未安装或加载失败: {Llama_import_error}")
-    log(f"开始下载模型: {repo_id} / {filename} ...")
-    # 使用 snapshot_download 下载单个文件
-    # allow_patterns 确保只下载 GGUF
     local_dir = snapshot_download(
         repo_id=repo_id,
         allow_patterns=[filename],
-        local_dir_use_symlinks=False # 在 Space 中有时软链接会有问题，禁用更稳
     )
-    # 拼接完整路径
-    # snapshot_download 默认会保持目录结构，或者我们直接搜寻下载目录
     gguf_path = os.path.join(local_dir, filename)
-    # 如果直接拼接找不到，尝试搜索（容错）
     if not os.path.exists(gguf_path):
         for root, dirs, files in os.walk(local_dir):
             if filename in files:
                 gguf_path = os.path.join(root, filename)
                 break
-    if not os.path.exists(gguf_path):
-        raise FileNotFoundError(f"在 {local_dir} 中找不到 {filename}")
-    log(f"模型路径: {gguf_path}。正在加载到内存...")
-    # 初始化模型
     llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
-    log("Llama 模型加载成功！")
     return llm, gguf_path
 def init_model(state):
-    """初始化按钮的回调函数"""
     try:
         if state.get("llm") is not None:
-            return "✅ 系统就绪 (模型已加载)", state
-        log("收到加载请求...")
-        # 下载并加载
         llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
-        # 更新状态
         state["llm"] = llm
         state["gguf_path"] = gguf_path
-        return "✅ 系统就绪", state
     except Exception as exc:
         tb = traceback.format_exc()
-        log(f"初始化错误: {exc}\n{tb}")
-        return f"❌ 初始化失败: {exc}", state
 def generate_response(prompt: str, max_tokens: int, state):
-    """生成按钮的回调函数"""
     try:
         if not prompt or prompt.strip() == "":
-            return "⚠️ 请输入指令。", "⚠️ 空闲", state
-        # 懒加载：如果没点初始化直接点生成，尝试自动加载
         if state.get("llm") is None:
             try:
-                log("未检测到模型，尝试自动加载...")
                 llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                 state["llm"] = llm
                 state["gguf_path"] = gguf_path
             except Exception as e:
-                return f"❌ 模型加载失败: {e}", f"❌ 错误", state
         llm = state.get("llm")
-        log(f"正在生成 (Prompt 长度={len(prompt)})...")
-        # 构造 Llama 3 格式的 Prompt
         system_prompt = "You are a helpful AI assistant."
-        # 简单拼接：System + User
-        # 如果需要更严格的格式，可以使用 tokenizer.apply_chat_template
-        # 这里为了通用性使用简单的文本拼接，Llama 3 通常也能理解
         full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-        # 推理
         output = llm(
             full_prompt,
             max_tokens=max_tokens,
-            stop=["<|eot_id|>"], # 停止符
             echo=False
         )
         text = output['choices'][0]['text']
-        log("生成完成。")
-        return text, "✅ 生成完毕", state
     except Exception as exc:
         tb = traceback.format_exc()
-        log(f"生成错误: {exc}\n{tb}")
-        return f"运行出错: {exc}", f"❌ 异常", state
 def soft_clear(current_state):
-    """清除按钮：只清空文本，保留模型"""
-    status = "✅ 系统就绪" if current_state.get("llm") else "⚪ 未初始化"
     return "", status, current_state
-# ---------------- Gradio UI 构建 ----------------
-# 主题设置
 theme = gr.themes.Soft(
     primary_hue="indigo",
     secondary_hue="slate",
-    neutral_hue="slate"
-)
-# 自定义 CSS
-custom_css = """
-.footer-text { font-size: 0.8em; color: gray; text-align: center; }
-"""
-with gr.Blocks(title="Llama 3.2 Lab2 Project") as demo:
-    # 标题头
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("# 🦙 Llama 3.2 (3B) Fine-Tuned Chatbot")
@@ -156,54 +149,54 @@ with gr.Blocks(title="Llama 3.2 Lab2 Project") as demo:
                 """
             )
         with gr.Column(scale=0, min_width=150):
-            status_label = gr.Label(value="⚪ 未初始化", label="系统状态", show_label=False)
-    # 主体布局
     with gr.Row():
-        # 左侧：输入与控制
         with gr.Column(scale=4):
             with gr.Group():
                 prompt_in = gr.Textbox(
                     lines=5,
-                    label="用户指令 (User Input)",
-                    placeholder="例如：请解释量子力学...",
                     elem_id="prompt-input"
                 )
-                with gr.Accordion("⚙️ 高级参数 (Advanced)", open=False):
                     max_tokens = gr.Slider(
                         minimum=16,
                         maximum=1024,
                         step=16,
                         value=DEFAULT_MAX_TOKENS,
-                        label="最大生成长度 (Max Tokens)",
-                        info="生成的越长，CPU 耗时越久。"
                     )
-            with gr.Row():
-                init_btn = gr.Button("🚀 1. 加载模型 (Load)", variant="secondary")
-                gen_btn = gr.Button("✨ 2. 生成回复 (Generate)", variant="primary")
-            clear_btn = gr.Button("🗑️ 清空历史 (Clear)", variant="stop")
-        # 右侧：输出显示
         with gr.Column(scale=6):
             output_txt = gr.Textbox(
-                label="模型回复 (Response)",
                 lines=15,
             )
-    # 底部说明
     with gr.Row():
         gr.Markdown(
-            "⚠️ *注意：推理在免费 CPU 上运行，速度可能较慢。首次运行时需要下载模型（约2GB），请耐心等待。*",
             elem_classes=["footer-text"]
         )
-    # 状态存储
     state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})
-    # 事件绑定
     init_btn.click(
         fn=init_model,
         inputs=state,
@@ -221,6 +214,6 @@ with gr.Blocks(title="Llama 3.2 Lab2 Project") as demo:
     clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
     clear_btn.click(lambda: "", outputs=[output_txt])
-# 启动应用
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 from huggingface_hub import snapshot_download
 import gradio as gr
+# Attempt to import llama_cpp, if failed, prompt in the UI
 try:
     from llama_cpp import Llama
 except Exception as e:
     Llama = None
     Llama_import_error = e
+# ---------- Configuration Area ----------
+# ★★★ Please change this to your model repository ★★★
+MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
+# Specify to download only the q4_k_m file to prevent running out of disk space
 GGUF_FILENAME = "unsloth.Q4_K_M.gguf"
+DEFAULT_N_CTX = 2048  # Context length
+DEFAULT_MAX_TOKENS = 256 # Default generation length
+DEFAULT_N_THREADS = 2 # Recommended 2 for free CPU tier
 # ------------------------------
 def log(msg: str):
 def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
     if Llama is None:
+        raise RuntimeError(f"llama-cpp-python not installed or failed to load: {Llama_import_error}")
+    log(f"Starting model download: {repo_id} / {filename} ...")
+    # Use snapshot_download to download a single file
+    # allow_patterns ensures only the GGUF file is downloaded
     local_dir = snapshot_download(
         repo_id=repo_id,
         allow_patterns=[filename],
+        local_dir_use_symlinks=False # Disabling symlinks for stability in Spaces
     )
+    # Construct full path
+    # snapshot_download usually preserves directory structure, otherwise we search
     gguf_path = os.path.join(local_dir, filename)
+    # Search for the file if direct path fails (for robustness)
     if not os.path.exists(gguf_path):
         for root, dirs, files in os.walk(local_dir):
             if filename in files:
                 gguf_path = os.path.join(root, filename)
                 break
+        if not os.path.exists(gguf_path):
+            raise FileNotFoundError(f"Could not find {filename} in {local_dir}")
+    log(f"Model path: {gguf_path}. Loading into memory...")
+    # Initialize the model
     llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
+    log("Llama model loaded successfully!")
     return llm, gguf_path
 def init_model(state):
+    """Callback function for the Load button"""
     try:
         if state.get("llm") is not None:
+            return "✅ System Ready (Model Loaded)", state
+        log("Received load request...")
+        # Download and load
         llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
+        # Update state
         state["llm"] = llm
         state["gguf_path"] = gguf_path
+        return "✅ System Ready", state
     except Exception as exc:
         tb = traceback.format_exc()
+        log(f"Initialization Error: {exc}\n{tb}")
+        return f"❌ Initialization Failed: {exc}", state
 def generate_response(prompt: str, max_tokens: int, state):
+    """Callback function for the Generate button"""
     try:
         if not prompt or prompt.strip() == "":
+            return "⚠️ Please enter an instruction.", "⚠️ Idle", state
+        # Lazy loading: attempt to auto-load if Generate is clicked without explicit initialization
         if state.get("llm") is None:
             try:
+                log("Model not detected, attempting auto-load...")
                 llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                 state["llm"] = llm
                 state["gguf_path"] = gguf_path
             except Exception as e:
+                return f"❌ Model Load Failed: {e}", f"❌ Error", state
         llm = state.get("llm")
+        log(f"Generating (Prompt Length={len(prompt)})...")
+        # Construct Llama 3 format Prompt
         system_prompt = "You are a helpful AI assistant."
+        # Simple concatenation: System + User
+        # For strict formatting, use tokenizer.apply_chat_template
+        # Using simple text concatenation here for generality, Llama 3 usually understands
         full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+        # Inference
         output = llm(
             full_prompt,
             max_tokens=max_tokens,
+            stop=["<|eot_id|>"], # Stop token
             echo=False
         )
         text = output['choices'][0]['text']
+        log("Generation complete.")
+        return text, "✅ Generation Complete", state
     except Exception as exc:
         tb = traceback.format_exc()
+        log(f"Generation Error: {exc}\n{tb}")
+        return f"Runtime Error: {exc}", f"❌ Exception", state
 def soft_clear(current_state):
+    """Clear button: only clears text, keeps the model loaded"""
+    status = "✅ System Ready" if current_state.get("llm") else "⚪ Not Initialized"
     return "", status, current_state
+# ---------------- Gradio UI Construction ----------------
+# Theme settings
 theme = gr.themes.Soft(
     primary_hue="indigo",
     secondary_hue="slate",
+    neutral_hue="slate")
+# Custom CSS
+custom_css = """.footer-text { font-size: 0.8em; color: gray; text-align: center; }"""
+with gr.Blocks(title="Llama 3.2 Lab2 Project", css=custom_css, theme=theme) as demo:
+    # Header
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("# 🦙 Llama 3.2 (3B) Fine-Tuned Chatbot")
                 """
             )
         with gr.Column(scale=0, min_width=150):
+            status_label = gr.Label(value="⚪ Not Initialized", label="System Status", show_label=False)
+    # Main layout
     with gr.Row():
+        # Left: Input and Controls
         with gr.Column(scale=4):
             with gr.Group():
                 prompt_in = gr.Textbox(
                     lines=5,
+                    label="User Instruction (User Input)",
+                    placeholder="e.g., Explain Quantum Mechanics...",
                     elem_id="prompt-input"
                 )
+                with gr.Accordion("⚙️ Advanced Parameters", open=False):
                     max_tokens = gr.Slider(
                         minimum=16,
                         maximum=1024,
                         step=16,
                         value=DEFAULT_MAX_TOKENS,
+                        label="Max Generation Length (Max Tokens)",
+                        info="Longer generations will take more CPU time."
                     )
+                with gr.Row():
+                    init_btn = gr.Button("🚀 1. Load Model", variant="secondary")
+                    gen_btn = gr.Button("✨ 2. Generate Response", variant="primary")
+                clear_btn = gr.Button("🗑️ Clear Chat", variant="stop")
+        # Right: Output Display
         with gr.Column(scale=6):
             output_txt = gr.Textbox(
+                label="Model Response (Response)",
                 lines=15,
             )
+    # Footer
     with gr.Row():
         gr.Markdown(
+            "⚠️ *Note: Inference runs on a free CPU, so speed may be slow. The model (approx. 2GB) must be downloaded on first run, please be patient.*",
             elem_classes=["footer-text"]
         )
+    # State storage
     state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})
+    # Event binding
     init_btn.click(
         fn=init_model,
         inputs=state,
     clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
     clear_btn.click(lambda: "", outputs=[output_txt])
+# Launch the application
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)