Marcus719 committed on
Commit
8665c7a
·
verified ·
1 Parent(s): 49dc795

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -153
app.py CHANGED
@@ -1,230 +1,211 @@
1
- # app.py
2
  import os
3
  import traceback
4
  import time
5
  from huggingface_hub import snapshot_download
6
  import gradio as gr
7
 
8
- # Try to import llama-cpp-python; Space should install it from requirements.
9
- # If the import fails, the app will still start and show the error in status.
10
  try:
11
  from llama_cpp import Llama
12
- except Exception as e: # pragma: no cover
13
  Llama = None
14
  Llama_import_error = e
15
 
16
- # ---------- Configuration ----------
17
- MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
18
- GGUF_ALLOW_PATTERNS = ["*.gguf"]
19
- DEFAULT_N_CTX = 4096
20
- DEFAULT_MAX_TOKENS = 256
21
- DEFAULT_N_THREADS = 4 # adjust based on Space CPU / available threads
22
- # -----------------------------------
 
 
 
23
 
24
  def log(msg: str):
25
  print(f"[app] {time.strftime('%Y-%m-%d %H:%M:%S')} - {msg}", flush=True)
26
 
27
- def find_gguf_in_dir(local_dir: str):
28
- for f in os.listdir(local_dir):
29
- if f.endswith(".gguf"):
30
- return os.path.join(local_dir, f)
31
- return None
32
-
33
- def load_model_from_hub(repo_id: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
34
- """
35
- Downloads the model files using huggingface_hub.snapshot_download and returns
36
- an initialized Llama instance (from llama_cpp).
37
- """
38
  if Llama is None:
39
- raise RuntimeError(f"llama-cpp-python is not available: {Llama_import_error}")
40
-
41
- log(f"Starting snapshot_download for repo: {repo_id} (this may take a while on first run)")
42
- local_dir = snapshot_download(repo_id=repo_id, allow_patterns=GGUF_ALLOW_PATTERNS)
43
- log(f"Downloaded/located files at: {local_dir}")
44
-
45
- gguf_path = find_gguf_in_dir(local_dir)
46
- if gguf_path is None:
47
- raise FileNotFoundError(f"No .gguf file found in {local_dir}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- log(f"Found GGUF file: {gguf_path}. Initializing Llama loader...")
50
- # init the model
51
- llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads)
52
- log("Llama model initialized successfully.")
 
53
  return llm, gguf_path
54
 
55
- # The Gradio app uses a simple state pattern: we store the Llama instance and gguf path in a state dict.
56
  def init_model(state):
57
- """
58
- Called by the Init button. Downloads and initializes the model if not already loaded.
59
- Returns a status message for the status Label and the state object for persistence.
60
- """
61
  try:
62
  if state.get("llm") is not None:
63
- return "✅ Ready (model already loaded)", state
64
-
65
- # show immediate feedback to user via return
66
- log("Init requested - loading model now.")
67
- state["status"] = "Downloading model from Hub..."
68
- # download and load
69
- llm, gguf_path = load_model_from_hub(MODEL_REPO)
70
- # save into state
71
  state["llm"] = llm
72
  state["gguf_path"] = gguf_path
73
- state["status"] = "✅ Ready"
74
- return "✅ Ready", state
75
  except Exception as exc:
76
  tb = traceback.format_exc()
77
- log(f"Error during init: {exc}\n{tb}")
78
- state["status"] = f"❌ Init failed: {exc}"
79
- return f"❌ Init failed: {exc}", state
80
 
81
  def generate_response(prompt: str, max_tokens: int, state):
82
- """
83
- Main generate function wired to the Generate button.
84
- Returns (output_text, status_text, state)
85
- """
86
  try:
87
  if not prompt or prompt.strip() == "":
88
- return "Please provide a prompt.", "⚠️ Idle", state
89
-
90
- # Lazy load if model not initialized
91
  if state.get("llm") is None:
92
- # try to load on-the-fly
93
- log("Model not loaded, attempting lazy-load...")
94
- # provide immediate user-visible status by returning early while we load,
95
- # but Gradio can't stream two-stage responses easily, so we'll block here and update status after.
96
  try:
97
- llm, gguf_path = load_model_from_hub(MODEL_REPO)
 
98
  state["llm"] = llm
99
  state["gguf_path"] = gguf_path
100
- log("Lazy-load successful.")
101
  except Exception as e:
102
- tb = traceback.format_exc()
103
- log(f"Lazy-load failed: {e}\n{tb}")
104
- return f"Error loading model: {e}", f"❌ Error: {e}", state
105
 
106
  llm = state.get("llm")
107
- if llm is None:
108
- return "Model not initialized.", "❌ Not initialized", state
109
-
110
- log(f"Generating for prompt (len={len(prompt)}), max_tokens={max_tokens}")
111
- status_msg = "Processing..."
112
- # Call the model synchronously; this will block until generation is done
113
- out = llm(prompt, max_tokens=max_tokens)
114
- # llama_cpp returns different shapes depending on version; handle safely
115
- text = ""
116
- if isinstance(out, dict):
117
- # common shape: {"choices":[{"text": "..."}], ...}
118
- try:
119
- choices = out.get("choices")
120
- if choices and isinstance(choices, list) and len(choices) > 0:
121
- text = choices[0].get("text", "")
122
- else:
123
- text = str(out)
124
- except Exception:
125
- text = str(out)
126
- else:
127
- # fallback
128
- text = str(out)
129
-
130
- log("Generation completed.")
131
- return text, "✅ Done", state
132
 
133
  except Exception as exc:
134
  tb = traceback.format_exc()
135
- log(f"Error during generation: {exc}\n{tb}")
136
- return f"Error: {exc}\n\n{tb}", f"❌ Error: {exc}", state
 
 
 
 
 
137
 
138
- # ---------------- Gradio UI ----------------
139
 
140
- # 使用 Soft 主题,配色更具现代感
141
  theme = gr.themes.Soft(
142
  primary_hue="indigo",
143
  secondary_hue="slate",
144
- neutral_hue="slate",
145
- font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"]
146
  )
147
 
148
- # 自定义 CSS 稍微调整一下边距和圆角
149
  custom_css = """
150
- #response-box {
151
- font-family: 'Inter', sans-serif;
152
- background-color: #f9fafb;
153
- border-radius: 8px;
154
- padding: 10px;
155
- }
156
  """
157
 
158
  with gr.Blocks(title="Llama 3.2 Lab2 Project", theme=theme, css=custom_css) as demo:
159
 
160
- # --- 标题和介绍区域 ---
161
  with gr.Row():
162
  with gr.Column(scale=1):
163
  gr.Markdown("# 🦙 Llama 3.2 (3B) Fine-Tuned Chatbot")
164
  gr.Markdown(
165
- """
166
- **ID2223 Lab 2 Project** | Fine-tuned on the **FineTome-100k** dataset.
167
- Running locally on CPU via **GGUF** quantization (4-bit).
168
  """
169
  )
170
  with gr.Column(scale=0, min_width=150):
171
- # 状态显示放在右上角,醒目
172
- status_label = gr.Label(value="⚪ Not initialized", label="System Status", show_label=False)
173
 
174
- # --- 主体布局:左侧控制,右侧输出 ---
175
  with gr.Row():
176
-
177
- # 左侧:控制面板
178
  with gr.Column(scale=4):
179
  with gr.Group():
180
  prompt_in = gr.Textbox(
181
  lines=5,
182
- label="User Instruction",
183
- placeholder="E.g., Explain quantum entanglement to a 5-year-old...",
184
  elem_id="prompt-input"
185
  )
186
 
187
- # 参数折叠起来,保持界面清爽
188
- with gr.Accordion("⚙️ Advanced Parameters", open=False):
189
  max_tokens = gr.Slider(
190
  minimum=16,
191
  maximum=1024,
192
  step=16,
193
  value=DEFAULT_MAX_TOKENS,
194
- label="Max Generation Tokens",
195
- info="Longer generations require more CPU time."
196
  )
197
 
198
- # 按钮区域
199
  with gr.Row():
200
- init_btn = gr.Button("🚀 1. Load Model", variant="secondary", scale=1)
201
- gen_btn = gr.Button("✨ 2. Generate", variant="primary", scale=2)
202
 
203
- with gr.Row():
204
- clear_btn = gr.Button("🗑️ Clear History", variant="stop")
205
 
206
- # 右侧:回复展
207
  with gr.Column(scale=6):
208
  output_txt = gr.Textbox(
209
- label="Llama Response",
210
  lines=15,
211
- placeholder="The model response will appear here...",
212
- show_copy_button=True, # 允许复制内容
213
- elem_id="response-box"
214
  )
215
 
216
- # --- 底部版权/说明 ---
217
  with gr.Row():
218
  gr.Markdown(
219
- "⚠️ *Note: Inference is running on CPU. Generation speed depends on the Space hardware.*",
220
  elem_classes=["footer-text"]
221
  )
222
 
223
- # --- 状态管理 (Hidden) ---
224
  state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})
225
 
226
- # --- 事件绑定 ---
227
- # 点击 Load Model
228
  init_btn.click(
229
  fn=init_model,
230
  inputs=state,
@@ -232,7 +213,6 @@ with gr.Blocks(title="Llama 3.2 Lab2 Project", theme=theme, css=custom_css) as d
232
  show_progress=True
233
  )
234
 
235
- # 点击 Generate
236
  gen_btn.click(
237
  fn=generate_response,
238
  inputs=[prompt_in, max_tokens, state],
@@ -240,22 +220,9 @@ with gr.Blocks(title="Llama 3.2 Lab2 Project", theme=theme, css=custom_css) as d
240
  show_progress=True
241
  )
242
 
243
- # 点击 Clear
244
- def clear_all():
245
- return "", "⚪ Ready", {"llm": None, "gguf_path": None, "status": "Not initialized"}
246
-
247
- # 注意:Clear 按钮逻辑稍微修改,避免清空掉已加载的模型对象
248
- # 这里的 clear_all 只是重置了 UI,实际你可以保留 state 中的 llm 以免重复加载
249
- # 改进版 Clear 逻辑:
250
- def soft_clear(current_state):
251
- # 保持模型加载状态,只清空文本
252
- status = "✅ Ready" if current_state.get("llm") else "⚪ Not initialized"
253
- return "", status, current_state
254
-
255
  clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
256
- # 同时也清空输出框
257
  clear_btn.click(lambda: "", outputs=[output_txt])
258
 
259
- # Launch configuration
260
  if __name__ == "__main__":
261
- demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
 
 
1
  import os
2
  import traceback
3
  import time
4
  from huggingface_hub import snapshot_download
5
  import gradio as gr
6
 
7
# Try to import llama_cpp; if it is missing, keep the app alive and surface
# the error in the UI status instead of crashing at import time.
try:
    from llama_cpp import Llama
except Exception as e:
    Llama = None
    Llama_import_error = e

# ---------- Configuration ----------
# ★★★ Point this at your own model repository ★★★
MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
# Download only the q4_k_m file so the Space disk is not filled with other quants.
GGUF_FILENAME = "unsloth.Q4_K_M.gguf"

DEFAULT_N_CTX = 2048       # context window length
DEFAULT_MAX_TOKENS = 256   # default generation length
DEFAULT_N_THREADS = 2      # 2 threads recommended on the free CPU tier
# -----------------------------------
24
 
25
def log(msg: str):
    """Print a timestamped app log line, flushed so it appears immediately in Space logs."""
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f"[app] {stamp} - {msg}", flush=True)
27
 
28
def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
    """Download a single GGUF file from the Hub and load it with llama.cpp.

    Parameters
    ----------
    repo_id : str
        Hugging Face repository that contains the GGUF file.
    filename : str
        Exact GGUF filename to fetch; used as an allow-pattern so no other
        repo files are downloaded.
    n_ctx : int
        Context window size passed to ``Llama``.
    n_threads : int
        CPU thread count passed to ``Llama``.

    Returns
    -------
    tuple
        ``(llm, gguf_path)`` — the initialized ``Llama`` instance and the
        resolved local path of the GGUF file.

    Raises
    ------
    RuntimeError
        If llama-cpp-python failed to import.
    FileNotFoundError
        If the requested file is not present after the download.
    """
    if Llama is None:
        raise RuntimeError(f"llama-cpp-python 未安装或加载失败: {Llama_import_error}")

    # BUG FIX: these messages previously interpolated a literal "(unknown)"
    # placeholder instead of the requested filename.
    log(f"开始下载模型: {repo_id} / {filename} ...")

    # Download only the requested GGUF; allow_patterns prevents pulling every
    # quantization in the repo and blowing the Space disk quota.
    local_dir = snapshot_download(
        repo_id=repo_id,
        allow_patterns=[filename],
        local_dir_use_symlinks=False  # NOTE(review): deprecated in recent huggingface_hub — confirm before upgrading
    )

    # snapshot_download may preserve sub-directory structure, so try the
    # direct join first, then fall back to a recursive search.
    gguf_path = os.path.join(local_dir, filename)
    if not os.path.exists(gguf_path):
        for root, _dirs, files in os.walk(local_dir):
            if filename in files:
                gguf_path = os.path.join(root, filename)
                break

    if not os.path.exists(gguf_path):
        raise FileNotFoundError(f"在 {local_dir} 中找不到 {filename}")

    log(f"模型路径: {gguf_path}。正在加载到内存...")

    # Initialize the model; verbose=False keeps llama.cpp banner noise out of logs.
    llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
    log("Llama 模型加载成功!")
    return llm, gguf_path
62
 
 
63
  def init_model(state):
64
+ """初始化按钮的回调函数"""
 
 
 
65
  try:
66
  if state.get("llm") is not None:
67
+ return "✅ 系统就绪 (模型已加载)", state
68
+
69
+ log("收到加载请求...")
70
+ # 下载并加载
71
+ llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
72
+
73
+ # 更新状态
 
74
  state["llm"] = llm
75
  state["gguf_path"] = gguf_path
76
+
77
+ return "✅ 系统就绪", state
78
  except Exception as exc:
79
  tb = traceback.format_exc()
80
+ log(f"初始化错误: {exc}\n{tb}")
81
+ return f"❌ 初始化失败: {exc}", state
 
82
 
83
def generate_response(prompt: str, max_tokens: int, state):
    """Callback for the "Generate" button.

    Lazily loads the model if it has not been initialized, wraps the user
    prompt in the Llama 3 chat template, and runs a blocking completion.

    Parameters
    ----------
    prompt : str
        Raw user instruction.
    max_tokens : int
        Generation length cap forwarded to the model.
    state : dict
        Mutable gr.State dict holding ``llm`` and ``gguf_path``.

    Returns
    -------
    tuple
        ``(output_text, status_text, state)``.
    """
    try:
        if not prompt or prompt.strip() == "":
            return "⚠️ 请输入指令。", "⚠️ 空闲", state

        # Lazy-load: if the user skipped the Init button, load the model now.
        if state.get("llm") is None:
            try:
                log("未检测到模型,尝试自动加载...")
                llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                state["llm"] = llm
                state["gguf_path"] = gguf_path
            except Exception as e:
                # FIX: dropped pointless f-prefix on a placeholder-free string.
                return f"❌ 模型加载失败: {e}", "❌ 错误", state

        llm = state.get("llm")

        log(f"正在生成 (Prompt 长度={len(prompt)})...")

        # Build a Llama-3-style chat prompt (system + user + assistant header).
        # apply_chat_template would be stricter; plain concatenation is used
        # here for simplicity and Llama 3 generally handles it.
        system_prompt = "You are a helpful AI assistant."
        full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

        # Blocking inference; stop at the end-of-turn token.
        output = llm(
            full_prompt,
            max_tokens=max_tokens,
            stop=["<|eot_id|>"],
            echo=False
        )

        # Assumes the llama_cpp completion dict shape {"choices":[{"text":...}]};
        # any deviation is caught by the outer handler and reported in the UI.
        text = output['choices'][0]['text']
        log("生成完成。")
        return text, "✅ 生成完毕", state

    except Exception as exc:
        tb = traceback.format_exc()
        log(f"生成错误: {exc}\n{tb}")
        # FIX: dropped pointless f-prefix on a placeholder-free string.
        return f"运行出错: {exc}", "❌ 异常", state
126
+
127
def soft_clear(current_state):
    """Clear-button callback: reset the text fields but keep the loaded model in state."""
    if current_state.get("llm"):
        status = "✅ 系统就绪"
    else:
        status = "⚪ 未初始化"
    return "", status, current_state
131
 
132
# ---------------- Gradio UI setup ----------------

# Theme: Soft preset with indigo/slate hues.
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="slate",
    neutral_hue="slate"
)

# Custom CSS for the footer note.
custom_css = """
.footer-text { font-size: 0.8em; color: gray; text-align: center; }
"""
145
 
146
  with gr.Blocks(title="Llama 3.2 Lab2 Project", theme=theme, css=custom_css) as demo:
147
 
148
+ # 标题
149
  with gr.Row():
150
  with gr.Column(scale=1):
151
  gr.Markdown("# 🦙 Llama 3.2 (3B) Fine-Tuned Chatbot")
152
  gr.Markdown(
153
+ f"""
154
+ **ID2223 Lab 2 Project** | Fine-tuned on **FineTome-100k**.
155
+ Running on CPU (GGUF 4-bit) | Model: `{MODEL_REPO}`
156
  """
157
  )
158
  with gr.Column(scale=0, min_width=150):
159
+ status_label = gr.Label(value="⚪ 未初始化", label="系统状态", show_label=False)
 
160
 
161
+ # 主体布局
162
  with gr.Row():
163
+ # 左侧:输入与控制
 
164
  with gr.Column(scale=4):
165
  with gr.Group():
166
  prompt_in = gr.Textbox(
167
  lines=5,
168
+ label="用户指令 (User Input)",
169
+ placeholder="例如:请解释量子力学...",
170
  elem_id="prompt-input"
171
  )
172
 
173
+ with gr.Accordion("⚙️ 高级参数 (Advanced)", open=False):
 
174
  max_tokens = gr.Slider(
175
  minimum=16,
176
  maximum=1024,
177
  step=16,
178
  value=DEFAULT_MAX_TOKENS,
179
+ label="最大生成长度 (Max Tokens)",
180
+ info="生成的越长,CPU 耗时越久。"
181
  )
182
 
 
183
  with gr.Row():
184
+ init_btn = gr.Button("🚀 1. 加载模型 (Load)", variant="secondary")
185
+ gen_btn = gr.Button("✨ 2. 生成回复 (Generate)", variant="primary")
186
 
187
+ clear_btn = gr.Button("🗑️ 清空历史 (Clear)", variant="stop")
 
188
 
189
+ # 右侧:输出显示
190
  with gr.Column(scale=6):
191
  output_txt = gr.Textbox(
192
+ label="模型回复 (Response)",
193
  lines=15,
194
+ show_copy_button=True,
195
+ interactive=False
 
196
  )
197
 
198
+ # 底部说明
199
  with gr.Row():
200
  gr.Markdown(
201
+ "⚠️ *注意:推理在免费 CPU 上运行,速度可能较慢。首次运行时需要下载模型(约2GB),请耐心等待。*",
202
  elem_classes=["footer-text"]
203
  )
204
 
205
+ # 状态存储
206
  state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})
207
 
208
+ # 事件绑定
 
209
  init_btn.click(
210
  fn=init_model,
211
  inputs=state,
 
213
  show_progress=True
214
  )
215
 
 
216
  gen_btn.click(
217
  fn=generate_response,
218
  inputs=[prompt_in, max_tokens, state],
 
220
  show_progress=True
221
  )
222
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
 
224
  clear_btn.click(lambda: "", outputs=[output_txt])
225
 
226
# Entry point: bind to all interfaces on the standard Gradio Space port.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)