Marcus719 committed on
Commit
9ebaef7
·
verified ·
1 Parent(s): 078bd3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -143
app.py CHANGED
@@ -1,166 +1,226 @@
1
- import gradio as gr
2
- import time
3
  import os
4
- # from llama_cpp import Llama # Uncomment if running locally with the library installed
5
- import numpy as np
 
 
6
 
7
- # --- CONFIGURATION ---
8
- GGUF_MODEL_PATH_1B = "./llama-3.2-1b-summary-q4_k_m.gguf"
9
- GGUF_MODEL_PATH_3B = "./llama-3.2-3b-summary-q4_k_m.gguf"
 
 
 
10
 
11
- SYSTEM_PROMPT = (
12
- "You are an expert summarization bot. Your task is to provide a comprehensive "
13
- "and concise summary of the user's document based on the requested length."
14
- )
 
15
 
16
- # ----------------------------------------------------
17
- # 1. MODEL LOADING FUNCTION
18
- # ----------------------------------------------------
19
- # Note: For demonstration purposes, I am keeping your logic structure.
20
- # Ensure llama-cpp-python is installed to run this part.
21
- def load_llm(model_path):
22
- print(f"Attempting to load GGUF model: {model_path}...")
23
- try:
24
- from llama_cpp import Llama
25
- llm = Llama(
26
- model_path=model_path,
27
- n_gpu_layers=0,
28
- n_ctx=2048,
29
- verbose=True
30
- )
31
- print(f"Successfully loaded model: {model_path}")
32
- return llm
33
- except Exception as e:
34
- print(f"Error loading model {model_path}: {e}")
35
- # Placeholder for when models are missing (prevents crash during UI testing)
36
- return None
37
-
38
- # Load models globally
39
- llm_1b = load_llm(GGUF_MODEL_PATH_1B)
40
- llm_3b = load_llm(GGUF_MODEL_PATH_3B)
41
-
42
- # ----------------------------------------------------
43
- # 2. CORE PROCESSING FUNCTION
44
- # ----------------------------------------------------
45
- def generate_summary_and_compare(long_document, selected_model, summary_length):
46
- # 1. Select Model
47
- if "1B" in selected_model:
48
- selected_llm = llm_1b
49
- model_name_display = "Llama-3.2-1B"
50
- elif "3B" in selected_model:
51
- selected_llm = llm_3b
52
- model_name_display = "Llama-3.2-3B"
53
- else:
54
- return "Error: Invalid model selection.", ""
55
-
56
- # Check if model loaded successfully
57
- if selected_llm is None:
58
- return "Error: Model file not found or failed to load.", "Latency: N/A"
59
-
60
- # 2. Build Prompt
61
- instruction = f"Please summarize the following document and keep the summary {summary_length}. Document: \n\n{long_document}"
62
- full_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
63
-
64
- # 3. Inference
65
- start_time = time.time()
66
- max_tokens = 250 if "Detailed" in summary_length else 100
67
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  try:
69
- output = selected_llm(
70
- full_prompt,
71
- max_tokens=max_tokens,
72
- stop=["<|eot_id|>"],
73
- temperature=0.7,
74
- echo=False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  )
76
- end_time = time.time()
77
- total_latency = end_time - start_time
78
- summary_output = output["choices"][0]["text"].strip()
79
- except Exception as e:
80
- total_latency = time.time() - start_time
81
- summary_output = f"Inference Error on {model_name_display}. Error: {e}"
82
-
83
- # 4. Report
84
- speed_report = f"Model: {model_name_display}\nTotal Latency: {total_latency:.2f} seconds"
85
-
86
- return summary_output, speed_report
 
 
 
87
 
88
- # ----------------------------------------------------
89
- # 3. GRADIO INTERFACE (UI IMPROVED)
90
- # ----------------------------------------------------
91
- # 使用 Soft 主题,色调简洁
92
  theme = gr.themes.Soft(
93
- primary_hue="blue",
94
- neutral_hue="slate",
95
- ).set(
96
- button_primary_background_fill="*primary_500",
97
- button_primary_background_fill_hover="*primary_600",
98
  )
99
 
100
- with gr.Blocks(title="KTH ID2223 Lab 2", theme=theme) as demo:
 
 
 
 
 
101
 
102
- # Header Section
103
  with gr.Row():
104
- gr.Markdown(
105
- """
106
- # LLM Document Summarizer
107
- Select a model and input your text below to generate a summary.
108
- """
109
- )
110
-
111
- with gr.Row(equal_height=False):
112
-
113
- # --- Left Column: Input & Controls ---
114
- with gr.Column(scale=4, variant="panel"):
115
- gr.Markdown("### Input Configuration")
116
-
117
- input_document = gr.Textbox(
118
- lines=12,
119
- label="Document Content",
120
- placeholder="Paste the text you need summarized here...",
121
- show_copy_button=True
122
  )
123
-
124
- # Grouping settings for a cleaner look
 
 
 
 
 
125
  with gr.Group():
126
- with gr.Row():
127
- model_selector = gr.Radio(
128
- ["Llama-3.2-1B (Faster)", "Llama-3.2-3B (Quality)"],
129
- label="Model Selection",
130
- value="Llama-3.2-1B (Faster)"
 
 
 
 
 
 
 
 
 
 
131
  )
132
-
133
- summary_control = gr.Radio(
134
- ["Concise (<50 words)", "Detailed (<200 words)"],
135
- label="Summary Length",
136
- value="Concise (<50 words)"
137
- )
138
-
139
- process_button = gr.Button("Generate Summary", variant="primary", size="lg")
140
-
141
- # --- Right Column: Output & Stats ---
142
- with gr.Column(scale=5):
143
- gr.Markdown("### Results")
144
 
145
- output_summary = gr.Textbox(
146
- label="Generated Summary",
147
- lines=10,
148
- interactive=False,
149
- show_copy_button=True
150
- )
151
 
152
- performance_report = gr.Textbox(
153
- label="Performance Metrics",
154
- lines=2,
155
- interactive=False
 
 
 
156
  )
157
 
158
- # Event Binding
159
- process_button.click(
160
- fn=generate_summary_and_compare,
161
- inputs=[input_document, model_selector, summary_control],
162
- outputs=[output_summary, performance_report]
 
 
 
 
 
 
 
 
 
 
 
163
  )
 
 
 
 
 
 
 
 
 
 
164
 
 
165
  if __name__ == "__main__":
166
- demo.launch()
 
 
 
1
  import os
2
+ import traceback
3
+ import time
4
+ from huggingface_hub import snapshot_download
5
+ import gradio as gr
6
 
7
+ # 尝试导入 llama_cpp,如果失败则在 UI 中提示
8
+ try:
9
+ from llama_cpp import Llama
10
+ except Exception as e:
11
+ Llama = None
12
+ Llama_import_error = e
13
 
14
+ # ---------- 配置区域 ----------
15
+ # ★★★ 请在这里修改为你的模型仓库 ★★★
16
+ MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
17
+ # 指定只下载 q4_k_m 文件,防止下载多余文件爆盘
18
+ GGUF_FILENAME = "unsloth.Q4_K_M.gguf"
19
 
20
+ DEFAULT_N_CTX = 2048 # 上下文长度
21
+ DEFAULT_MAX_TOKENS = 256 # 默认生成长度
22
+ DEFAULT_N_THREADS = 2 # 免费 CPU 建议设为 2
23
+ # ------------------------------
24
+
25
def log(msg: str):
    """Print a timestamped log line with the app tag, flushed immediately."""
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f"[app] {timestamp} - {msg}", flush=True)
27
+
28
def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
    """Download a single GGUF file from the Hugging Face Hub and load it with llama.cpp.

    Args:
        repo_id: Hub repository id that contains the GGUF file.
        filename: Name of the GGUF file to download.
        n_ctx: Context window size passed to Llama.
        n_threads: Number of CPU threads used for inference.

    Returns:
        (llm, gguf_path): the loaded Llama instance and the local file path.

    Raises:
        RuntimeError: if llama-cpp-python could not be imported.
        FileNotFoundError: if the requested file is missing from the snapshot.
    """
    if Llama is None:
        raise RuntimeError(f"llama-cpp-python 未安装或加载失败: {Llama_import_error}")

    # FIX: these messages previously interpolated a garbled "(unknown)"
    # placeholder; restore the intended {filename} interpolation.
    log(f"开始下载模型: {repo_id} / {filename} ...")

    # Use snapshot_download to fetch a single file; allow_patterns ensures
    # only the GGUF is downloaded.
    local_dir = snapshot_download(
        repo_id=repo_id,
        allow_patterns=[filename],
        # Symlinks are sometimes flaky in Spaces; disabling them is safer.
        # NOTE(review): this kwarg is deprecated in recent huggingface_hub
        # releases (it is ignored there) — confirm the pinned hub version.
        local_dir_use_symlinks=False
    )

    # snapshot_download keeps the repo's directory layout, so the file is
    # usually at the top level of the snapshot directory.
    gguf_path = os.path.join(local_dir, filename)

    # If the direct join misses, search the snapshot recursively (fault tolerance).
    if not os.path.exists(gguf_path):
        for root, _dirs, files in os.walk(local_dir):
            if filename in files:
                gguf_path = os.path.join(root, filename)
                break

    if not os.path.exists(gguf_path):
        raise FileNotFoundError(f"在 {local_dir} 中找不到 {filename}")

    log(f"模型路径: {gguf_path}。正在加载到内存...")

    # Initialize the model.
    llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
    log("Llama 模型加载成功!")
    return llm, gguf_path
62
+
63
def init_model(state):
    """Callback for the "Load" button.

    Downloads/loads the model into `state` (idempotent: a second click is a
    no-op) and returns a (status_message, state) pair for the Gradio outputs.
    """
    try:
        # Model already present — nothing to do.
        if state.get("llm") is not None:
            return "✅ 系统就绪 (模型已加载)", state

        log("收到加载请求...")
        # Download the GGUF file and initialize llama.cpp.
        model, path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)

        # Persist into the session state.
        state["llm"] = model
        state["gguf_path"] = path
        return "✅ 系统就绪", state
    except Exception as exc:
        details = traceback.format_exc()
        log(f"初始化错误: {exc}\n{details}")
        return f"❌ 初始化失败: {exc}", state
82
+
83
def generate_response(prompt: str, max_tokens: int, state):
    """Callback for the "Generate" button.

    Wraps `prompt` in a Llama 3 chat template, runs inference with the model
    stored in `state`, and returns (response_text, status_message, state).
    If the model has not been loaded yet, it is loaded lazily here.
    """
    try:
        if not prompt or prompt.strip() == "":
            return "⚠️ 请输入指令。", "⚠️ 空闲", state

        # Lazy load: user clicked "Generate" without clicking "Load" first.
        if state.get("llm") is None:
            try:
                log("未检测到模型,尝试自动加载...")
                llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                state["llm"] = llm
                state["gguf_path"] = gguf_path
            except Exception as e:
                # FIX: dropped the pointless f-prefix on the constant status
                # string (same runtime value).
                return f"❌ 模型加载失败: {e}", "❌ 错误", state

        llm = state.get("llm")

        log(f"正在生成 (Prompt 长度={len(prompt)})...")

        # Build a Llama 3 style prompt: system turn + user turn, then the
        # assistant header so the model continues as the assistant.
        # (For stricter formatting one could use tokenizer.apply_chat_template;
        # plain concatenation is generally understood by Llama 3.)
        system_prompt = "You are a helpful AI assistant."
        full_prompt = (
            f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
            f"<|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|>"
            f"<|start_header_id|>assistant<|end_header_id|>\n\n"
        )

        # Inference
        output = llm(
            full_prompt,
            max_tokens=max_tokens,
            stop=["<|eot_id|>"],  # end-of-turn stop token
            echo=False,
        )

        text = output['choices'][0]['text']
        log("生成完成。")
        return text, "✅ 生成完毕", state

    except Exception as exc:
        tb = traceback.format_exc()
        log(f"生成错误: {exc}\n{tb}")
        # FIX: dropped the pointless f-prefix on the constant status string.
        return f"运行出错: {exc}", "❌ 异常", state
126
+
127
def soft_clear(current_state):
    """Callback for the "Clear" button: blank the prompt, keep the loaded model."""
    if current_state.get("llm"):
        status = "✅ 系统就绪"
    else:
        status = "⚪ 未初始化"
    return "", status, current_state
131
 
132
# ---------------- Gradio UI construction ----------------

# Theme configuration
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="slate",
    neutral_hue="slate"
)

# Custom CSS
custom_css = """
.footer-text { font-size: 0.8em; color: gray; text-align: center; }
"""

# FIX: `theme` and `custom_css` were defined above but never passed to
# gr.Blocks, so they had no effect; wire them in here.
with gr.Blocks(title="Llama 3.2 Lab2 Project", theme=theme, css=custom_css) as demo:

    # Header
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🦙 Llama 3.2 (3B) Fine-Tuned Chatbot")
            gr.Markdown(
                f"""
                **ID2223 Lab 2 Project** | Fine-tuned on **FineTome-100k**.
                Running on CPU (GGUF 4-bit) | Model: `{MODEL_REPO}`
                """
            )
        with gr.Column(scale=0, min_width=150):
            status_label = gr.Label(value="⚪ 未初始化", label="系统状态", show_label=False)

    # Main layout
    with gr.Row():
        # Left column: input and controls
        with gr.Column(scale=4):
            with gr.Group():
                prompt_in = gr.Textbox(
                    lines=5,
                    label="用户指令 (User Input)",
                    placeholder="例如:请解释量子力学...",
                    elem_id="prompt-input"
                )

                with gr.Accordion("⚙️ 高级参数 (Advanced)", open=False):
                    max_tokens = gr.Slider(
                        minimum=16,
                        maximum=1024,
                        step=16,
                        value=DEFAULT_MAX_TOKENS,
                        label="最大生成长度 (Max Tokens)",
                        info="生成的越长,CPU 耗时越久。"
                    )

            with gr.Row():
                init_btn = gr.Button("🚀 1. 加载模型 (Load)", variant="secondary")
                gen_btn = gr.Button("✨ 2. 生成回复 (Generate)", variant="primary")

            clear_btn = gr.Button("🗑️ 清空历史 (Clear)", variant="stop")

        # Right column: output display
        with gr.Column(scale=6):
            output_txt = gr.Textbox(
                label="模型回复 (Response)",
                lines=15,
            )

    # Footer note
    with gr.Row():
        gr.Markdown(
            "⚠️ *注意:推理在免费 CPU 上运行,速度可能较慢。首次运行时需要下载模型(约2GB),请耐心等待。*",
            elem_classes=["footer-text"]
        )

    # Per-session state holding the loaded model and its path.
    state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})

    # Event bindings
    init_btn.click(
        fn=init_model,
        inputs=state,
        outputs=[status_label, state],
        show_progress=True
    )

    gen_btn.click(
        fn=generate_response,
        inputs=[prompt_in, max_tokens, state],
        outputs=[output_txt, status_label, state],
        show_progress=True
    )

    # Two handlers on the same click: clear prompt/status, then clear output.
    clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
    clear_btn.click(lambda: "", outputs=[output_txt])

# Launch the app (bind to all interfaces on the standard Spaces port).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)