Marcus719 committed on
Commit
078bd3c
·
verified ·
1 Parent(s): 4345ede

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -203
app.py CHANGED
@@ -1,226 +1,166 @@
1
- import os
2
- import traceback
3
- import time
4
- from huggingface_hub import snapshot_download
5
  import gradio as gr
 
 
 
 
6
 
7
# Try to import llama_cpp; if it fails, keep a sentinel so the UI can report it.
try:
    from llama_cpp import Llama
except Exception as e:
    Llama = None  # checked before every model load
    Llama_import_error = e  # surfaced in the error message shown to the user

# ---------- Configuration ----------
# ★★★ Change this to your own model repository ★★★
MODEL_REPO = "Marcus719/Llama-3.2-3B-Instruct-FineTome-Lab2-GGUF"
# Download only the q4_k_m file, so extra repo files don't fill the disk
GGUF_FILENAME = "unsloth.Q4_K_M.gguf"

DEFAULT_N_CTX = 2048  # context window length
DEFAULT_MAX_TOKENS = 256  # default generation length
DEFAULT_N_THREADS = 2  # 2 is recommended on the free CPU tier
# ------------------------------
24
-
25
def log(msg: str):
    """Print a timestamped, immediately-flushed app log line to stdout."""
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print(f"[app] {stamp} - {msg}", flush=True)
27
-
28
def load_model_from_hub(repo_id: str, filename: str, n_ctx=DEFAULT_N_CTX, n_threads=DEFAULT_N_THREADS):
    """Download a single GGUF file from the Hub and load it with llama.cpp.

    Args:
        repo_id: Hub repository id holding the GGUF weights.
        filename: exact GGUF filename to fetch (used as an allow-pattern).
        n_ctx: context window length for the llama.cpp session.
        n_threads: CPU threads for inference.

    Returns:
        (llm, gguf_path) — the loaded Llama instance and the local file path.

    Raises:
        RuntimeError: llama-cpp-python is not installed / failed to import.
        FileNotFoundError: the requested file is not in the downloaded snapshot.
    """
    if Llama is None:
        raise RuntimeError(f"llama-cpp-python 未安装或加载失败: {Llama_import_error}")

    # FIX: both messages below had lost their {filename} placeholder.
    log(f"开始下载模型: {repo_id} / {filename} ...")

    # snapshot_download with allow_patterns fetches only the GGUF file.
    local_dir = snapshot_download(
        repo_id=repo_id,
        allow_patterns=[filename],
        local_dir_use_symlinks=False  # symlinks are sometimes flaky in Spaces; disabling is safer
    )

    # The snapshot keeps the repo's directory layout; try the direct path first.
    gguf_path = os.path.join(local_dir, filename)

    # Fallback: search the snapshot recursively in case the file is nested.
    if not os.path.exists(gguf_path):
        for root, dirs, files in os.walk(local_dir):
            if filename in files:
                gguf_path = os.path.join(root, filename)
                break

    if not os.path.exists(gguf_path):
        raise FileNotFoundError(f"在 {local_dir} 中找不到 {filename}")

    log(f"模型路径: {gguf_path}。正在加载到内存...")

    # Initialize the llama.cpp model (verbose off to keep Space logs clean).
    llm = Llama(model_path=gguf_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)
    log("Llama 模型加载成功!")
    return llm, gguf_path
62
 
63
def init_model(state):
    """Callback for the init button: download and load the model into state.

    Returns (status_message, state) matching the Gradio outputs. Idempotent:
    a second click returns immediately when the model is already loaded.
    """
    try:
        if state.get("llm") is not None:
            return "✅ 系统就绪 (模型已加载)", state

        log("收到加载请求...")
        # Download and load the GGUF model from the Hub.
        llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)

        # Cache the model and its path in the session state dict.
        state["llm"] = llm
        state["gguf_path"] = gguf_path

        return "✅ 系统就绪", state
    except Exception as exc:
        tb = traceback.format_exc()
        log(f"初始化错误: {exc}\n{tb}")
        return f"❌ 初始化失败: {exc}", state
82
-
83
def generate_response(prompt: str, max_tokens: int, state):
    """Callback for the generate button.

    Returns (response_text, status_message, state) matching the Gradio
    outputs. Lazily loads the model if the user skipped the init button.
    """
    try:
        if not prompt or prompt.strip() == "":
            return "⚠️ 请输入指令。", "⚠️ 空闲", state

        # Lazy load: if init was never clicked, try to load the model now.
        if state.get("llm") is None:
            try:
                log("未检测到模型,尝试自动加载...")
                llm, gguf_path = load_model_from_hub(MODEL_REPO, GGUF_FILENAME)
                state["llm"] = llm
                state["gguf_path"] = gguf_path
            except Exception as e:
                return f"❌ 模型加载失败: {e}", f"❌ 错误", state

        llm = state.get("llm")

        log(f"正在生成 (Prompt 长度={len(prompt)})...")

        # Build a Llama 3 chat-format prompt: system + user + assistant header.
        system_prompt = "You are a helpful AI assistant."
        # Simple text concatenation instead of tokenizer.apply_chat_template;
        # Llama 3 generally understands this format as well.
        full_prompt = f"<|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

        # Inference; stop on the end-of-turn token so output ends cleanly.
        output = llm(
            full_prompt,
            max_tokens=max_tokens,
            stop=["<|eot_id|>"],
            echo=False
        )

        text = output['choices'][0]['text']
        log("生成完成。")
        return text, "✅ 生成完毕", state

    except Exception as exc:
        tb = traceback.format_exc()
        log(f"生成错误: {exc}\n{tb}")
        return f"运行出错: {exc}", f"❌ 异常", state
126
-
127
def soft_clear(current_state):
    """Clear-button callback: blank the prompt box but keep the loaded model."""
    if current_state.get("llm"):
        status = "✅ 系统就绪"
    else:
        status = "⚪ 未初始化"
    return "", status, current_state
131
-
132
# ---------------- Gradio UI construction ----------------

# Theme settings
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="slate",
    neutral_hue="slate"
)

# Custom CSS for the footer note
custom_css = """
.footer-text { font-size: 0.8em; color: gray; text-align: center; }
"""

# FIX: `theme` and `custom_css` were defined but never passed to gr.Blocks,
# so the Soft theme and the .footer-text styling were silently unused.
with gr.Blocks(title="Llama 3.2 Lab2 Project", theme=theme, css=custom_css) as demo:

    # Header
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🦙 Llama 3.2 (3B) Fine-Tuned Chatbot")
            gr.Markdown(
                f"""
                **ID2223 Lab 2 Project** | Fine-tuned on **FineTome-100k**.
                Running on CPU (GGUF 4-bit) | Model: `{MODEL_REPO}`
                """
            )
        with gr.Column(scale=0, min_width=150):
            status_label = gr.Label(value="⚪ 未初始化", label="系统状态", show_label=False)

    # Main layout
    with gr.Row():
        # Left: input and controls
        with gr.Column(scale=4):
            with gr.Group():
                prompt_in = gr.Textbox(
                    lines=5,
                    label="用户指令 (User Input)",
                    placeholder="例如:请解释量子力学...",
                    elem_id="prompt-input"
                )

                with gr.Accordion("⚙️ 高级参数 (Advanced)", open=False):
                    max_tokens = gr.Slider(
                        minimum=16,
                        maximum=1024,
                        step=16,
                        value=DEFAULT_MAX_TOKENS,
                        label="最大生成长度 (Max Tokens)",
                        info="生成的越长,CPU 耗时越久。"
                    )

            with gr.Row():
                init_btn = gr.Button("🚀 1. 加载模型 (Load)", variant="secondary")
                gen_btn = gr.Button("✨ 2. 生成回复 (Generate)", variant="primary")

            clear_btn = gr.Button("🗑️ 清空历史 (Clear)", variant="stop")

        # Right: output display
        with gr.Column(scale=6):
            output_txt = gr.Textbox(
                label="模型回复 (Response)",
                lines=15,
            )

    # Footer note
    with gr.Row():
        gr.Markdown(
            "⚠️ *注意:推理在免费 CPU 上运行,速度可能较慢。首次运行时需要下载模型(约2GB),请耐心等待。*",
            elem_classes=["footer-text"]
        )

    # Per-session state shared between the callbacks
    state = gr.State({"llm": None, "gguf_path": None, "status": "Not initialized"})

    # Event bindings
    init_btn.click(
        fn=init_model,
        inputs=state,
        outputs=[status_label, state],
        show_progress=True
    )

    gen_btn.click(
        fn=generate_response,
        inputs=[prompt_in, max_tokens, state],
        outputs=[output_txt, status_label, state],
        show_progress=True
    )

    # Two handlers on the same button: reset prompt/status, then blank the output.
    clear_btn.click(fn=soft_clear, inputs=[state], outputs=[prompt_in, status_label, state])
    clear_btn.click(lambda: "", outputs=[output_txt])

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
1
  import gradio as gr
2
+ import time
3
+ import os
4
+ # from llama_cpp import Llama # Uncomment if running locally with the library installed
5
+ import numpy as np
6
 
7
# --- CONFIGURATION ---
# Local GGUF weight files (q4_k_m 4-bit quantization) for the two model sizes.
GGUF_MODEL_PATH_1B = "./llama-3.2-1b-summary-q4_k_m.gguf"
GGUF_MODEL_PATH_3B = "./llama-3.2-3b-summary-q4_k_m.gguf"

# System prompt prepended to every request (Llama-3 chat format).
SYSTEM_PROMPT = (
    "You are an expert summarization bot. Your task is to provide a comprehensive "
    "and concise summary of the user's document based on the requested length."
)
 
 
15
 
16
# ----------------------------------------------------
# 1. MODEL LOADING FUNCTION
# ----------------------------------------------------
# Requires llama-cpp-python at runtime; a missing package or model file
# degrades gracefully to a None model instead of crashing the app.
def load_llm(model_path):
    """Load a GGUF model with llama.cpp on CPU; return None on any failure."""
    print(f"Attempting to load GGUF model: {model_path}...")
    try:
        from llama_cpp import Llama
        loaded = Llama(
            model_path=model_path,
            n_gpu_layers=0,  # CPU-only inference
            n_ctx=2048,
            verbose=True
        )
    except Exception as e:
        print(f"Error loading model {model_path}: {e}")
        # Placeholder for when models are missing (prevents crash during UI testing)
        return None
    print(f"Successfully loaded model: {model_path}")
    return loaded
37
+
38
+ # Load models globally
39
+ llm_1b = load_llm(GGUF_MODEL_PATH_1B)
40
+ llm_3b = load_llm(GGUF_MODEL_PATH_3B)
41
+
42
# ----------------------------------------------------
# 2. CORE PROCESSING FUNCTION
# ----------------------------------------------------
def generate_summary_and_compare(long_document, selected_model, summary_length):
    """Summarize *long_document* with the selected GGUF model and time it.

    Args:
        long_document: raw text to summarize.
        selected_model: radio label; must mention "1B" or "3B".
        summary_length: radio label; "Detailed" selects a larger token budget.

    Returns:
        (summary_or_error, latency_report) — two plain strings for the UI.
    """
    # Guard: nothing to summarize — avoids wasting a slow CPU inference call.
    if not long_document or not long_document.strip():
        return "Error: Please provide a document to summarize.", "Latency: N/A"

    # 1. Select Model by the size tag embedded in the radio label.
    if "1B" in selected_model:
        selected_llm = llm_1b
        model_name_display = "Llama-3.2-1B"
    elif "3B" in selected_model:
        selected_llm = llm_3b
        model_name_display = "Llama-3.2-3B"
    else:
        # FIX: this path previously returned "" for the metrics box while
        # every other error path reported "Latency: N/A".
        return "Error: Invalid model selection.", "Latency: N/A"

    # Check if model loaded successfully at startup.
    if selected_llm is None:
        return "Error: Model file not found or failed to load.", "Latency: N/A"

    # 2. Build a Llama-3 chat-format prompt.
    instruction = f"Please summarize the following document and keep the summary {summary_length}. Document: \n\n{long_document}"
    full_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{SYSTEM_PROMPT}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"

    # 3. Inference, timed end-to-end.
    start_time = time.time()
    max_tokens = 250 if "Detailed" in summary_length else 100

    try:
        output = selected_llm(
            full_prompt,
            max_tokens=max_tokens,
            stop=["<|eot_id|>"],
            temperature=0.7,
            echo=False,
        )
        total_latency = time.time() - start_time
        summary_output = output["choices"][0]["text"].strip()
    except Exception as e:
        # Still report latency so the user sees how long the failed attempt took.
        total_latency = time.time() - start_time
        summary_output = f"Inference Error on {model_name_display}. Error: {e}"

    # 4. Report
    speed_report = f"Model: {model_name_display}\nTotal Latency: {total_latency:.2f} seconds"

    return summary_output, speed_report
 
 
 
 
 
87
 
88
# ----------------------------------------------------
# 3. GRADIO INTERFACE (UI IMPROVED)
# ----------------------------------------------------
# Soft theme with a clean blue/slate palette.
theme = gr.themes.Soft(
    primary_hue="blue",
    neutral_hue="slate",
).set(
    # Darken the primary button slightly on hover.
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
)

with gr.Blocks(title="KTH ID2223 Lab 2", theme=theme) as demo:

    # Header Section
    with gr.Row():
        gr.Markdown(
            """
            # LLM Document Summarizer
            Select a model and input your text below to generate a summary.
            """
        )

    with gr.Row(equal_height=False):

        # --- Left Column: Input & Controls ---
        with gr.Column(scale=4, variant="panel"):
            gr.Markdown("### Input Configuration")

            input_document = gr.Textbox(
                lines=12,
                label="Document Content",
                placeholder="Paste the text you need summarized here...",
                show_copy_button=True
            )

            # Grouping settings for a cleaner look
            with gr.Group():
                with gr.Row():
                    model_selector = gr.Radio(
                        ["Llama-3.2-1B (Faster)", "Llama-3.2-3B (Quality)"],
                        label="Model Selection",
                        value="Llama-3.2-1B (Faster)"
                    )

                summary_control = gr.Radio(
                    ["Concise (<50 words)", "Detailed (<200 words)"],
                    label="Summary Length",
                    value="Concise (<50 words)"
                )

            process_button = gr.Button("Generate Summary", variant="primary", size="lg")

        # --- Right Column: Output & Stats ---
        with gr.Column(scale=5):
            gr.Markdown("### Results")

            output_summary = gr.Textbox(
                label="Generated Summary",
                lines=10,
                interactive=False,
                show_copy_button=True
            )

            performance_report = gr.Textbox(
                label="Performance Metrics",
                lines=2,
                interactive=False
            )

    # Event Binding: one click handler drives both output boxes.
    process_button.click(
        fn=generate_summary_and_compare,
        inputs=[input_document, model_selector, summary_control],
        outputs=[output_summary, performance_report]
    )

if __name__ == "__main__":
    demo.launch()