CSC9090_qwen3-0.6b-base-2

Running

App Files Files Community

badanwang committed on Jul 18

Commit

c4614a3

verified ·

1 Parent(s): 9eb5e7a

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -151

app.py CHANGED Viewed

@@ -1,165 +1,102 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from threading import Thread
 import os
 # --- 配置 ---
-MODEL_ID = os.getenv("MODEL_ID", "badanwang/teacher_basic_qwen3-0.6b")
-print(f"INFO: Application startup. Loading model: {MODEL_ID}")
-# --- 1. 模型加载 (内置健壮的错误处理) ---
-try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        torch_dtype="auto",
-        device_map="auto",
-        trust_remote_code=True
-    )
-    print("INFO: Model and tokenizer loaded successfully!")
-    model_loaded = True
-except Exception as e:
-    print(f"FATAL: Failed to load model or tokenizer: {e}")
-    model_loaded = False
-    model_load_error = e
-# --- 2. 核心流式推理函数 ---
-def stream_predict(prompt: str, history: list[list[str]]):
     """
-    一个生成器函数，用于流式生成对话。
-    它会逐步 (yield) 返回完整的对话历史。
     """
-    if not model_loaded:
-        # 如果模型加载失败，则立即抛出错误
-        raise gr.Error(f"Model is not loaded. Please check logs. Error: {model_load_error}")
-    print(f"INFO: Received prompt: '{prompt}'")
-    # 将历史记录和新提示转换为模型需要的格式
     messages = []
-    for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": prompt})
-    # 应用聊天模板
-    try:
-        input_ids = tokenizer.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=True,
-            return_tensors="pt"
-        ).to(model.device)
-    except Exception as e:
-        raise gr.Error(f"Error applying chat template: {e}")
-    # 初始化 streamer 和生成线程
-    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = dict(
-        input_ids=input_ids,
-        streamer=streamer,
-        max_new_tokens=1024,
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9
-    )
-    # 在独立线程中运行生成，防止阻塞UI
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    # 流式输出
     try:
-        # 初始化一个空的字符串来存放助手的回复
-        assistant_response = ""
-        # 每次从streamer中获取一个新的文本片段
-        for new_text in streamer:
-            if not new_text:
-                continue
-            assistant_response += new_text
-            # 将当前用户输入和不断增长的助手回复组合成新的对话历史
-            # 然后使用 yield 返回，Gradio会用它来更新UI
-            yield history + [[prompt, assistant_response]]
-        print("INFO: Streaming finished.")
     except Exception as e:
-        print(f"ERROR: An error occurred during streaming: {e}")
-        raise gr.Error(f"An error occurred during generation: {e}")
-    finally:
-        # 确保线程结束
-        thread.join()
-# --- 3. Gradio Blocks 界面布局 ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), css="footer {visibility: hidden}") as demo:
-    gr.Markdown(f"# 流式对话机器人\n### 模型: `{MODEL_ID}`")
-    # 使用 gr.State 来存储对话历史
-    # 这是实现多轮对话的关键
-    chatbot_state = gr.State([])
-    # Chatbot 组件用于显示对话
-    chatbot_ui = gr.Chatbot(label="对话窗口", height=600)
-    with gr.Row():
-        # Textbox 用于用户输入
-        prompt_input = gr.Textbox(
-            show_label=False,
-            placeholder="请在这里输入您的问题...",
-            scale=4,
-        )
-        # Button 用于提交
-        submit_button = gr.Button("发送", variant="primary", scale=1)
-    # 清除按钮
-    clear_button = gr.Button("清除对话历史")
-    # --- 4. 事件处理逻辑 ---
-    # 提交逻辑:
-    # 1. 点击"发送"按钮或在输入框按回车时触发
-    # 2. 调用 stream_predict 函数
-    # 3. 输入是用户输入框(prompt_input)和对话历史状态(chatbot_state)
-    # 4. 输出会实时更新聊天机器人界面(chatbot_ui)
-    # 5. 在函数开始前，将用户输入添加到聊天记录的末尾，并清空输入框
-    def on_submit(prompt, history):
-        # 将用户输入加入历史，形成 "用户: XXX" 的临时记录
-        return "", history + [[prompt, None]]
-    prompt_input.submit(
-        on_submit,
-        [prompt_input, chatbot_state],
-        [prompt_input, chatbot_ui]
-    ).then(
-        stream_predict,
-        [prompt_input, chatbot_state],
-        chatbot_ui
-    )
-    submit_button.click(
-        on_submit,
-        [prompt_input, chatbot_state],
-        [prompt_input, chatbot_ui]
-    ).then(
-        stream_predict,
-        [prompt_input, chatbot_state],
-        chatbot_ui
-    )
-    # 清除逻辑:
-    # 点击按钮时，清空状态和UI
-    def on_clear():
-        return []
-    clear_button.click(on_clear, [], chatbot_state)
-    clear_button.click(on_clear, [], chatbot_ui)
-# --- 5. 启动应用 ---
-print("INFO: Preparing to launch Gradio app...")
-# .queue() 启用请求队列，对于流式应用是必需的
-# 在Hugging Face Spaces上, 无需 share=True, Gradio会自动处理
-demo.queue().launch()

 import gradio as gr
+import requests
 import os
+import json
 # --- 配置 ---
+# 从Hugging Face Space的Secrets中获取API Token
+# 请确保在你的Space设置中添加了名为 "HF_TOKEN" 的Secret
+HF_TOKEN = os.getenv("HF_TOKEN")
+API_URL = "https://api-inference.huggingface.co/models/badanwang/teacher_basic_qwen3-0.6b"
+# --- 核心对话函数 ---
+def predict(message, history):
     """
+    主函数，用于与Hugging Face Inference API进行流式对话。
+    :param message: 用户当前发送的消息 (str)
+    :param history: 对话历史 (list of lists)，格式为 [[user_msg, assistant_msg], ...]
+    :return: 一个生成器 (generator)，每收到一个新token就yield一次到目前为止累积的完整响应文本
     """
+    if not HF_TOKEN:
+        raise gr.Error("Hugging Face API Token 未配置！请在Space的Secrets中添加 HF_TOKEN。")
+    headers = {
+        "Authorization": f"Bearer {HF_TOKEN}",
+        "Content-Type": "application/json"
+    }
+    # 1. 格式化对话历史以符合API要求
+    # API需要一个包含所有对话的列表，格式为 {"role": "user", "content": "..."} 或 {"role": "assistant", "content": "..."}
     messages = []
+    for turn in history:
+        user_msg, assistant_msg = turn
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": assistant_msg})
+    # 添加当前用户消息
+    messages.append({"role": "user", "content": message})
+    # 2. 构建API请求体
+    # 我们启用流式响应 (stream=True)
+    payload = {
+        "inputs": messages,
+        "parameters": {
+            "max_new_tokens": 2048,  # 根据需要调整
+            "temperature": 0.7,
+            "top_p": 0.95,
+            "repetition_penalty": 1.1,
+            "return_full_text": False,
+        },
+        "stream": True
+    }
+    # 3. 发送流式请求并处理响应
+    full_response = ""
     try:
+        # 使用 requests 发送POST请求，并设置 stream=True
+        with requests.post(API_URL, headers=headers, json=payload, stream=True, timeout=120) as response:
+            # 检查HTTP响应状态码
+            response.raise_for_status()
+            # 逐行读取流式响应
+            for line in response.iter_lines():
+                if line:
+                    # 流式响应通常以 "data:" 开头，后跟一个JSON对象
+                    decoded_line = line.decode('utf-8')
+                    if decoded_line.startswith("data:"):
+                        try:
+                            # 解析JSON
+                            json_data = json.loads(decoded_line[5:])
+                            # 提取token文本
+                            token = json_data.get("token", {}).get("text", "")
+                            if token:
+                                full_response += token
+                                yield full_response
+                        except json.JSONDecodeError:
+                            # 忽略无法解析的行
+                            continue
+    except requests.exceptions.RequestException as e:
+        print(f"API请求错误: {e}")
+        yield f"抱歉，与模型API通信时发生错误: {e}"
     except Exception as e:
+        print(f"发生未知错误: {e}")
+        yield f"抱歉，发生了一个未知错误: {e}"
+# --- 创建并启动Gradio界面 ---
+# 使用gr.ChatInterface，它为聊天机器人提供了完整的UI
+# fn=predict 指定了处理逻辑的函数
+# 注意：这里并没有传入 streaming 参数；predict 是生成器函数（使用yield），因此回复以流式方式显示
+# Gradio 4.44.1中，ChatInterface会自动处理流式输出，我们只需确保函数是生成器
+demo = gr.ChatInterface(
+    fn=predict,
+    title="小Q老师 - 基础问答",
+    description="与 badanwang/teacher_basic_qwen3-0.6b 模型进行流式对话。直接输入问题开始。",
+    examples=[["你好"], ["请用python写一个快速排序算法"], ["给我讲个笑话吧"]],
+    cache_examples=False,
+)
+if __name__ == "__main__":
+    # demo.launch(share=True) # 如果在本地运行并需要分享链接
+    demo.launch() # 在Hugging Face Spaces上运行时使用