CSC9090_qwen3-0.6b-base-2

Running

File size: 4,149 Bytes

ecb7812
c4614a3
7aee45a
c4614a3
d00fffc
9eb5e7a
c4614a3
 
 
 
ed1d652
c4614a3
 
9eb5e7a
c4614a3
 
 
 
9eb5e7a
c4614a3
 
9eb5e7a
c4614a3
 
 
 
 
 
 
9eb5e7a
c4614a3
 
9eb5e7a
 
 
c4614a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9eb5e7a
c4614a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9eb5e7a
c4614a3

import gradio as gr
import requests
import os
import json

# --- Configuration ---
# Endpoint of the hosted model that this app forwards chat requests to.
API_URL = "https://api-inference.huggingface.co/models/badanwang/teacher_basic_qwen3-0.6b"
# API token read from the environment. On a Hugging Face Space, add a Secret
# named "HF_TOKEN" in the Space settings so that this lookup finds it.
# The value is None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# --- Core chat function ---
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Two history layouts are accepted:

    * pair style: ``[[user_msg, assistant_msg], ...]``
    * message style: ``[{"role": ..., "content": ...}, ...]``

    Entries whose content is ``None`` (for example a turn that has no reply
    yet) are skipped instead of being forwarded as ``"content": None``.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Message-style entry. Unpacking a dict would yield its *keys*,
            # so it must be handled before the pair branch below.
            role = turn.get("role")
            content = turn.get("content")
            if role and content is not None:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style entry: one user message followed by the assistant reply.
        user_msg, assistant_msg = turn
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def predict(message, history):
    """Stream a chat reply from the Hugging Face Inference API.

    This is a generator: every yielded value is the full reply accumulated so
    far, so the caller can simply display the latest value.

    :param message: the message the user just sent (str)
    :param history: previous turns, either ``[[user_msg, assistant_msg], ...]``
        or a list of ``{"role": ..., "content": ...}`` dicts
    :return: generator of progressively longer reply strings; on a request
        failure, a server-reported error, or any other exception raised while
        streaming, a single user-facing error string is yielded instead
    :raises gr.Error: when ``HF_TOKEN`` is not configured (raised on the first
        iteration, because this function is a generator)
    """
    if not HF_TOKEN:
        raise gr.Error("Hugging Face API Token 未配置！请在Space的Secrets中添加 HF_TOKEN。")

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    # 1. Build the conversation: prior turns followed by the new user message.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # 2. Build the request body with streaming enabled.
    # NOTE(review): the text-generation task documents "inputs" as a string;
    # confirm that this endpoint accepts a list of chat messages. If it does
    # not, render the messages into a prompt or use a chat-completions route.
    payload = {
        "inputs": messages,
        "parameters": {
            "max_new_tokens": 2048,  # adjust as needed
            "temperature": 0.7,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "return_full_text": False,
        },
        "stream": True
    }

    # 3. Send the request and consume the streamed response line by line.
    full_response = ""
    try:
        with requests.post(API_URL, headers=headers, json=payload, stream=True, timeout=120) as response:
            # Turn HTTP error statuses into exceptions handled below.
            response.raise_for_status()

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue  # blank separator line between events

                decoded_line = raw_line.decode('utf-8')
                # Only lines of the form "data:<json>" are treated as events.
                if not decoded_line.startswith("data:"):
                    continue

                try:
                    event = json.loads(decoded_line[5:])
                except json.JSONDecodeError:
                    # Not JSON (e.g. a terminator marker); skip the line.
                    continue
                if not isinstance(event, dict):
                    continue

                # Presumably the server reports mid-stream failures as an
                # event carrying an "error" field; surface it rather than
                # ending with a silently truncated reply.
                error = event.get("error")
                if error:
                    print(f"API请求错误: {error}")
                    yield f"抱歉，与模型API通信时发生错误: {error}"
                    return

                token = event.get("token")
                if not isinstance(token, dict):
                    continue
                # Tokens flagged as special (such as end-of-sequence markers)
                # are control tokens and are not part of the visible reply.
                if token.get("special"):
                    continue

                text = token.get("text", "")
                if text:
                    full_response += text
                    yield full_response

    except requests.exceptions.RequestException as e:
        print(f"API请求错误: {e}")
        yield f"抱歉，与模型API通信时发生错误: {e}"
    except Exception as e:
        # Top-level boundary for the UI: report the failure in the chat
        # instead of letting the generator crash.
        print(f"发生未知错误: {e}")
        yield f"抱歉，发生了一个未知错误: {e}"

# --- Build and launch the Gradio interface ---

# gr.ChatInterface supplies the chat UI and calls `predict` for every user
# message. `predict` is a generator (it yields partial replies); no explicit
# streaming flag is passed here. The original note states that ChatInterface
# in Gradio 4.44.1 handles streaming automatically for generator functions.
demo = gr.ChatInterface(
    predict,
    examples=[
        ["你好"],
        ["请用python写一个快速排序算法"],
        ["给我讲个笑话吧"],
    ],
    cache_examples=False,
    title="小Q老师 - 基础问答",
    description="与 badanwang/teacher_basic_qwen3-0.6b 模型进行流式对话。直接输入问题开始。",
)

if __name__ == "__main__":
    # Default launch, as used when running on Hugging Face Spaces.
    # For a local run that needs a public share link, use
    # demo.launch(share=True) instead.
    demo.launch()