import gradio as gr
import requests
import os
import json

# --- Configuration ---
# The API token is read from the environment. On a Hugging Face Space it is
# supplied by a Secret named "HF_TOKEN" added in the Space settings; it is
# None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Inference API endpoint of the model this app talks to.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "badanwang/teacher_basic_qwen3-0.6b"
)

# --- Core chat function ---
def _history_to_messages(history):
    """Flatten Gradio chat history into a list of role/content dicts.

    Both history layouts are accepted: ``[[user_msg, assistant_msg], ...]``
    pairs and ``[{"role": ..., "content": ...}, ...]`` dicts. Entries whose
    content is None are skipped so that no empty turn is sent to the model.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role is not None and content is not None:
                messages.append({"role": role, "content": content})
            continue
        user_msg, assistant_msg = turn
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def _render_chatml_prompt(messages):
    """Render role/content messages as one ChatML prompt string.

    The prompt ends with an opened assistant turn so the model continues
    with its reply.
    """
    # NOTE(review): ChatML is assumed because the model name indicates a
    # Qwen3 fine-tune -- confirm against the model's chat template.
    parts = [
        f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
        for msg in messages
    ]
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)


def predict(message, history):
    """Stream a chat reply from the Hugging Face Inference API.

    :param message: The message the user just sent (str).
    :param history: Earlier conversation turns, either as
        ``[[user_msg, assistant_msg], ...]`` pairs or as a list of
        ``{"role": ..., "content": ...}`` dicts.
    :return: A generator. Each yielded value is the reply text accumulated
        so far (not just the newest token). On a request or stream failure
        a single error message is yielded instead.
    :raises gr.Error: If ``HF_TOKEN`` is not configured.
    """
    if not HF_TOKEN:
        raise gr.Error("Hugging Face API Token 未配置！请在Space的Secrets中添加 HF_TOKEN。")

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    # 1. Collect the conversation: earlier turns followed by the new message.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # 2. Build the request body. The text-generation endpoint takes the
    #    prompt as a single string in "inputs", so the conversation is
    #    rendered into one prompt instead of being sent as a list.
    payload = {
        "inputs": _render_chatml_prompt(messages),
        "parameters": {
            "max_new_tokens": 2048,  # adjust as needed
            "temperature": 0.7,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "return_full_text": False,
        },
        "stream": True,
    }

    # 3. Send the request and consume the streamed response line by line.
    full_response = ""
    try:
        with requests.post(API_URL, headers=headers, json=payload, stream=True, timeout=120) as response:
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue
                decoded_line = line.decode("utf-8")
                # Only lines of the form "data:<json>" are processed.
                if not decoded_line.startswith("data:"):
                    continue
                try:
                    event = json.loads(decoded_line[5:])
                except json.JSONDecodeError:
                    # Skip payloads that are not valid JSON.
                    continue
                if not isinstance(event, dict):
                    continue

                # An error reported inside the stream would otherwise be
                # dropped silently and leave the user with an empty reply.
                if "error" in event:
                    print(f"API请求错误: {event['error']}")
                    yield f"抱歉，与模型API通信时发生错误: {event['error']}"
                    return

                token = event.get("token")
                if not isinstance(token, dict):
                    continue
                # Tokens flagged as special (e.g. end-of-turn markers) are
                # control tokens, not part of the visible reply.
                if token.get("special"):
                    continue
                text = token.get("text", "")
                if text:
                    full_response += text
                    yield full_response

    except requests.exceptions.RequestException as e:
        print(f"API请求错误: {e}")
        yield f"抱歉，与模型API通信时发生错误: {e}"
    except Exception as e:
        print(f"发生未知错误: {e}")
        yield f"抱歉，发生了一个未知错误: {e}"

# --- Build and launch the Gradio interface ---

# gr.ChatInterface supplies the chat UI; `predict` is the handler it calls
# for each user message. No streaming argument is passed here -- `predict`
# is a generator, and each value it yields is a progressively longer reply.
demo = gr.ChatInterface(
    predict,
    examples=[
        [prompt]
        for prompt in ("你好", "请用python写一个快速排序算法", "给我讲个笑话吧")
    ],
    cache_examples=False,
    description="与 badanwang/teacher_basic_qwen3-0.6b 模型进行流式对话。直接输入问题开始。",
    title="小Q老师 - 基础问答",
)

if __name__ == "__main__":
    # Plain launch is what Hugging Face Spaces needs. For a public share
    # link when running locally, use demo.launch(share=True) instead.
    demo.launch()