import os

import torch
import gradio as gr
import librosa  # used by transcribe() to decode the recorded audio file
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    WhisperProcessor,
    WhisperForConditionalGeneration,
)
from typing import List, Tuple  # type annotations for the signatures below

# Option A: read the token from a custom environment variable named "language".
hf_token = os.environ.get("language")
if not hf_token:
    raise EnvironmentError("Environment variable 'language' not found; please add it in the Space settings.")

# Option B: use the conventional "HUGGINGFACE_HUB_TOKEN" instead
# (requires updating the Space environment variable accordingly).
# hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
# if not hf_token:
#     raise EnvironmentError("Environment variable 'HUGGINGFACE_HUB_TOKEN' not found; please add it in the Space settings.")

# Model configuration -- public models only, no gated access required.
MODELS = {
    "Zephyr 7B Beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        "kwargs": {"torch_dtype": torch.float16},
    },
    "Falcon 7B Instruct": {
        "model_id": "tiiuae/falcon-7b-instruct",
        "kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True},
    },
}


# Load a model and its tokenizer onto the available device.
def load_model(model_name):
    model_config = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_id"],
        token=hf_token,  # `use_auth_token` is deprecated in recent transformers
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        **model_config["kwargs"],
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.to(device), tokenizer, device


# Load every model up front. Note: keeping two 7B fp16 models resident at once
# needs roughly 28 GB of memory; load lazily if the Space hardware is smaller.
loaded_models = {}
for model_name in MODELS:
    loaded_models[model_name] = load_model(model_name)


# Build a prompt in the format each model family expects.
def build_prompt(message: str, history: List[Tuple[str, str]],
                 system_prompt: str, model_name: str) -> str:
    if "Zephyr" in model_name:
        prompt = f"System: {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        prompt += f"User: {message}\nAssistant:"
    elif "Falcon" in model_name:
        prompt = f"### System:\n{system_prompt}\n\n"
        for user_msg, assistant_msg in history:
            prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        prompt += f"### User:\n{message}\n\n### Assistant:"
    else:
        prompt = f"[System] {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
        prompt += f"[User] {message}\n[Assistant]"
    return prompt


# Run inference and return only the newly generated text.
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
) -> str:
    model, tokenizer, device = loaded_models[model_name]
    full_prompt = build_prompt(message, history, system_prompt, model_name)
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": True,
        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
    }
    with torch.no_grad():
        output = model.generate(**inputs, **generate_kwargs)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded output echoes the prompt, so strip it off.
    return response[len(full_prompt):].strip()


# Handle one chat turn: generate a reply and append it to the history.
def process_chat(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
    response = generate_response(message, history, system_prompt, model_name,
                                 max_new_tokens, temperature, top_p, top_k)
    history.append((message, response))
    return history, history
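
# Alternative to the hand-rolled build_prompt above (a minimal sketch, not used
# by the app): recent transformers versions expose tokenizer.apply_chat_template,
# and chat checkpoints such as zephyr-7b-beta ship their own chat template, so
# the per-model string formatting can usually be delegated to the tokenizer.
# build_prompt_via_template is a hypothetical helper introduced here for
# illustration; it assumes the loaded tokenizer defines a chat template.
def build_prompt_via_template(message: str, history: List[Tuple[str, str]],
                              system_prompt: str, tokenizer) -> str:
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Returns the formatted prompt string, ending with the assistant turn marker.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )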
# Speech-to-text: load Whisper only when an accelerator is available.
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
    try:
        processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        asr_model = WhisperForConditionalGeneration.from_pretrained(
            "openai/whisper-base"
        ).to("cuda" if torch.cuda.is_available() else "cpu")
        asr = {"processor": processor, "model": asr_model}
    except Exception as e:
        print(f"Failed to load the speech model: {e}")
        asr = None


def transcribe(audio_path) -> str:
    if asr is None or not audio_path:
        return "Speech recognition model not loaded"
    processor, model = asr["processor"], asr["model"]
    # gr.Audio(type="filepath") hands us a path, not a waveform, so decode the
    # file first; Whisper expects 16 kHz mono audio.
    speech, _ = librosa.load(audio_path, sr=16000)
    input_features = processor(
        speech, sampling_rate=16000, return_tensors="pt"
    ).input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


# Build the Gradio interface.
with gr.Blocks(title="Open-Model Chat Assistant") as demo:
    gr.Markdown("## Chat with public language models (no gated access required)")

    with gr.Row():
        with gr.Column(scale=1):
            message_input = gr.Textbox(label="Message")
            system_prompt = gr.Textbox(
                label="System prompt",
                value="You are a helpful, knowledgeable AI assistant.",
            )
            model_choice = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Language model",
            )
            with gr.Accordion("Generation parameters", open=False):
                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p sampling")
                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k sampling")
            use_voice = gr.Checkbox(label="Use voice input")
            audio_input = gr.Audio(type="filepath", label="Voice input")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear chat")
        with gr.Column(scale=2):
            chat_history = gr.Chatbot(label="Conversation")

    # Voice input: transcribe the recording into the message box when enabled.
    audio_input.change(
        fn=lambda audio, use: transcribe(audio) if use else "",
        inputs=[audio_input, use_voice],
        outputs=message_input,
    )

    # Send a message.
    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice,
                max_new_tokens, temperature, top_p, top_k],
        outputs=[chat_history, chat_history],
    )

    # Clear the conversation.
    clear_btn.click(fn=lambda: None, outputs=chat_history)

# Launch the app.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
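
# Assumed Space dependencies (the original ships no requirements.txt; this is a
# sketch based solely on the imports above):
#   torch
#   transformers
#   gradio
#   librosa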