import os
from typing import List, Tuple

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face token from the environment
hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
if not hf_token:
    raise EnvironmentError("HUGGINGFACE_HUB_TOKEN not found; add it in the Space settings")

# Model configuration - public (non-gated) models
MODELS = {
    "Zephyr 7B Beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        "kwargs": {"torch_dtype": torch.float16}
    },
    "Falcon 7B Instruct": {
        "model_id": "tiiuae/falcon-7b-instruct",
        "kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True}
    }
}
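
# Hedged note: each 7B model in float16 needs roughly 14 GB of memory, so
# loading both models above requires a large GPU. Below is a sketch of a
# lower-memory alternative, assuming the bitsandbytes and accelerate packages
# are installed; this helper is illustrative and is not called by the app.
def load_model_4bit(model_name):
    from transformers import BitsAndBytesConfig

    model_config = MODELS[model_name]
    quant_config = BitsAndBytesConfig(load_in_4bit=True)
    tokenizer = AutoTokenizer.from_pretrained(model_config["model_id"], token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        quantization_config=quant_config,
        device_map="auto",  # requires accelerate
    )
    return model, tokenizer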

# Load a model and its tokenizer
def load_model(model_name):
    model_config = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_id"],
        token=hf_token  # "token" replaces the deprecated "use_auth_token" argument
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        **model_config["kwargs"]
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.to(device), tokenizer, device

# Initialize all models up front
loaded_models = {}
for model_name in MODELS:
    loaded_models[model_name] = load_model(model_name)
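
# Sketch of an alternative: load models lazily on first use instead of all at
# startup, trading first-request latency for lower idle memory. Illustrative
# only; the app uses the eager loaded_models dict above.
_model_cache = {}

def get_model_lazy(model_name):
    if model_name not in _model_cache:
        _model_cache[model_name] = load_model(model_name)
    return _model_cache[model_name]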

# Build the conversation prompt (different models may need different formats)
def build_prompt(message, history, system_prompt, model_name):
    # Zephyr/Mistral-style models use a simple turn-based format
    if "Zephyr" in model_name or "Mistral" in model_name:
        prompt = f"System: {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        prompt += f"User: {message}\nAssistant:"
        return prompt
    
    # Falcon models use a header-style format
    elif "Falcon" in model_name:
        prompt = f"### System:\n{system_prompt}\n\n"
        for user_msg, assistant_msg in history:
            prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        prompt += f"### User:\n{message}\n\n### Assistant:"
        return prompt
    
    # Fall back to a generic format
    else:
        prompt = f"[System] {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
        prompt += f"[User] {message}\n[Assistant]"
        return prompt
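
# Sketch: chat models such as Zephyr ship a chat template with their tokenizer,
# so the hand-rolled formats above can usually be replaced with
# tokenizer.apply_chat_template. Illustrative only; build_prompt above is what
# the app actually calls.
def build_prompt_with_template(message, history, system_prompt, tokenizer):
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )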

# Model inference
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
):
    model, tokenizer, device = loaded_models[model_name]
    
    # Build the prompt
    full_prompt = build_prompt(message, history, system_prompt, model_name)
    
    # Tokenize the input
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    
    # Generation parameters
    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": True,
        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id
    }
    
    # Generate the response
    with torch.no_grad():
        output = model.generate(
            **inputs,
            **generate_kwargs
        )
    
    # Decode only the newly generated tokens; slicing the decoded string by
    # len(full_prompt) is fragile because special tokens are skipped on decode
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    
    return response
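
# Sketch: a streaming variant using transformers' TextIteratorStreamer, so the
# UI could render tokens as they are generated. Not wired into the Gradio
# callbacks below; shown for illustration under the same inputs/kwargs as
# generate_response.
def generate_response_streaming(model, tokenizer, inputs, generate_kwargs):
    from threading import Thread
    from transformers import TextIteratorStreamer

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    thread = Thread(
        target=model.generate,
        kwargs={**inputs, **generate_kwargs, "streamer": streamer},
    )
    thread.start()
    partial = ""
    for new_text in streamer:  # yields decoded text chunks as they arrive
        partial += new_text
        yield partial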

# Handle a user message
def process_chat(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
):
    response = generate_response(
        message, history, system_prompt, model_name,
        max_new_tokens, temperature, top_p, top_k
    )
    history.append((message, response))
    return history, history

# Speech-to-text (Whisper is only loaded when an accelerator is available)
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
    try:
        from transformers import WhisperProcessor, WhisperForConditionalGeneration
        processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        asr_model = WhisperForConditionalGeneration.from_pretrained(
            "openai/whisper-base"
        ).to("cuda" if torch.cuda.is_available() else "cpu")
        asr = {"processor": processor, "model": asr_model}
    except Exception:
        asr = None

def transcribe(audio):
    if audio is None:
        return ""
    if asr is None:
        return "Speech recognition model is not loaded"
    import librosa  # assumed available; used to load and resample the file

    processor, model = asr["processor"], asr["model"]
    # gr.Audio(type="filepath") passes a file path; Whisper expects 16 kHz audio
    speech, _ = librosa.load(audio, sr=16000)
    input_features = processor(
        speech, sampling_rate=16000, return_tensors="pt"
    ).input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
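
# Sketch: the higher-level pipeline API handles file loading, resampling, and
# long-audio chunking in one call, and could replace the manual Whisper code
# above. Illustrative only; not called by the app.
def transcribe_with_pipeline(audio_path):
    from transformers import pipeline
    asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base")
    return asr_pipe(audio_path)["text"]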

# Build the Gradio UI
with gr.Blocks(title="Public Language Model Chat Assistant") as demo:
    gr.Markdown("## Chat with public language models (no gated access required)")
    
    with gr.Row():
        with gr.Column(scale=1):
            message_input = gr.Textbox(label="Message")
            system_prompt = gr.Textbox(
                label="System prompt",
                value="You are a helpful, knowledgeable AI assistant.",
            )
            model_choice = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Select language model"
            )
            with gr.Accordion("Generation parameters", open=False):
                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p sampling")
                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k sampling")
            use_voice = gr.Checkbox(label="Use voice input")
            audio_input = gr.Audio(type="filepath", label="Voice input")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear chat")
        
        with gr.Column(scale=2):
            chat_history = gr.Chatbot(label="Chat history")
    
    # Voice input handling
    audio_input.change(
        fn=lambda audio, use: transcribe(audio) if use else "",
        inputs=[audio_input, use_voice],
        outputs=message_input
    )
    
    # Send a message
    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice,
                max_new_tokens, temperature, top_p, top_k],
        outputs=[chat_history, chat_history]
    )
    
    # Clear the chat
    clear_btn.click(fn=lambda: None, outputs=chat_history)

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)