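"""Gradio chat app for publicly accessible Hugging Face models (Zephyr 7B Beta
and Falcon 7B Instruct), with optional Whisper-based voice input. Intended to
run as a Hugging Face Space, reading an access token from an environment
variable."""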
import os
import torch
import librosa  # audio loading/resampling for Whisper input (assumes librosa is installed)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, WhisperProcessor, WhisperForConditionalGeneration
from typing import List, Tuple  # added: typing imports for chat history

# Option A: use the custom environment variable name "language"
hf_token = os.environ.get("language")
if not hf_token:
    raise EnvironmentError("Environment variable 'language' not found; please add it in the Space settings")

# Option B: use the conventional "HUGGINGFACE_HUB_TOKEN" instead (requires updating the Space env var as well)
# hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
# if not hf_token:
#     raise EnvironmentError("HUGGINGFACE_HUB_TOKEN environment variable not found; please add it in the Space settings")

# Model configuration - publicly accessible models (no gated access)
MODELS = {
    "Zephyr 7B Beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        "kwargs": {"torch_dtype": torch.float16}
    },
    "Falcon 7B Instruct": {
        "model_id": "tiiuae/falcon-7b-instruct",
        "kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True}
    }
}
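# Further public checkpoints can be added here; each entry's "kwargs" dict is
# forwarded verbatim to from_pretrained().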

# Load a model and its tokenizer, then move the model to the available device
def load_model(model_name):
    model_config = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_id"],
        token=hf_token  # "use_auth_token" is deprecated in recent transformers
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        **model_config["kwargs"]
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.to(device), tokenizer, device

# Load every configured model at startup
loaded_models = {}
for model_name in MODELS:
    loaded_models[model_name] = load_model(model_name)
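# Note: this eagerly loads both 7B checkpoints; in float16 each one needs
# roughly 14 GB of memory, so lazy-loading on first selection may be
# preferable on smaller hardware.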

# Build the conversation prompt in each model's expected text format
def build_prompt(message: str, history: List[Tuple[str, str]], system_prompt: str, model_name: str) -> str:
    if "Zephyr" in model_name:
        # Plain-text roles; Zephyr's native chat template
        # (tokenizer.apply_chat_template) would match its training format more closely.
        prompt = f"System: {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        prompt += f"User: {message}\nAssistant:"
    elif "Falcon" in model_name:
        prompt = f"### System:\n{system_prompt}\n\n"
        for user_msg, assistant_msg in history:
            prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        prompt += f"### User:\n{message}\n\n### Assistant:"
    else:
        prompt = f"[System] {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
        prompt += f"[User] {message}\n[Assistant]"
    return prompt
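
# For example, with a one-turn history the Falcon branch above produces:
#   ### System:\n<system prompt>\n\n### User:\n<q1>\n\n### Assistant:\n<a1>\n\n
#   ### User:\n<q2>\n\n### Assistant: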

# Run model inference and return only the newly generated text
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
) -> str:
    model, tokenizer, device = loaded_models[model_name]
    full_prompt = build_prompt(message, history, system_prompt, model_name)
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    
    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": True,
        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id
    }
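    # With do_sample=True, temperature/top_p/top_k jointly shape the sampling
    # distribution; temperature must stay strictly positive when sampling,
    # which is why the UI slider below is floored at 0.1.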
    
    with torch.no_grad():
        output = model.generate(**inputs, **generate_kwargs)
    # Decode only the newly generated tokens; slicing the decoded string by
    # len(full_prompt) can misalign when detokenization is not exact.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Handle one chat turn: generate a reply and return the updated history
def process_chat(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
) -> List[Tuple[str, str]]:
    history = history or []  # the Chatbot value may start out empty
    response = generate_response(message, history, system_prompt, model_name, max_new_tokens, temperature, top_p, top_k)
    history = history + [(message, response)]
    return history

# Speech-to-text (Whisper); only loaded when a GPU/MPS device is available
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
    try:
        processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
        asr = {"processor": processor, "model": asr_model}
    except Exception as e:
        print(f"Failed to load the speech recognition model: {e}")
        asr = None

def transcribe(audio) -> str:
    if asr is None:
        return "Speech recognition model is not loaded"
    if not audio:
        return ""
    processor, model = asr["processor"], asr["model"]
    # gr.Audio(type="filepath") hands us a path; Whisper expects a 16 kHz mono waveform
    waveform, _ = librosa.load(audio, sr=16000)
    input_features = processor(waveform, sampling_rate=16000, return_tensors="pt").input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
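
# Whisper picks the output language automatically; to force e.g. Chinese
# transcription, forced decoder ids could be passed to generate(), along the
# lines of processor.get_decoder_prompt_ids(language="zh", task="transcribe").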

# Build the Gradio UI
with gr.Blocks(title="Open-Access Language Model Chat Assistant") as demo:
    gr.Markdown("## Chat App for Public Language Models (no gated access required)")
    
    with gr.Row():
        with gr.Column(scale=1):
            message_input = gr.Textbox(label="Message")
            system_prompt = gr.Textbox(
                label="System prompt",
                value="You are a helpful, knowledgeable AI assistant.",
            )
            model_choice = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Language model"
            )
            with gr.Accordion("Generation parameters", open=False):
                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p sampling")
                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k sampling")
            use_voice = gr.Checkbox(label="Use voice input")
            audio_input = gr.Audio(type="filepath", label="Voice input")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear conversation")
        
        with gr.Column(scale=2):
            chat_history = gr.Chatbot(label="Conversation history")
    
    # Voice input handling: transcribe the recording into the message box when enabled
    audio_input.change(
        fn=lambda audio, use: transcribe(audio) if use else "",
        inputs=[audio_input, use_voice],
        outputs=message_input
    )
    
    # Send message
    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice, max_new_tokens, temperature, top_p, top_k],
        outputs=chat_history
    )
    
    # Clear the conversation (returning None resets the Chatbot)
    clear_btn.click(fn=lambda: None, outputs=chat_history)

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)