hellokawei committed
Commit 0a9bd20 · verified · 1 Parent(s): 6ac95b8

Update app.py

Files changed (1)
  1. app.py +22 -44
app.py CHANGED
@@ -1,12 +1,18 @@
 import os
 import torch
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, WhisperProcessor, WhisperForConditionalGeneration
+from typing import List, Tuple  # New: typing imports
 
-# Read the Hugging Face token from an environment variable
+# Option A: use the custom environment variable name "language"
 hf_token = os.environ.get("language")
 if not hf_token:
-    raise EnvironmentError("HUGGINGFACE_HUB_TOKEN environment variable not found; add it in the Space settings")
+    raise EnvironmentError("Environment variable 'language' not found; add it in the Space settings")
+
+# Option B: switch to the canonical "HUGGINGFACE_HUB_TOKEN" (rename the Space env var to match)
+# hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
+# if not hf_token:
+#     raise EnvironmentError("HUGGINGFACE_HUB_TOKEN environment variable not found; add it in the Space settings")
 
 # Model configuration - use public models
 MODELS = {
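Note: the hunk above reads hf_token, but nowhere in the visible diff is the token actually consumed. If any of the configured models were gated, the token would normally be forwarded when loading them; a minimal sketch, assuming a gated repo (the repo id below is illustrative, not taken from this commit):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Pass the token read from the environment to the hub download calls
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta", token=hf_token)
    model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", token=hf_token)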
@@ -35,38 +41,29 @@ def load_model(model_name):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     return model.to(device), tokenizer, device
 
-# The rest of the code (UI construction and interaction logic) stays unchanged...
-
 # Initialize the models
 loaded_models = {}
 for model_name in MODELS:
     loaded_models[model_name] = load_model(model_name)
 
-# Build the chat prompt (different models may need different formats)
-def build_prompt(message, history, system_prompt, model_name):
-    # Zephyr/Mistral-style models use a simple format
-    if "Zephyr" in model_name or "Mistral" in model_name:
+# Build the chat prompt
+def build_prompt(message: str, history: List[Tuple[str, str]], system_prompt: str, model_name: str) -> str:
+    if "Zephyr" in model_name:
         prompt = f"System prompt: {system_prompt}\n"
         for user_msg, assistant_msg in history:
             prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
         prompt += f"User: {message}\nAssistant:"
-        return prompt
-
-    # Falcon models use a terser format
     elif "Falcon" in model_name:
         prompt = f"### System:\n{system_prompt}\n\n"
         for user_msg, assistant_msg in history:
             prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
         prompt += f"### User:\n{message}\n\n### Assistant:"
-        return prompt
-
-    # Fall back to a generic format
     else:
         prompt = f"[System] {system_prompt}\n"
         for user_msg, assistant_msg in history:
             prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
         prompt += f"[User] {message}\n[Assistant]"
-        return prompt
+    return prompt
 
 # Model inference function
 def generate_response(
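Note: the handwritten formats above work as a fallback, but tokenizers for chat-tuned models usually ship their own chat template. A hedged alternative sketch (not part of this commit; assumes the tokenizer defines a template and accepts a system role, which some Mistral-family templates do not):

    def build_prompt_via_template(message, history, system_prompt, tokenizer) -> str:
        # Convert the (user, assistant) tuples into chat-template messages
        messages = [{"role": "system", "content": system_prompt}]
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        # add_generation_prompt=True appends the assistant header so the model continues from there
        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)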
@@ -78,16 +75,11 @@ def generate_response(
     temperature: float,
     top_p: float,
     top_k: int
-):
+) -> str:
     model, tokenizer, device = loaded_models[model_name]
-
-    # Build the prompt
     full_prompt = build_prompt(message, history, system_prompt, model_name)
-
-    # Encode the input
     inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
 
-    # Generation parameters
     generate_kwargs = {
         "max_new_tokens": max_new_tokens,
         "temperature": temperature,
@@ -98,20 +90,10 @@
         "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id
     }
 
-    # Generate the response
     with torch.no_grad():
-        output = model.generate(
-            **inputs,
-            **generate_kwargs
-        )
-
-    # Decode the output
+        output = model.generate(**inputs, **generate_kwargs)
     response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Keep only the part the model generated
-    response = response[len(full_prompt):].strip()
-
-    return response
+    return response[len(full_prompt):].strip()
 
 # Handle user input
 def process_chat(
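Note: slicing the decoded string with len(full_prompt) assumes decode(skip_special_tokens=True) reproduces the prompt character-for-character, which not every tokenizer guarantees. A hedged variant that slices the token IDs instead (same variables as in the hunk above):

    with torch.no_grad():
        output = model.generate(**inputs, **generate_kwargs)
    prompt_len = inputs["input_ids"].shape[1]   # number of prompt tokens
    new_tokens = output[0][prompt_len:]         # the generated continuation only
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()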
@@ -123,11 +105,8 @@ def process_chat(
     temperature: float,
     top_p: float,
     top_k: int
-):
-    response = generate_response(
-        message, history, system_prompt, model_name,
-        max_new_tokens, temperature, top_p, top_k
-    )
+) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
+    response = generate_response(message, history, system_prompt, model_name, max_new_tokens, temperature, top_p, top_k)
     history.append((message, response))
     return history, history
 
@@ -135,14 +114,14 @@ def process_chat(
 asr = None
 if torch.cuda.is_available() or torch.backends.mps.is_available():
     try:
-        from transformers import WhisperProcessor, WhisperForConditionalGeneration
         processor = WhisperProcessor.from_pretrained("openai/whisper-base")
         asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
         asr = {"processor": processor, "model": asr_model}
-    except:
+    except Exception as e:
+        print(f"Failed to load the speech model: {e}")
         asr = None
 
-def transcribe(audio):
+def transcribe(audio) -> str:
     if asr is None:
         return "Speech recognition model not loaded"
     processor, model = asr["processor"], asr["model"]
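Note: the diff cuts transcribe off after unpacking the processor and model. A speculative completion, assuming gr.Audio(type="numpy") delivers a (sample_rate, int16 ndarray) tuple recorded as 16 kHz mono (Whisper's expected input rate):

    import numpy as np

    def transcribe(audio) -> str:
        if asr is None:
            return "Speech recognition model not loaded"
        processor, model = asr["processor"], asr["model"]
        sample_rate, data = audio                  # assumed gr.Audio(type="numpy") output
        data = data.astype(np.float32) / 32768.0  # int16 PCM -> floats in [-1, 1]
        inputs = processor(data, sampling_rate=sample_rate, return_tensors="pt")
        predicted_ids = model.generate(inputs.input_features.to(model.device))
        return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]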
@@ -189,8 +168,7 @@ with gr.Blocks(title="No-Auth Language Model Chat Assistant") as demo:
     # Send the message
     send_btn.click(
         fn=process_chat,
-        inputs=[message_input, chat_history, system_prompt, model_choice,
-                max_new_tokens, temperature, top_p, top_k],
+        inputs=[message_input, chat_history, system_prompt, model_choice, max_new_tokens, temperature, top_p, top_k],
         outputs=[chat_history, chat_history]
     )
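Note: a possible follow-up, not in this commit: recent Gradio versions let event listeners be chained with .then(), so the textbox could be cleared after each send:

    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice, max_new_tokens, temperature, top_p, top_k],
        outputs=[chat_history, chat_history],
    ).then(lambda: "", outputs=message_input)  # reset the input box once the reply is in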
 