siyuwang541 committed on
Commit
ee36856
·
verified ·
1 Parent(s): 493364e

refactored

Browse files
Files changed (1) hide show
  1. app.py +95 -36
app.py CHANGED
@@ -4,11 +4,20 @@ from huggingface_hub import InferenceClient
4
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
5
 
6
  def process(audio, image):
7
- # Process audio and image (example: return file info)
8
- audio_info = f"Audio sample rate: {audio[0]}, data length: {len(audio[1])}"
9
- image_info = f"Image dimensions: {image.shape}"
10
- return audio_info, image_info
 
 
 
 
 
 
 
11
 
 
 
12
  def respond(
13
  message,
14
  history: list[tuple[str, str]],
@@ -16,7 +25,19 @@ def respond(
16
  max_tokens,
17
  temperature,
18
  top_p,
 
 
19
  ):
 
 
 
 
 
 
 
 
 
 
20
  messages = [{"role": "system", "content": system_message}]
21
 
22
  for val in history:
@@ -40,42 +61,80 @@ def respond(
40
  response += token
41
  yield response
42
 
43
- # Corrected ChatInterface
44
- chatbot = gr.ChatInterface(
45
- respond, # This should be the first positional argument (the chat function)
46
- additional_inputs=[
47
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
48
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
49
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
50
- gr.Slider(
51
- minimum=0.1,
52
- maximum=1.0,
53
- value=0.95,
54
- step=0.05,
55
- label="Top-p (nucleus sampling)",
56
- ),
57
- ],
58
- # These are now keyword arguments
59
- chatbot=gr.Chatbot(),
60
- textbox=gr.Textbox(placeholder="Type your message here..."),
61
- title="Chat with Zephyr",
62
- description="Upload audio/image and chat with AI",
63
- examples=[["Hello"], ["How does this work?"]],
64
- )
65
-
66
- # Create separate interface for audio/image processing
67
  with gr.Blocks() as app:
68
  gr.Markdown("# ToDoAgent Multi-Modal Interface")
 
 
69
  with gr.Tab("Chat"):
70
- chatbot.Interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  with gr.Tab("Audio/Image Processing"):
72
- gr.Interface(
73
- fn=process,
74
- inputs=[
75
- gr.Audio(label="Upload audio", type="filepath"),
76
- gr.Image(label="Upload image", type="numpy")
77
- ],
78
- outputs=["text", "text"]
 
 
 
 
79
  )
80
 
81
  if __name__ == "__main__":
 
4
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
5
 
6
def process(audio, image):
    """Summarize an uploaded audio clip and image for display (demo helper).

    Args:
        audio: ``(sample_rate, data)`` tuple as produced by ``gr.Audio(type="numpy")``,
            or ``None`` when nothing was uploaded.
        image: numpy array from ``gr.Image(type="numpy")``, or ``None``.

    Returns:
        Tuple of two human-readable strings: (audio description, image description).
    """
    # Audio branch: guard on the missing case first, then unpack the tuple.
    if audio is None:
        audio_info = "未收到音频"
    else:
        sample_rate, audio_data = audio
        audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"

    # Image branch is simple enough for a conditional expression.
    image_info = "未收到图片" if image is None else f"图片尺寸: {image.shape}"

    return audio_info, image_info
21
  def respond(
22
  message,
23
  history: list[tuple[str, str]],
 
25
  max_tokens,
26
  temperature,
27
  top_p,
28
+ audio,
29
+ image
30
  ):
31
+ # 如果有上传的音频或图片,添加到消息中
32
+ if audio is not None:
33
+ # 这里可以添加音频处理逻辑
34
+ audio_sample_rate, audio_data = audio
35
+ message += f"\n[附加音频信息: 采样率 {audio_sample_rate}Hz, 时长 {len(audio_data)/audio_sample_rate:.2f}秒]"
36
+
37
+ if image is not None:
38
+ # 这里可以添加图片处理逻辑
39
+ message += f"\n[附加图片信息: 尺寸 {image.shape}]"
40
+
41
  messages = [{"role": "system", "content": system_message}]
42
 
43
  for val in history:
 
61
  response += token
62
  yield response
63
 
64
+ # 创建自定义的聊天界面
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
with gr.Blocks() as app:
    gr.Markdown("# ToDoAgent Multi-Modal Interface")

    # Tab 1: streaming chat with optional audio/image attachments.
    with gr.Tab("Chat"):
        chatbot = gr.Chatbot(height=500)
        msg = gr.Textbox(label="输入消息", placeholder="输入您的问题...")

        # Upload row for the two optional attachments.
        with gr.Row():
            audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
            image_input = gr.Image(label="上传图片", type="numpy")

        # Generation settings, collapsed by default.
        with gr.Accordion("高级设置", open=False):
            system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="最大生成长度")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="温度")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")

        submit_btn = gr.Button("发送", variant="primary")
        clear = gr.Button("清除聊天")

        def queue_user_message(user_message, chat_history):
            # Echo the user's turn into the history with an empty bot slot,
            # and clear the textbox.
            return "", chat_history + [[user_message, None]]

        def stream_bot_reply(chat_history, system_message, max_tokens, temperature, top_p, audio, image):
            # The last history entry holds the pending user message.
            user_message = chat_history[-1][0]
            # Stream partial responses from respond() into the open bot slot.
            # NOTE(review): loop body inferred from a mangled diff — assumes
            # each partial response is yielded as it arrives; confirm against
            # the original app.py.
            for partial in respond(
                user_message,
                chat_history[:-1],
                system_message,
                max_tokens,
                temperature,
                top_p,
                audio,
                image,
            ):
                chat_history[-1][1] = partial
                yield chat_history

        # Wire both Enter-to-send and the explicit send button to the same
        # two-step pipeline: record the user turn, then stream the reply.
        shared_inputs = [chatbot, system_msg, max_tokens, temperature, top_p, audio_input, image_input]
        msg.submit(queue_user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
            stream_bot_reply, shared_inputs, chatbot
        )
        submit_btn.click(queue_user_message, [msg, chatbot], [msg, chatbot], queue=False).then(
            stream_bot_reply, shared_inputs, chatbot
        )
        # Returning None resets the Chatbot component.
        clear.click(lambda: None, None, chatbot, queue=False)

    # Tab 2: standalone audio/image inspection backed by process().
    with gr.Tab("Audio/Image Processing"):
        gr.Markdown("## 处理音频和图片")
        audio_processor = gr.Audio(label="上传音频", type="numpy")
        image_processor = gr.Image(label="上传图片", type="numpy")
        process_btn = gr.Button("处理", variant="primary")
        audio_output = gr.Textbox(label="音频信息")
        image_output = gr.Textbox(label="图片信息")

        process_btn.click(
            process,
            inputs=[audio_processor, image_processor],
            outputs=[audio_output, image_output],
        )
139
 
140
  if __name__ == "__main__":