Spaces:
Sleeping
Sleeping
refactored
Browse files
app.py
CHANGED
|
@@ -4,11 +4,20 @@ from huggingface_hub import InferenceClient
|
|
| 4 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
| 5 |
|
| 6 |
def process(audio, image):
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
| 12 |
def respond(
|
| 13 |
message,
|
| 14 |
history: list[tuple[str, str]],
|
|
@@ -16,7 +25,19 @@ def respond(
|
|
| 16 |
max_tokens,
|
| 17 |
temperature,
|
| 18 |
top_p,
|
|
|
|
|
|
|
| 19 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
messages = [{"role": "system", "content": system_message}]
|
| 21 |
|
| 22 |
for val in history:
|
|
@@ -40,42 +61,80 @@ def respond(
|
|
| 40 |
response += token
|
| 41 |
yield response
|
| 42 |
|
| 43 |
-
#
|
| 44 |
-
chatbot = gr.ChatInterface(
|
| 45 |
-
respond, # This should be the first positional argument (the chat function)
|
| 46 |
-
additional_inputs=[
|
| 47 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
| 48 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 49 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 50 |
-
gr.Slider(
|
| 51 |
-
minimum=0.1,
|
| 52 |
-
maximum=1.0,
|
| 53 |
-
value=0.95,
|
| 54 |
-
step=0.05,
|
| 55 |
-
label="Top-p (nucleus sampling)",
|
| 56 |
-
),
|
| 57 |
-
],
|
| 58 |
-
# These are now keyword arguments
|
| 59 |
-
chatbot=gr.Chatbot(),
|
| 60 |
-
textbox=gr.Textbox(placeholder="Type your message here..."),
|
| 61 |
-
title="Chat with Zephyr",
|
| 62 |
-
description="Upload audio/image and chat with AI",
|
| 63 |
-
examples=[["Hello"], ["How does this work?"]],
|
| 64 |
-
)
|
| 65 |
-
|
| 66 |
-
# Create separate interface for audio/image processing
|
| 67 |
with gr.Blocks() as app:
|
| 68 |
gr.Markdown("# ToDoAgent Multi-Modal Interface")
|
|
|
|
|
|
|
| 69 |
with gr.Tab("Chat"):
|
| 70 |
-
chatbot.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
with gr.Tab("Audio/Image Processing"):
|
| 72 |
-
gr.
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
)
|
| 80 |
|
| 81 |
if __name__ == "__main__":
|
|
|
|
| 4 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
| 5 |
|
| 6 |
def process(audio, image):
    """Summarize an uploaded audio clip and image as two display strings.

    Args:
        audio: ``None`` or a ``(sample_rate, samples)`` pair; only the
            sample rate and ``len(samples)`` are read.
        image: ``None`` or any object exposing a ``shape`` attribute.

    Returns:
        A ``(audio_info, image_info)`` tuple of user-facing strings. Each
        element falls back to a "nothing received" message when the
        corresponding input is ``None``.
    """
    if audio is None:
        audio_info = "未收到音频"
    else:
        sample_rate, audio_data = audio
        audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"

    if image is None:
        image_info = "未收到图片"
    else:
        image_info = f"图片尺寸: {image.shape}"

    return audio_info, image_info
|
| 20 |
+
|
| 21 |
def respond(
|
| 22 |
message,
|
| 23 |
history: list[tuple[str, str]],
|
|
|
|
| 25 |
max_tokens,
|
| 26 |
temperature,
|
| 27 |
top_p,
|
| 28 |
+
audio,
|
| 29 |
+
image
|
| 30 |
):
|
| 31 |
+
# 如果有上传的音频或图片,添加到消息中
|
| 32 |
+
if audio is not None:
|
| 33 |
+
# 这里可以添加音频处理逻辑
|
| 34 |
+
audio_sample_rate, audio_data = audio
|
| 35 |
+
message += f"\n[附加音频信息: 采样率 {audio_sample_rate}Hz, 时长 {len(audio_data)/audio_sample_rate:.2f}秒]"
|
| 36 |
+
|
| 37 |
+
if image is not None:
|
| 38 |
+
# 这里可以添加图片处理逻辑
|
| 39 |
+
message += f"\n[附加图片信息: 尺寸 {image.shape}]"
|
| 40 |
+
|
| 41 |
messages = [{"role": "system", "content": system_message}]
|
| 42 |
|
| 43 |
for val in history:
|
|
|
|
| 61 |
response += token
|
| 62 |
yield response
|
| 63 |
|
| 64 |
+
# 创建自定义的聊天界面
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
with gr.Blocks() as app:
    gr.Markdown("# ToDoAgent Multi-Modal Interface")

    # Tab 1: chat with optional audio/image attachments.
    with gr.Tab("Chat"):
        chatbot = gr.Chatbot(height=500)
        msg = gr.Textbox(label="输入消息", placeholder="输入您的问题...")

        # Upload area: both components are created with type="numpy".
        with gr.Row():
            audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
            image_input = gr.Image(label="上传图片", type="numpy")

        # Generation settings, collapsed by default.
        with gr.Accordion("高级设置", open=False):
            system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="最大生成长度")
            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="温度")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")

        submit_btn = gr.Button("发送", variant="primary")
        clear = gr.Button("清除聊天")

        def user(user_message, chat_history):
            """Append the user's message as a pending turn and clear the textbox.

            Returns the new textbox value (empty string) and a new history
            list whose last entry is ``[user_message, None]``. A ``None``
            history is treated as empty; the incoming list is not mutated.
            """
            return "", (chat_history or []) + [[user_message, None]]

        def bot(chat_history, system_message, max_tokens, temperature, top_p, audio, image):
            """Fill in the reply for the last pending turn of ``chat_history``.

            Calls ``respond`` with the last user message, the earlier turns
            as history, the generation settings and the optional
            attachments, and yields the updated history after each value
            ``respond`` produces.
            """
            # Nothing to answer (e.g. the preceding `user` step did not run).
            if not chat_history:
                yield chat_history
                return

            # The last entry is the pending turn appended by `user`.
            user_message = chat_history[-1][0]

            streamed = False
            for partial in respond(
                user_message,
                chat_history[:-1],
                system_message,
                max_tokens,
                temperature,
                top_p,
                audio,
                image,
            ):
                streamed = True
                chat_history[-1][1] = partial
                yield chat_history

            # Always emit a final state, even if `respond` produced nothing.
            if not streamed:
                chat_history[-1][1] = ""
                yield chat_history

        # Pressing Enter in the textbox and clicking the send button share
        # the same two-step flow: record the user turn, then generate a reply.
        bot_inputs = [chatbot, system_msg, max_tokens, temperature, top_p, audio_input, image_input]
        for trigger in (msg.submit, submit_btn.click):
            trigger(user, [msg, chatbot], [msg, chatbot], queue=False).then(
                bot, bot_inputs, chatbot
            )

        # Reset the conversation display.
        clear.click(lambda: None, None, chatbot, queue=False)

    # Tab 2: standalone audio/image inspection via `process`.
    with gr.Tab("Audio/Image Processing"):
        gr.Markdown("## 处理音频和图片")
        audio_processor = gr.Audio(label="上传音频", type="numpy")
        image_processor = gr.Image(label="上传图片", type="numpy")
        process_btn = gr.Button("处理", variant="primary")
        audio_output = gr.Textbox(label="音频信息")
        image_output = gr.Textbox(label="图片信息")

        process_btn.click(
            process,
            inputs=[audio_processor, image_processor],
            outputs=[audio_output, image_output],
        )
|
| 139 |
|
| 140 |
if __name__ == "__main__":
|