Spaces:

siyuwang541
/

ToDoAgent

Sleeping

App Files Files Community

siyuwang541 committed on Jun 3, 2025

Commit

ee36856

verified ·

1 Parent(s): 493364e

refactored

Browse files

Files changed (1) hide show

app.py +95 -36

app.py CHANGED Viewed

@@ -4,11 +4,20 @@ from huggingface_hub import InferenceClient
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def process(audio, image):
-    # Process audio and image (example: return file info)
-    audio_info = f"Audio sample rate: {audio[0]}, data length: {len(audio[1])}"
-    image_info = f"Image dimensions: {image.shape}"
-    return audio_info, image_info
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -16,7 +25,19 @@ def respond(
     max_tokens,
     temperature,
     top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
     for val in history:
@@ -40,42 +61,80 @@ def respond(
         response += token
         yield response
-# Corrected ChatInterface
-chatbot = gr.ChatInterface(
-    respond,  # This should be the first positional argument (the chat function)
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-    # These are now keyword arguments
-    chatbot=gr.Chatbot(),
-    textbox=gr.Textbox(placeholder="Type your message here..."),
-    title="Chat with Zephyr",
-    description="Upload audio/image and chat with AI",
-    examples=[["Hello"], ["How does this work?"]],
-)
-# Create separate interface for audio/image processing
 with gr.Blocks() as app:
     gr.Markdown("# ToDoAgent Multi-Modal Interface")
     with gr.Tab("Chat"):
-        chatbot.Interface()
     with gr.Tab("Audio/Image Processing"):
-        gr.Interface(
-            fn=process,
-            inputs=[
-                gr.Audio(label="Upload audio", type="filepath"),
-                gr.Image(label="Upload image", type="numpy")
-            ],
-            outputs=["text", "text"]
         )
 if __name__ == "__main__":

 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # module-level inference client created for the "HuggingFaceH4/zephyr-7b-beta" model id
 def process(audio, image):
+    """Example function that summarises an uploaded audio clip and image.
+
+    Args:
+        audio: ``None``, or a two-item value unpacked as
+            ``(sample_rate, audio_data)``.
+        image: ``None``, or an object exposing a ``.shape`` attribute
+            (presumably a numpy array, since the UI components that feed
+            this function use ``type="numpy"`` -- confirm against caller).
+
+    Returns:
+        A ``(audio_info, image_info)`` tuple of display strings. Each one
+        falls back to a "not received" message when its input is ``None``.
+    """
+    if audio is not None:
+        sample_rate, audio_data = audio
+        audio_info = f"音频采样率: {sample_rate}Hz, 数据长度: {len(audio_data)}"
+    else:
+        audio_info = "未收到音频"
+    if image is not None:
+        image_info = f"图片尺寸: {image.shape}"
+    else:
+        image_info = "未收到图片"
+    return audio_info, image_info
 def respond(
     message,
     history: list[tuple[str, str]],
     max_tokens,
     temperature,
     top_p,
+    audio,
+    image
 ):
+    # 如果有上传的音频或图片，添加到消息中
+    if audio is not None:
+        # 这里可以添加音频处理逻辑
+        audio_sample_rate, audio_data = audio
+        message += f"\n[附加音频信息: 采样率 {audio_sample_rate}Hz, 时长 {len(audio_data)/audio_sample_rate:.2f}秒]"
+    if image is not None:
+        # 这里可以添加图片处理逻辑
+        message += f"\n[附加图片信息: 尺寸 {image.shape}]"
     messages = [{"role": "system", "content": system_message}]
     for val in history:
         response += token
         yield response
+# Build the custom chat interface
 with gr.Blocks() as app:
     gr.Markdown("# ToDoAgent Multi-Modal Interface")
+    # Create the two tabs
     with gr.Tab("Chat"):
+        chatbot = gr.Chatbot(height=500)
+        msg = gr.Textbox(label="输入消息", placeholder="输入您的问题...")
+        # Upload area
+        with gr.Row():
+            audio_input = gr.Audio(label="上传语音", type="numpy", sources=["upload", "microphone"])
+            image_input = gr.Image(label="上传图片", type="numpy")
+        # Settings area
+        with gr.Accordion("高级设置", open=False):
+            system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="系统提示")
+            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="最大生成长度")
+            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="温度")
+            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
+        # Submit button
+        submit_btn = gr.Button("发送", variant="primary")
+        # Clear button
+        clear = gr.Button("清除聊天")
+        # Event handlers
+        def user(user_message, chat_history):
+            """Clear the input box and append the new message with a pending (None) reply."""
+            return "", chat_history + [[user_message, None]]
+        def bot(chat_history, system_message, max_tokens, temperature, top_p, audio, image):
+            """Stream the model reply into the last history entry, yielding the history after each chunk."""
+            # Get the last user message
+            user_message = chat_history[-1][0]
+            # Generate the response
+            bot_response = ""
+            for response in respond(
+                user_message,
+                chat_history[:-1],
+                system_message,
+                max_tokens,
+                temperature,
+                top_p,
+                audio,
+                image
+            ):
+                # Each yielded value replaces the previous reply text rather than being appended to it
+                bot_response = response
+                chat_history[-1][1] = bot_response
+                yield chat_history
+        # Wire up the events: first record the user message, then stream the bot reply
+        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+            bot, [chatbot, system_msg, max_tokens, temperature, top_p, audio_input, image_input], chatbot
+        )
+        submit_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+            bot, [chatbot, system_msg, max_tokens, temperature, top_p, audio_input, image_input], chatbot
+        )
+        # Reset the chat display by sending None to the chatbot component
+        clear.click(lambda: None, None, chatbot, queue=False)
     with gr.Tab("Audio/Image Processing"):
+        gr.Markdown("## 处理音频和图片")
+        audio_processor = gr.Audio(label="上传音频", type="numpy")
+        image_processor = gr.Image(label="上传图片", type="numpy")
+        process_btn = gr.Button("处理", variant="primary")
+        audio_output = gr.Textbox(label="音频信息")
+        image_output = gr.Textbox(label="图片信息")
+        # Run process() on the two uploads and show each returned string in its own textbox
+        process_btn.click(
+            process,
+            inputs=[audio_processor, image_processor],
+            outputs=[audio_output, image_output]
         )
 if __name__ == "__main__":