Spaces:

anaspro
/

chatbox

Runtime error

App Files Files Community

anaspro committed on Oct 29

Commit

1fcc5d5

1 Parent(s): 8c95942

Add voice recording capability - microphone input for direct voice-to-text

Browse files

Files changed (1) hide show

app.py +93 -19

app.py CHANGED Viewed

@@ -243,25 +243,99 @@ examples = [
     ["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
 ]
-# Create the chat interface
-demo = gr.ChatInterface(
-    fn=generate,
-    type="messages",
-    textbox=gr.MultimodalTextbox(
-        file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
-        file_count="multiple",
-        autofocus=True,
-    ),
-    multimodal=True,
-    additional_inputs=[
-        gr.Textbox(label="System Prompt", value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا"),
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
-        gr.Checkbox(label="Enable Voice Output", value=False),
-    ],
-    title="Shako IRAQI AI",
-    examples=examples,
-    stop_btn=False,
-)
 if __name__ == "__main__":
     demo.launch()

     ["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
 ]
+# Create custom interface with voice recording
+def create_interface():
+    with gr.Blocks(title="Shako IRAQI AI", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# Shako IRAQI AI 🤖")
+        gr.Markdown("تحدث مع الذكاء الاصطناعي العراقي - يدعم الصور والفيديو والصوت!")
+        chatbot = gr.Chatbot(type="messages", height=500)
+        with gr.Row():
+            with gr.Column(scale=4):
+                textbox = gr.MultimodalTextbox(
+                    file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
+                    file_count="multiple",
+                    placeholder="اكتب رسالتك هنا أو ارفع ملف...",
+                    show_label=False,
+                    autofocus=True,
+                )
+            with gr.Column(scale=1):
+                voice_input = gr.Audio(
+                    sources=["microphone"],
+                    type="filepath",
+                    label="🎤 تسجيل صوتي",
+                    show_label=True,
+                )
+        with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
+            system_prompt = gr.Textbox(
+                label="System Prompt",
+                value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا",
+                lines=2
+            )
+            max_tokens = gr.Slider(
+                label="Max New Tokens",
+                minimum=100,
+                maximum=2000,
+                step=10,
+                value=700
+            )
+            enable_voice = gr.Checkbox(
+                label="تفعيل الصوت في الردود",
+                value=False
+            )
+        def process_input(message, voice_file, history, system_prompt, max_tokens, enable_voice):
+            """Process both text and voice inputs"""
+            if voice_file:
+                # If voice input is provided, create a message with the audio file
+                voice_message = {"files": [voice_file], "text": message.get("text", "")}
+            else:
+                voice_message = message
+            # Generate response
+            response_text = ""
+            for partial_response in generate(voice_message, history, system_prompt, max_tokens, enable_voice):
+                if isinstance(partial_response, dict):
+                    # Handle audio response
+                    response_text = partial_response["text"]
+                    yield partial_response
+                else:
+                    response_text = partial_response
+                    yield partial_response
+        # Handle submission
+        textbox.submit(
+            fn=process_input,
+            inputs=[textbox, voice_input, chatbot, system_prompt, max_tokens, enable_voice],
+            outputs=[chatbot]
+        ).then(
+            fn=lambda: None,
+            inputs=[],
+            outputs=[voice_input]  # Clear voice input after submission
+        )
+        # Clear voice input when text is submitted
+        textbox.submit(
+            fn=lambda: None,
+            inputs=[],
+            outputs=[voice_input]
+        )
+        # Examples
+        gr.Examples(
+            examples=[
+                "مرحبا، كيف حالك؟",
+                "شرح لي عن الذكاء الاصطناعي",
+                "أخبرني نكتة عراقية"
+            ],
+            inputs=[textbox]
+        )
+    return demo
 if __name__ == "__main__":
+    # Build the Blocks app only when this file is run as a script, then launch it.
+    demo = create_interface()
     demo.launch()