anaspro committed on
Commit
1fcc5d5
·
1 Parent(s): 8c95942

Add voice recording capability - microphone input for direct voice-to-text

Browse files
Files changed (1) hide show
  1. app.py +93 -19
app.py CHANGED
@@ -243,25 +243,99 @@ examples = [
243
  ["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
244
  ]
245
 
246
- # Create the chat interface: a multimodal gr.ChatInterface driven by generate(), with the system prompt, max-new-tokens slider and voice-output checkbox passed as additional inputs
247
- demo = gr.ChatInterface(
248
- fn=generate,
249
- type="messages",
250
- textbox=gr.MultimodalTextbox(
251
- file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
252
- file_count="multiple",
253
- autofocus=True,
254
- ),
255
- multimodal=True,
256
- additional_inputs=[
257
- gr.Textbox(label="System Prompt", value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا"),
258
- gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
259
- gr.Checkbox(label="Enable Voice Output", value=False),
260
- ],
261
- title="Shako IRAQI AI",
262
- examples=examples,
263
- stop_btn=False,
264
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
  if __name__ == "__main__":  # launch only when executed as a script, not when imported
 
267
  demo.launch()  # start the Gradio app for the module-level `demo` object
 
243
  ["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
244
  ]
245
 
246
# Create custom interface with voice recording
def _to_chat_messages(payload, role):
    """Convert one chat payload into ``gr.Chatbot(type="messages")`` entries.

    Args:
        payload: Either plain content (typically a string) or a multimodal
            dict carrying ``"text"`` and optionally ``"files"``.
        role: ``"user"`` or ``"assistant"``.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts: one per file
        (as ``{"path": ...}`` content) followed by the text, if any.
    """
    if not isinstance(payload, dict):
        return [{"role": role, "content": payload}]

    entries = []
    for item in payload.get("files") or []:
        # Files may arrive as plain paths or as dicts that carry a "path" key.
        path = item.get("path") if isinstance(item, dict) else item
        entries.append({"role": role, "content": {"path": path}})
    text = payload.get("text")
    if text:
        entries.append({"role": role, "content": text})
    return entries


def create_interface():
    """Build the Shako IRAQI AI Blocks UI with text, file and microphone input.

    The layout is a chatbot, a multimodal textbox next to a microphone
    recorder, an accordion of advanced settings, and a few text examples.
    Submitting the textbox sends the typed text, any uploaded files and the
    current recording (if present) to ``generate`` and streams the reply
    into the chatbot.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is expected to launch it.
    """
    with gr.Blocks(title="Shako IRAQI AI", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Shako IRAQI AI 🤖")
        gr.Markdown("تحدث مع الذكاء الاصطناعي العراقي - يدعم الصور والفيديو والصوت!")

        chatbot = gr.Chatbot(type="messages", height=500)

        with gr.Row():
            with gr.Column(scale=4):
                textbox = gr.MultimodalTextbox(
                    file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
                    file_count="multiple",
                    placeholder="اكتب رسالتك هنا أو ارفع ملف...",
                    show_label=False,
                    autofocus=True,
                )

            with gr.Column(scale=1):
                voice_input = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="🎤 تسجيل صوتي",
                    show_label=True,
                )

        with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
            system_prompt = gr.Textbox(
                label="System Prompt",
                value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا",
                lines=2,
            )
            max_tokens = gr.Slider(
                label="Max New Tokens",
                minimum=100,
                maximum=2000,
                step=10,
                value=700,
            )
            enable_voice = gr.Checkbox(
                label="تفعيل الصوت في الردود",
                value=False,
            )

        def process_input(message, voice_file, history, prompt_text, token_limit, speak_reply):
            """Merge text, uploads and recording, then stream the chat history.

            Yields the complete message list on every step, because a
            ``gr.Chatbot(type="messages")`` output replaces its whole value
            with whatever is yielded.
            """
            # The textbox value can be None (e.g. after being cleared).
            message = message or {}
            attachments = list(message.get("files") or [])
            if voice_file:
                # Keep uploaded files and add the recording alongside them
                # instead of replacing them.
                attachments.append(voice_file)
            request = {"text": message.get("text") or "", "files": attachments}

            previous_turns = list(history or [])
            with_user_turn = previous_turns + _to_chat_messages(request, "user")
            # Show the user's turn immediately, before the model starts.
            yield with_user_turn

            # NOTE(review): `generate` is defined elsewhere in this file; it is
            # assumed to yield either plain text or a dict with "text" (and
            # optionally "files"), which is what the previous loop handled.
            for partial in generate(request, previous_turns, prompt_text, token_limit, speak_reply):
                yield with_user_turn + _to_chat_messages(partial, "assistant")

        # Handle submission: stream the reply, then reset both inputs so the
        # next message does not resend the old text or recording.
        # NOTE(review): a recording on its own is only sent once the textbox
        # is submitted; there is no separate trigger on the audio component.
        textbox.submit(
            fn=process_input,
            inputs=[textbox, voice_input, chatbot, system_prompt, max_tokens, enable_voice],
            outputs=[chatbot],
        ).then(
            fn=lambda: (None, None),
            inputs=[],
            outputs=[textbox, voice_input],
        )

        # Examples
        gr.Examples(
            examples=[
                "مرحبا، كيف حالك؟",
                "شرح لي عن الذكاء الاصطناعي",
                "أخبرني نكتة عراقية",
            ],
            inputs=[textbox],
        )

    return demo
338
 
339
  if __name__ == "__main__":  # build and launch only when executed as a script, not when imported
340
+ demo = create_interface()  # construct the Blocks app returned by create_interface()
341
  demo.launch()  # start the Gradio app