anaspro
commited on
Commit
·
1fcc5d5
1
Parent(s):
8c95942
Add voice recording capability - microphone input for direct voice-to-text
Browse files
app.py
CHANGED
|
@@ -243,25 +243,99 @@ examples = [
|
|
| 243 |
["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
|
| 244 |
]
|
| 245 |
|
| 246 |
-
# Create
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
if __name__ == "__main__":
|
|
|
|
| 267 |
demo.launch()
|
|
|
|
| 243 |
["انت موديل عراقي تحكي هعراقي فقط وتكون ترفيهي", 700]
|
| 244 |
]
|
| 245 |
|
| 246 |
+
# Create custom interface with voice recording
|
| 247 |
+
def create_interface():
|
| 248 |
+
with gr.Blocks(title="Shako IRAQI AI", theme=gr.themes.Soft()) as demo:
|
| 249 |
+
gr.Markdown("# Shako IRAQI AI 🤖")
|
| 250 |
+
gr.Markdown("تحدث مع الذكاء الاصطناعي العراقي - يدعم الصور والفيديو والصوت!")
|
| 251 |
+
|
| 252 |
+
chatbot = gr.Chatbot(type="messages", height=500)
|
| 253 |
+
|
| 254 |
+
with gr.Row():
|
| 255 |
+
with gr.Column(scale=4):
|
| 256 |
+
textbox = gr.MultimodalTextbox(
|
| 257 |
+
file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
|
| 258 |
+
file_count="multiple",
|
| 259 |
+
placeholder="اكتب رسالتك هنا أو ارفع ملف...",
|
| 260 |
+
show_label=False,
|
| 261 |
+
autofocus=True,
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
with gr.Column(scale=1):
|
| 265 |
+
voice_input = gr.Audio(
|
| 266 |
+
sources=["microphone"],
|
| 267 |
+
type="filepath",
|
| 268 |
+
label="🎤 تسجيل صوتي",
|
| 269 |
+
show_label=True,
|
| 270 |
+
)
|
| 271 |
+
|
| 272 |
+
with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
|
| 273 |
+
system_prompt = gr.Textbox(
|
| 274 |
+
label="System Prompt",
|
| 275 |
+
value="انت ذكاء صناعي يتحدث باللهجة العراقية بس ما تستخدم فصحى ابدا",
|
| 276 |
+
lines=2
|
| 277 |
+
)
|
| 278 |
+
max_tokens = gr.Slider(
|
| 279 |
+
label="Max New Tokens",
|
| 280 |
+
minimum=100,
|
| 281 |
+
maximum=2000,
|
| 282 |
+
step=10,
|
| 283 |
+
value=700
|
| 284 |
+
)
|
| 285 |
+
enable_voice = gr.Checkbox(
|
| 286 |
+
label="تفعيل الصوت في الردود",
|
| 287 |
+
value=False
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
def process_input(message, voice_file, history, system_prompt, max_tokens, enable_voice):
|
| 291 |
+
"""Process both text and voice inputs"""
|
| 292 |
+
if voice_file:
|
| 293 |
+
# If voice input is provided, create a message with the audio file
|
| 294 |
+
voice_message = {"files": [voice_file], "text": message.get("text", "")}
|
| 295 |
+
else:
|
| 296 |
+
voice_message = message
|
| 297 |
+
|
| 298 |
+
# Generate response
|
| 299 |
+
response_text = ""
|
| 300 |
+
for partial_response in generate(voice_message, history, system_prompt, max_tokens, enable_voice):
|
| 301 |
+
if isinstance(partial_response, dict):
|
| 302 |
+
# Handle audio response
|
| 303 |
+
response_text = partial_response["text"]
|
| 304 |
+
yield partial_response
|
| 305 |
+
else:
|
| 306 |
+
response_text = partial_response
|
| 307 |
+
yield partial_response
|
| 308 |
+
|
| 309 |
+
# Handle submission
|
| 310 |
+
textbox.submit(
|
| 311 |
+
fn=process_input,
|
| 312 |
+
inputs=[textbox, voice_input, chatbot, system_prompt, max_tokens, enable_voice],
|
| 313 |
+
outputs=[chatbot]
|
| 314 |
+
).then(
|
| 315 |
+
fn=lambda: None,
|
| 316 |
+
inputs=[],
|
| 317 |
+
outputs=[voice_input] # Clear voice input after submission
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
# Clear voice input when text is submitted
|
| 321 |
+
textbox.submit(
|
| 322 |
+
fn=lambda: None,
|
| 323 |
+
inputs=[],
|
| 324 |
+
outputs=[voice_input]
|
| 325 |
+
)
|
| 326 |
+
|
| 327 |
+
# Examples
|
| 328 |
+
gr.Examples(
|
| 329 |
+
examples=[
|
| 330 |
+
"مرحبا، كيف حالك؟",
|
| 331 |
+
"شرح لي عن الذكاء الاصطناعي",
|
| 332 |
+
"أخبرني نكتة عراقية"
|
| 333 |
+
],
|
| 334 |
+
inputs=[textbox]
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
return demo
|
| 338 |
|
| 339 |
if __name__ == "__main__":
|
| 340 |
+
demo = create_interface()
|
| 341 |
demo.launch()
|