Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from src.response.gpt import gpt_response | |
| from src.SpeechToText.sr import transcribe_audio, clear_history | |
| from src.SpeechToText.hamsa import transcribe_audio_hamsa | |
| from datetime import datetime | |
| from loguru import logger | |
# Create Gradio Interface
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped. In particular, whether the history box
# sits inside the right-hand column or below the row should be confirmed.
# NOTE(review): the leading glyphs in several labels look mis-encoded
# (probably emoji damaged in transit). They are kept verbatim here; confirm
# against the original file before changing them.
with gr.Blocks(title="Multilingual Speech to Text") as iface:
    gr.Markdown("# ποΈ Multilingual Speech to Text (Arabic & English)")
    gr.Markdown("Speak in Arabic or English, or let the system auto-detect the language!")

    with gr.Row():
        # Input side: language choice, audio capture and the action buttons.
        with gr.Column(scale=1):
            language_selector = gr.Dropdown(
                choices=[
                    "English",
                    "Arabic",
                    "Arabic (Egypt)",
                    "Arabic (UAE)",
                    "Arabic (Lebanon)",
                    "Arabic (Saudi Arabia)",
                    "Arabic (Kuwait)",
                    "Arabic (Jordan)",
                    "Auto-detect",
                ],
                value="Auto-detect",
                label="Select Language",
            )
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="π€ Speak or Upload Audio",
            )
            with gr.Row():
                submit_btn = gr.Button("π Transcribe", variant="primary")
                clear_btn = gr.Button("ποΈ Clear History", variant="secondary")

        # Output side: latest transcription, AI reply and running history.
        with gr.Column(scale=1):
            current_output = gr.Textbox(
                label="Current Transcription",
                placeholder="Your transcribed text will appear here...",
                lines=3,
                rtl=True,  # Right-to-left for Arabic text
            )
            gpt_output = gr.Textbox(
                label="AI Therapeutic Response",
                placeholder="AI response will appear here...",
                lines=5,
                rtl=True,
                interactive=False,
            )
            history_output = gr.Textbox(
                label="Conversation History",
                placeholder="All transcriptions will be saved here with timestamps...",
                lines=10,
                max_lines=20,
                interactive=False,
            )

    # State to maintain history
    history_state = gr.State("")

    def process_audio_and_respond(audio, language, history):
        """Transcribe *audio*, fetch a GPT reply and prepend the exchange to the history.

        The primary transcriber is tried first; if it raises, the Hamsa
        transcriber is used as a fallback. GPT is only queried when a
        non-blank transcription was actually produced, so failure notices are
        never sent to the model or recorded as user queries.

        Args:
            audio: Value of the audio component (a file path, since the
                component is declared with ``type="filepath"``). Falsy when
                there is nothing to transcribe.
            language: The entry selected in the language dropdown.
            history: The conversation-history text accumulated so far.

        Returns:
            A ``(history, transcription, gpt_text)`` tuple, in the order of
            the ``[history_state, current_output, gpt_output]`` outputs.
        """
        # The same handler is wired to the audio ``change`` event, which can
        # presumably also fire when the recording is cleared; skip all remote
        # calls when there is no audio to work on.
        if not audio:
            return history, "No audio provided. Please record or upload audio first.", ""

        updated_history, current_text = history, ""
        try:
            updated_history, current_text = transcribe_audio(audio, language, history)
        except Exception as primary_error:
            # Log first so the primary failure is recorded even if the
            # fallback raises as well.
            logger.error(f"Transcription failed. Apply Fallback with Hamsa API: {primary_error}")
            try:
                updated_history, current_text = transcribe_audio_hamsa(audio, language, history)
            except Exception as fallback_error:
                logger.error(f"Hamsa fallback transcription failed: {fallback_error}")
        else:
            logger.info(f"Transcription successful: {current_text}")

        # Nothing usable was transcribed: report it to the user only.
        if not (current_text and current_text.strip()):
            return updated_history, "Transcription failed. Please try again.", ""

        try:
            response = gpt_response(current_text)
            answer = response["response"]
        except Exception as gpt_error:
            # Keep the successful transcription visible even when the reply fails.
            logger.error(f"GPT response failed: {gpt_error}")
            return updated_history, current_text, "AI response unavailable. Please try again."

        emotion = response.get("emotional_state", "Unknown")
        detected_lang = response.get("detected_language", "Unknown")
        gpt_result = f"Response: {answer} \n\nEmotion: {emotion}"

        # Update history with both query and answer (newest entry first).
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        history_entry = (
            f"[{timestamp}] [{language}] [{detected_lang}]\n"
            f"Query: {current_text}\n"
            f"Answer: {answer}\n"  # newline keeps the separator on its own line
            "-----------------------\n\n"
        )
        updated_history = history_entry + (updated_history or "")

        return updated_history, current_text, gpt_result

    def _wire_transcription(trigger):
        """Attach the transcribe-and-respond pipeline to the event *trigger*."""
        trigger(
            fn=process_audio_and_respond,
            inputs=[audio_input, language_selector, history_state],
            outputs=[history_state, current_output, gpt_output],
        ).then(
            # Mirror the stored history into the visible history box.
            fn=lambda h: h,
            inputs=[history_state],
            outputs=[history_output],
        )

    # Event handlers
    _wire_transcription(submit_btn.click)

    # clear_history is expected to return values for both listed outputs
    # (defined in src.SpeechToText.sr; verify against that module).
    clear_btn.click(
        fn=clear_history,
        outputs=[history_state, history_output],
    )

    # Auto-submit when audio is uploaded/recorded
    _wire_transcription(audio_input.change)
if __name__ == "__main__":
    # Start the app only when this file is run as a script: bind to all
    # network interfaces on port 7860 with sharing enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    iface.launch(**launch_options)