Spaces:
Sleeping
Sleeping
import os
import time

import azure.cognitiveservices.speech as speechsdk
import gradio as gr
# Azure credentials.
# The subscription key is read from the environment (e.g. a Space/CI secret)
# so that it is never committed alongside the source. Any key that was
# previously hard-coded here has been exposed and should be rotated.
# An empty key is left in place when the variable is unset; the Speech SDK
# will then reject the configuration when a recognizer is created.
SPEECH_KEY = os.environ.get("SPEECH_KEY", "")
# The region can be overridden the same way; it defaults to the region this
# app was originally deployed against.
SERVICE_REGION = os.environ.get("SPEECH_REGION", "switzerlandnorth")
# Language -> {region display name -> locale code}.
# The outer keys populate the language dropdown and the inner keys populate
# the dialect dropdown, both in the order written here. The locale code is the
# value assigned to the recognizer's speech_recognition_language.
# Extend by adding further languages or regions.
language_dialects = {
    "Arabic": {
        "Egypt": "ar-EG",
        "Saudi Arabia": "ar-SA",
        "United Arab Emirates": "ar-AE",
        "Bahrain": "ar-BH",
        "Algeria": "ar-DZ",
        "Iraq": "ar-IQ",
        "Jordan": "ar-JO",
        "Kuwait": "ar-KW",
        "Lebanon": "ar-LB",
        "Libya": "ar-LY",
        "Morocco": "ar-MA",
        "Oman": "ar-OM",
        "Palestinian Authority": "ar-PS",
        "Qatar": "ar-QA",
        "Syria": "ar-SY",
        "Tunisia": "ar-TN",
        "Yemen": "ar-YE",
    },
    "English": {
        "United States": "en-US",
        "United Kingdom": "en-GB",
        "Australia": "en-AU",
        "Canada": "en-CA",
        "India": "en-IN",
        "Ireland": "en-IE",
        "New Zealand": "en-NZ",
        "South Africa": "en-ZA",
        "Singapore": "en-SG",
        "Philippines": "en-PH",
    },
    "French": {
        "France": "fr-FR",
        "Canada": "fr-CA",
        "Switzerland": "fr-CH",
    },
    "Spanish": {
        "Spain": "es-ES",
        "Mexico": "es-MX",
        "Argentina": "es-AR",
        "Colombia": "es-CO",
        "Chile": "es-CL",
        "Peru": "es-PE",
        "Venezuela": "es-VE",
    },
    "German": {
        "Germany": "de-DE",
        "Austria": "de-AT",
        "Switzerland": "de-CH",
    },
    "Portuguese": {
        "Portugal": "pt-PT",
        "Brazil": "pt-BR",
    },
    "Chinese": {
        "Mainland China": "zh-CN",
        "Hong Kong": "zh-HK",
        "Taiwan": "zh-TW",
    },
    "Italian": {
        "Italy": "it-IT",
    },
    "Japanese": {
        "Japan": "ja-JP",
    },
    "Korean": {
        "Korea": "ko-KR",
    },
}
# Refresh the dialect dropdown whenever the language selection changes
def get_dialects(language):
    """Build a Gradio update carrying the dialect choices for ``language``.

    The choices are the region names registered for ``language`` in
    ``language_dialects``, in dictionary order. The first one becomes the
    selected value. A language that is not in the mapping produces an empty
    choice list with no selection (``None``).
    """
    regions = [*language_dialects.get(language, {})]
    selected = regions[0] if regions else None
    return gr.update(choices=regions, value=selected)
# Transcribe an audio file with the Azure Speech service
def transcribe_audio(audio_file, duration, language, dialect):
    """Run single-shot speech recognition on ``audio_file``.

    Args:
        audio_file: Path of the audio file to recognize. A falsy value
            (``None`` or ``""``, i.e. nothing was supplied) short-circuits
            with a message instead of reaching the Speech SDK.
        duration: Number of seconds to pause before recognition starts.
            A falsy value (``None`` or ``0``) skips the pause.
        language: Key into ``language_dialects``.
        dialect: Key into ``language_dialects[language]``. An unknown
            language/dialect pair falls back to ``"en-US"``.

    Returns:
        The recognized text on success, otherwise a human-readable status
        message describing why no text was produced.
    """
    # Bail out before the delay and before touching the SDK: without a file
    # there is nothing to recognize.
    if not audio_file:
        return "No audio file provided. Please upload or record audio first."

    # NOTE(review): this pause only simulates a recording window; the audio
    # is already available as a file at this point. Consider removing it.
    print(f"Recording for {duration} seconds...")
    if duration:
        time.sleep(duration)

    # Resolve the locale code for the chosen language/dialect.
    locale_code = language_dialects.get(language, {}).get(dialect, "en-US")
    print(f"Selected Locale Code: {locale_code}")

    # Configure the recognizer for the chosen locale and input file.
    speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SERVICE_REGION)
    speech_config.speech_recognition_language = locale_code
    audio_config = speechsdk.audio.AudioConfig(filename=audio_file)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )

    # NOTE(review): recognize_once is a single-shot call; confirm against the
    # SDK docs whether long recordings need continuous recognition instead.
    result = speech_recognizer.recognize_once()

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        return result.text
    if result.reason == speechsdk.ResultReason.NoMatch:
        return "No speech could be recognized"
    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        return f"Speech recognition canceled: {cancellation_details.error_details}"
    return "Unknown error occurred during speech recognition"
# Build the Gradio interface.
# NOTE(review): the nesting below is reconstructed from a flattened source
# (original indentation was lost) -- confirm the row layout matches the
# intended design.
with gr.Blocks() as demo:
    gr.Markdown("## Azure Speech to Text with Language and Dialect Selection")

    # Row 1: the audio to transcribe and the pre-recognition pause length.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Upload Audio")
        duration_input = gr.Dropdown(choices=[5, 10], label="Recording Duration", value=5)

    # Row 2: language picker and the dialect picker that depends on it.
    # The dialect list starts empty and is filled by the change handler below.
    with gr.Row():
        language_input = gr.Dropdown(choices=list(language_dialects), label="Select Language")
        dialect_input = gr.Dropdown(choices=[], label="Select Dialect")

    transcribe_button = gr.Button("Transcribe")
    output_text = gr.Textbox(label="Transcription Result")

    # Update dialect options based on the selected language.
    language_input.change(fn=get_dialects, inputs=language_input, outputs=dialect_input)

    # Transcribe the audio on button click.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[audio_input, duration_input, language_input, dialect_input],
        outputs=output_text,
    )

# Launch the app only when executed as a script, so importing this module
# (e.g. from tests or tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()