Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import whisper | |
| from deep_translator import GoogleTranslator | |
| import nltk | |
# Fetch the tokenizer data that nltk.word_tokenize needs at runtime.
# Recent NLTK releases load the "punkt_tab" resource instead of the older
# pickled "punkt" models, so request both to work across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')
# Whisper models already loaded in this process, keyed by model name, so a
# model is only loaded the first time it is requested.
_WHISPER_MODEL_CACHE = {}


def transcribe_audio(audio, model_name):
    """Transcribe audio to text with a Whisper model.

    Args:
        audio: Audio input handed unchanged to ``model.transcribe``, or
            ``None`` when no audio was provided.
        model_name: Model name handed to ``whisper.load_model``.

    Returns:
        The ``"text"`` entry of the transcription result, or an empty
        string when ``audio`` is ``None``.
    """
    # Nothing to transcribe: return early instead of failing inside Whisper.
    if audio is None:
        return ""

    model = _WHISPER_MODEL_CACHE.get(model_name)
    if model is None:
        # Loading is expensive, so keep the model for later calls.
        model = whisper.load_model(model_name)
        _WHISPER_MODEL_CACHE[model_name] = model

    result = model.transcribe(audio)
    return result["text"]
def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
    """Translate a transcript chunk by chunk and return the combined text.

    Args:
        transcript_text: Text to translate.
        target_language: Target language passed to ``GoogleTranslator``.
        max_chunk_length: Size limit handed to ``split_text_into_chunks``.

    Returns:
        The translated chunks joined with single spaces.
    """
    print("Translating into", target_language)
    google = GoogleTranslator(source='auto', target=target_language)

    # Break the transcript into pieces before sending them to the translator.
    pieces = split_text_into_chunks(transcript_text, max_chunk_length)

    # Translate every piece in order, then stitch the results back together.
    translated_pieces = [google.translate(piece.strip()) for piece in pieces]
    return ' '.join(translated_pieces)
def split_text_into_chunks(text, max_chunk_length):
    """Split text into chunks shorter than ``max_chunk_length`` characters.

    The text is split on whitespace and words are packed greedily, joined by
    single spaces, so punctuation stays attached to its word. A single word
    that is too long to fit in one chunk is cut into consecutive pieces so
    that no chunk ever reaches ``max_chunk_length``.

    Args:
        text: Text to split. ``None`` or an empty string yields no chunks.
        max_chunk_length: Exclusive upper bound on the length of each chunk.

    Returns:
        A list of chunk strings in their original order.
    """
    if not text:
        return []

    # Chunks must stay strictly below max_chunk_length; always allow at
    # least one character per chunk so the splitting below makes progress.
    limit = max(1, max_chunk_length - 1)

    chunks = []
    pending = []      # words collected for the chunk being built
    pending_len = 0   # length of " ".join(pending)

    for word in text.split():
        # Cut oversized words so that every piece fits in a chunk by itself.
        for start in range(0, len(word), limit):
            piece = word[start:start + limit]
            added = len(piece) + (1 if pending else 0)  # +1 for the joining space
            if pending and pending_len + added > limit:
                chunks.append(" ".join(pending))
                pending = []
                pending_len = 0
                added = len(piece)
            pending.append(piece)
            pending_len += added

    if pending:
        chunks.append(" ".join(pending))
    return chunks
# Example usage function
def transcribe_and_translate(audio, target_language):
    """Transcribe ``audio`` and translate the transcript.

    Args:
        audio: Audio input forwarded to ``transcribe_audio``.
        target_language: Language display name looked up in
            ``lang_name_to_code``; a falsy value falls back to "English".

    Returns:
        The translated transcript text.
    """
    # Resolve the display name (or the English fallback) to a language code.
    code = lang_name_to_code[target_language or "English"]

    # Speech to text with the "base" model, then text to the target language.
    transcript = transcribe_audio(audio, model_name="base")
    return translate_transcript(transcript, target_language=code)
# List of top 10 widely used languages as (display name, language code).
# The codes are passed to GoogleTranslator as the translation target, so each
# one must be a code that translator accepts.
top_languages = [
    ("English", "en"),
    # A bare "zh" is not a code the Google backend of deep_translator lists;
    # "zh-CN" selects Simplified Chinese.
    ("Chinese", "zh-CN"),
    ("Spanish", "es"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("Portuguese", "pt"),
    ("Bengali", "bn"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Punjabi", "pa"),
]

# Lookup table from display name to language code.
lang_name_to_code = dict(top_languages)
# Gradio interface: an audio input configured with type="filepath" and a
# language dropdown, both fed to transcribe_and_translate; the result is
# shown in a textbox.
demo = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(
            label="Language",
            choices=[name for name, _code in top_languages],
        ),
    ],
    outputs="textbox",
)

# Start the web app.
demo.launch()