"""Gradio app: transcribe audio with Whisper, then translate the transcript.

The user uploads an audio file and picks a target language; the app runs
Whisper speech-to-text and translates the result with Google Translate
(via deep_translator), chunking long transcripts to stay under the
translator's request-size limit.
"""

import functools

import gradio as gr
import nltk
import whisper
from deep_translator import GoogleTranslator

# Punkt sentence models are needed by nltk.sent_tokenize below.
nltk.download('punkt')


@functools.lru_cache(maxsize=None)
def _load_whisper_model(model_name):
    """Load a Whisper model once and cache it for subsequent requests."""
    return whisper.load_model(model_name)


def transcribe_audio(audio, model_name):
    """Transcribe an audio file with Whisper and return the recognized text.

    Args:
        audio: Path to the audio file (Gradio supplies a filepath).
        model_name: Whisper model size, e.g. "base".

    Returns:
        The transcript as a single string.
    """
    model = _load_whisper_model(model_name)
    result = model.transcribe(audio)
    return result["text"]


def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
    """Translate *transcript_text* into *target_language*.

    The text is split into chunks no longer than *max_chunk_length*
    characters (respecting sentence boundaries where possible, so the
    translator keeps context) and the translated chunks are re-joined
    with single spaces.

    Args:
        transcript_text: Text to translate.
        target_language: Target language code understood by GoogleTranslator
            (e.g. "es"); the source language is auto-detected.
        max_chunk_length: Upper bound on characters per translation request.

    Returns:
        The translated text as a single string.
    """
    print("Translating into", target_language)
    translator = GoogleTranslator(source='auto', target=target_language)
    chunks = split_text_into_chunks(transcript_text, max_chunk_length)
    translated_chunks = [translator.translate(chunk.strip()) for chunk in chunks]
    return ' '.join(translated_chunks)


def split_text_into_chunks(text, max_chunk_length):
    """Split *text* into chunks of at most *max_chunk_length* characters.

    Chunks are assembled from whole sentences so that the translator sees
    coherent context. Unlike a word-tokenizer based split, this keeps the
    original punctuation attached to its words (word_tokenize would turn
    "don't" into "do n't" and detach commas, corrupting the text sent to
    the translator). A single sentence longer than the limit falls back to
    a whitespace split.
    """
    pieces = []
    for sentence in nltk.sent_tokenize(text):
        if len(sentence) <= max_chunk_length:
            pieces.append(sentence)
        else:
            # Oversized sentence: degrade gracefully to word-level pieces.
            pieces.extend(sentence.split())

    chunks = []
    current = ""
    for piece in pieces:
        # +1 accounts for the joining space between pieces.
        if current and len(current) + 1 + len(piece) > max_chunk_length:
            chunks.append(current)
            current = piece
        else:
            current = piece if not current else current + " " + piece
    if current:
        chunks.append(current)
    return chunks


def transcribe_and_translate(audio, target_language):
    """Gradio handler: transcribe *audio* and translate it to *target_language*.

    Args:
        audio: Filepath of the uploaded audio.
        target_language: Display name chosen in the dropdown; defaults to
            "English" when the dropdown is left empty.

    Returns:
        The translated transcript.
    """
    if not target_language:
        target_language = "English"
    target_language_code = lang_name_to_code[target_language]
    transcript_text = transcribe_audio(audio, model_name="base")
    return translate_transcript(transcript_text, target_language=target_language_code)


# Ten widely used languages as (display name, ISO 639-1 code) pairs.
top_languages = [
    ("English", "en"),
    ("Chinese", "zh"),
    ("Spanish", "es"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("Portuguese", "pt"),
    ("Bengali", "bn"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Punjabi", "pa"),
]
lang_name_to_code = dict(top_languages)

# Gradio interface: audio upload + language dropdown -> translated text.
demo = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(choices=[name for name, _ in top_languages], label="Language"),
    ],
    outputs="textbox",
)

if __name__ == "__main__":
    demo.launch()