# Transcribe audio with Whisper, translate the transcript with Google
# Translate, and expose the pipeline through a Gradio web UI.
#
# Fix: the original first lines were webpage-scrape residue (HF Spaces page
# chrome, a git-blame hash gutter, and a line-number gutter fused onto the
# first import), which made the file unparseable. Restored a clean header.
import gradio as gr
import whisper
from deep_translator import GoogleTranslator
import nltk

# The Punkt models are required by nltk.word_tokenize used below.
nltk.download('punkt')
def transcribe_audio(audio, model_name):
    """Transcribe the audio file at *audio* using the named Whisper model.

    Returns the transcript text produced by Whisper.
    """
    speech_model = whisper.load_model(model_name)
    transcription = speech_model.transcribe(audio)
    return transcription["text"]
def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
    """Translate *transcript_text* into *target_language*.

    The translation backend rejects very long payloads, so the text is first
    split into chunks of at most *max_chunk_length* characters, each chunk is
    translated separately, and the results are re-joined with spaces.
    """
    print("Translating into", target_language)
    translator = GoogleTranslator(source='auto', target=target_language)
    pieces = split_text_into_chunks(transcript_text, max_chunk_length)
    translated = [translator.translate(piece.strip()) for piece in pieces]
    return ' '.join(translated)
def split_text_into_chunks(text, max_chunk_length):
    """Split *text* into chunks of roughly *max_chunk_length* characters.

    Tokenizes with NLTK and greedily packs whole tokens into each chunk so a
    word is never cut in half; a chunk is flushed when the next token would
    push it past the limit.
    """
    chunks = []
    buffer = ""
    for token in nltk.word_tokenize(text):
        if len(buffer) + len(token) < max_chunk_length:
            # Still room: append the token (with a separating space).
            buffer += token + " "
        else:
            # Flush the full chunk and start a new one with this token.
            if buffer:
                chunks.append(buffer.strip())
            buffer = token + " "
    # Don't drop the trailing partial chunk.
    if buffer:
        chunks.append(buffer.strip())
    return chunks
# Example usage function
def transcribe_and_translate(audio, target_language):
    """Transcribe *audio* and return the transcript translated into *target_language*.

    An empty/missing language selection falls back to English.
    """
    language = target_language or "English"
    language_code = lang_name_to_code[language]
    # Transcribe with the small "base" Whisper model, then translate.
    transcript = transcribe_audio(audio, model_name="base")
    return translate_transcript(transcript, target_language=language_code)
# Ten widely used languages paired with their ISO 639-1 codes; the display
# names feed the UI dropdown and the codes feed the translator.
top_languages = [
    ("English", "en"),
    ("Chinese", "zh"),
    ("Spanish", "es"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("Portuguese", "pt"),
    ("Bengali", "bn"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Punjabi", "pa"),
]
# Lookup table from display name to language code.
lang_name_to_code = dict(top_languages)
# Gradio interface: audio upload + language dropdown -> translated transcript.
# Fix: the original chunk ended with a stray "|" (webpage-scrape residue),
# which is a Python syntax error; it has been removed.
demo = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(choices=[lang[0] for lang in top_languages], label="Language"),
    ],
    outputs="textbox",
)
demo.launch()