Spaces:

Softprodigy
/

Transcription

Sleeping

App Files Community

ranjeetsps committed on Jun 20, 2024

Commit

8e03dad

verified ·

1 Parent(s): 243b86d

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -21

app.py CHANGED Viewed

@@ -4,20 +4,17 @@ from deep_translator import GoogleTranslator
 import nltk
 nltk.download('punkt')
-def transcribe_audio(audio, model_name, output_file):
     model = whisper.load_model(model_name)
     result = model.transcribe(audio)
-    with open(output_file, "w", encoding='utf-8') as f:
-        f.write(result["text"])
-def translate_transcript(transcript_file, target_language, output_file, max_chunk_length=5000):
     print("Translating into", target_language)
     translator = GoogleTranslator(source='auto', target=target_language)
-    with open(transcript_file, 'r', encoding='utf-8') as file:
-        content = file.read()
     # Split content into chunks that attempt to maintain context
-    chunks = split_text_into_chunks(content, max_chunk_length)
     translated_chunks = []
     for chunk in chunks:
@@ -27,10 +24,6 @@ def translate_transcript(transcript_file, target_language, output_file, max_chun
     # Join all translated chunks into a single string
     translated_text = ' '.join(translated_chunks)
-    # Write the translated content to the output file
-    with open(output_file, 'w', encoding='utf-8') as file:
-        file.write(translated_text)
     return translated_text
 def split_text_into_chunks(text, max_chunk_length):
@@ -56,20 +49,18 @@ def split_text_into_chunks(text, max_chunk_length):
     return chunks
 # Example usage function
-def transcribe_and_translate(audio, target_language ):
-    transcript_file = "transcript.txt"
-    translated_file = "translated_file.txt"
-    if not  target_language :
-        target_language ="English"
-    target_language = lang_name_to_code[target_language]
-    # Transcribe audio and save the transcript
-    transcribe_audio(audio, model_name="base", output_file=transcript_file)
     # Translate transcript to the target language
-    output = translate_transcript(transcript_file, target_language=target_language, output_file=translated_file)
-    return output
 # List of top 10 widely used languages with their codes
 top_languages = [

 import nltk
 nltk.download('punkt')
+def transcribe_audio(audio, model_name):
     model = whisper.load_model(model_name)
     result = model.transcribe(audio)
+    return result["text"]
+def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
     print("Translating into", target_language)
     translator = GoogleTranslator(source='auto', target=target_language)
     # Split content into chunks that attempt to maintain context
+    chunks = split_text_into_chunks(transcript_text, max_chunk_length)
     translated_chunks = []
     for chunk in chunks:
     # Join all translated chunks into a single string
     translated_text = ' '.join(translated_chunks)
     return translated_text
 def split_text_into_chunks(text, max_chunk_length):
     return chunks
 # Example usage function
+def transcribe_and_translate(audio, target_language):
+    if not target_language:
+        target_language = "English"
+    target_language_code = lang_name_to_code[target_language]
+    # Transcribe audio
+    transcript_text = transcribe_audio(audio, model_name="base")
     # Translate transcript to the target language
+    translated_text = translate_transcript(transcript_text, target_language=target_language_code)
+    return translated_text
 # List of top 10 widely used languages with their codes
 top_languages = [