ranjeetsps committed on
Commit
8e03dad
·
verified ·
1 Parent(s): 243b86d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -21
app.py CHANGED
@@ -4,20 +4,17 @@ from deep_translator import GoogleTranslator
4
  import nltk
5
  nltk.download('punkt')
6
 
7
- def transcribe_audio(audio, model_name, output_file):
8
  model = whisper.load_model(model_name)
9
  result = model.transcribe(audio)
10
- with open(output_file, "w", encoding='utf-8') as f:
11
- f.write(result["text"])
12
 
13
- def translate_transcript(transcript_file, target_language, output_file, max_chunk_length=5000):
14
  print("Translating into", target_language)
15
  translator = GoogleTranslator(source='auto', target=target_language)
16
- with open(transcript_file, 'r', encoding='utf-8') as file:
17
- content = file.read()
18
 
19
  # Split content into chunks that attempt to maintain context
20
- chunks = split_text_into_chunks(content, max_chunk_length)
21
 
22
  translated_chunks = []
23
  for chunk in chunks:
@@ -27,10 +24,6 @@ def translate_transcript(transcript_file, target_language, output_file, max_chun
27
  # Join all translated chunks into a single string
28
  translated_text = ' '.join(translated_chunks)
29
 
30
- # Write the translated content to the output file
31
- with open(output_file, 'w', encoding='utf-8') as file:
32
- file.write(translated_text)
33
-
34
  return translated_text
35
 
36
  def split_text_into_chunks(text, max_chunk_length):
@@ -56,20 +49,18 @@ def split_text_into_chunks(text, max_chunk_length):
56
  return chunks
57
 
58
  # Example usage function
59
- def transcribe_and_translate(audio, target_language ):
60
- transcript_file = "transcript.txt"
61
- translated_file = "translated_file.txt"
62
- if not target_language :
63
- target_language ="English"
64
- target_language = lang_name_to_code[target_language]
65
 
66
- # Transcribe audio and save the transcript
67
- transcribe_audio(audio, model_name="base", output_file=transcript_file)
68
 
69
  # Translate transcript to the target language
70
- output = translate_transcript(transcript_file, target_language=target_language, output_file=translated_file)
71
 
72
- return output
73
 
74
  # List of top 10 widely used languages with their codes
75
  top_languages = [
 
4
  import nltk
5
  nltk.download('punkt')
6
 
7
+ def transcribe_audio(audio, model_name):
8
  model = whisper.load_model(model_name)
9
  result = model.transcribe(audio)
10
+ return result["text"]
 
11
 
12
+ def translate_transcript(transcript_text, target_language, max_chunk_length=5000):
13
  print("Translating into", target_language)
14
  translator = GoogleTranslator(source='auto', target=target_language)
 
 
15
 
16
  # Split content into chunks that attempt to maintain context
17
+ chunks = split_text_into_chunks(transcript_text, max_chunk_length)
18
 
19
  translated_chunks = []
20
  for chunk in chunks:
 
24
  # Join all translated chunks into a single string
25
  translated_text = ' '.join(translated_chunks)
26
 
 
 
 
 
27
  return translated_text
28
 
29
  def split_text_into_chunks(text, max_chunk_length):
 
49
  return chunks
50
 
51
  # Example usage function
52
+ def transcribe_and_translate(audio, target_language):
53
+ if not target_language:
54
+ target_language = "English"
55
+ target_language_code = lang_name_to_code[target_language]
 
 
56
 
57
+ # Transcribe audio
58
+ transcript_text = transcribe_audio(audio, model_name="base")
59
 
60
  # Translate transcript to the target language
61
+ translated_text = translate_transcript(transcript_text, target_language=target_language_code)
62
 
63
+ return translated_text
64
 
65
  # List of top 10 widely used languages with their codes
66
  top_languages = [