prasanacodes committed on
Commit
ac2b790
·
verified ·
1 Parent(s): 4fed755

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py CHANGED
@@ -69,6 +69,55 @@ def transcribe_audio(audio_path):
69
  print(f"An error occurred during transcription: {e}")
70
  return f"Sorry, an error occurred. Please try again. Details: {str(e)}"
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def main_run(video_path):
73
  original_audio_file = extract_audio_from_video(video_path)
74
  original_text = transcribe_audio(original_audio_file)
 
69
  print(f"An error occurred during transcription: {e}")
70
  return f"Sorry, an error occurred. Please try again. Details: {str(e)}"
71
 
72
def lang_select(target_lang):
    """Return the locale code (e.g. ``"ta-IN"``) for a language name or code.

    Args:
        target_lang: A language name such as ``"Tamil"`` or a code such as
            ``"ta-IN"``. Matching ignores case and surrounding whitespace,
            so ``"tamil"``, ``" TAMIL "`` and ``"Ta-in"`` all resolve.

    Returns:
        The matching code from the table below, in its canonical spelling.

    Raises:
        KeyError: If ``target_lang`` matches neither a name nor a code.
    """
    LANGUAGE_NAME_TO_CODE = {
        "Bengali": "bn-IN", "English": "en-IN", "Gujarati": "gu-IN",
        "Hindi": "hi-IN", "Kannada": "kn-IN", "Malayalam": "ml-IN",
        "Marathi": "mr-IN", "Odia": "or-IN", "Punjabi": "pa-IN",
        "Tamil": "ta-IN", "Telugu": "te-IN",
    }

    # Fast path: exact name, identical to the original lookup.
    if target_lang in LANGUAGE_NAME_TO_CODE:
        return LANGUAGE_NAME_TO_CODE[target_lang]

    # Tolerant path: callers may pass a differently-cased name or a code
    # (including one mangled by str.capitalize(), e.g. "Ta-in").
    wanted = str(target_lang).strip().casefold()
    for name, code in LANGUAGE_NAME_TO_CODE.items():
        if wanted in (name.casefold(), code.casefold()):
            return code

    raise KeyError(f"Unsupported target language: {target_lang!r}")
80
+
81
def translate_local(text_to_translate, target_lang='ta-IN', device=None):
    """Translate English text into ``target_lang``, one chunk at a time.

    The text is split into sentences, and the sentences are grouped into
    chunks shorter than 500 characters. Each chunk is translated separately
    and the results are joined with single spaces.

    Args:
        text_to_translate: English source text. Digit runs are spelled out
            as words before translation.
        target_lang: Either a language name understood by ``lang_select``
            (e.g. ``"Tamil"``) or a code containing a hyphen (e.g.
            ``"ta-IN"``), which is used directly.
        device: Unused; kept so existing callers that pass it keep working.

    Returns:
        The translated text. Chunks that fail to translate are logged and
        left out. Empty or whitespace-only input yields ``""``.

    Raises:
        KeyError: If ``target_lang`` is a name ``lang_select`` rejects.
    """
    import textwrap  # local import: only needed to split over-long sentences

    max_chunk_chars = 500

    # 1. Resolve the target language. A hyphenated value is treated as an
    #    already-resolved code and normalised to the "xx-YY" shape used by
    #    the language table; anything else is looked up as a language name.
    requested = target_lang.strip()
    if "-" in requested:
        language, _, region = requested.partition("-")
        target_code = f"{language.lower()}-{region.upper()}"
    else:
        target_code = lang_select(requested.capitalize())

    if not text_to_translate or not text_to_translate.strip():
        return ""

    # 2. Pre-process: spell out digit runs as words.
    text_to_translate = re.sub(
        r'\d+',
        lambda match: num2words(int(match.group(0))),
        text_to_translate,
    )

    # 3. Split the entire text into individual sentences.
    sentences = nltk.sent_tokenize(text_to_translate)

    # 4. Group sentences into chunks under the character limit.
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        # A single sentence at or over the limit can never fit in a chunk,
        # so break it on whitespace into pieces that do fit.
        if len(sentence) >= max_chunk_chars:
            pieces = textwrap.wrap(sentence, width=max_chunk_chars - 1)
        else:
            pieces = [sentence]

        for piece in pieces:
            if len(current_chunk) + len(piece) + 1 < max_chunk_chars:
                current_chunk += piece + " "
            else:
                # Flush only a non-empty chunk, so a long first piece does
                # not produce an empty translation request.
                if current_chunk:
                    chunks.append(current_chunk.strip())
                current_chunk = piece + " "

    # Add the last remaining chunk to the list.
    if current_chunk:
        chunks.append(current_chunk.strip())

    # 5. Translate each chunk into the resolved target language.
    translator = MyMemoryTranslator(source='en-GB', target=target_code)
    translated_chunks = []
    for chunk in chunks:
        try:
            translated_chunks.append(translator.translate(chunk))
        except Exception as e:
            # Best effort: one failed chunk must not discard the rest.
            print(f"Could not translate chunk: {chunk}\nError: {e}")

    # Skip empty results so failures do not leave doubled spaces.
    return " ".join(piece for piece in translated_chunks if piece)
120
+
121
  def main_run(video_path):
122
  original_audio_file = extract_audio_from_video(video_path)
123
  original_text = transcribe_audio(original_audio_file)