Spaces:

prasanacodes
/

Indic-Translation-Toolkit

Sleeping

App Files Files Community

prasanacodes committed on Aug 18, 2025

Commit

ac2b790

verified ·

1 Parent(s): 4fed755

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -0

app.py CHANGED Viewed

@@ -69,6 +69,55 @@ def transcribe_audio(audio_path):
         print(f"An error occurred during transcription: {e}")
         return f"Sorry, an error occurred. Please try again. Details: {str(e)}"
 def main_run(video_path):
     original_audio_file = extract_audio_from_video(video_path)
     original_text = transcribe_audio(original_audio_file)

         print(f"An error occurred during transcription: {e}")
         return f"Sorry, an error occurred. Please try again. Details: {str(e)}"
def lang_select(target_lang):
    """Return the locale code (e.g. ``"ta-IN"``) for a target language.

    Args:
        target_lang: Either a language name ("Tamil") or one of the locale
            codes from the table below ("ta-IN"). Matching ignores case and
            surrounding whitespace, so "tamil", " TAMIL " and "Ta-in" all
            resolve to "ta-IN".

    Returns:
        The locale code string from the table.

    Raises:
        KeyError: If ``target_lang`` is not a string or matches neither a
            known language name nor a known locale code.
    """
    LANGUAGE_NAME_TO_CODE = {
        "Bengali": "bn-IN", "English": "en-IN", "Gujarati": "gu-IN",
        "Hindi": "hi-IN", "Kannada": "kn-IN", "Malayalam": "ml-IN",
        "Marathi": "mr-IN", "Odia": "or-IN", "Punjabi": "pa-IN",
        "Tamil": "ta-IN", "Telugu": "te-IN",
    }
    if isinstance(target_lang, str):
        wanted = target_lang.strip().casefold()
        for name, code in LANGUAGE_NAME_TO_CODE.items():
            # Accept the code itself too, so callers that already hold a
            # locale code (in any letter case) do not hit a KeyError.
            if wanted in (name.casefold(), code.casefold()):
                return code
    supported = ", ".join(sorted(LANGUAGE_NAME_TO_CODE))
    raise KeyError(
        f"Unsupported target language {target_lang!r}; expected one of: {supported}"
    )
def _chunk_sentences(sentences, max_chars=500):
    """Group sentences into space-joined chunks shorter than ``max_chars``."""
    import textwrap

    chunks = []
    current = ""
    for sentence in sentences:
        if len(sentence) < max_chars:
            pieces = [sentence]
        else:
            # A single sentence at or over the limit can never fit in a
            # chunk, so break it on whitespace into pieces that do.
            pieces = textwrap.wrap(sentence, width=max_chars - 1)
        for piece in pieces:
            piece = piece.strip()
            if not piece:
                continue
            candidate = f"{current} {piece}" if current else piece
            if len(candidate) < max_chars:
                current = candidate
            else:
                # Flush only non-empty chunks; an empty chunk would be sent
                # to the translator for nothing.
                if current:
                    chunks.append(current)
                current = piece
    if current:
        chunks.append(current)
    return chunks


def translate_local(text_to_translate, target_lang='ta-IN', device=None):
    """Translate English text into the requested target language.

    Digits in the text are spelled out as words, the text is split into
    sentences, and the sentences are grouped into chunks shorter than 500
    characters. Each chunk is translated separately and the results are
    joined with single spaces.

    Args:
        text_to_translate: English source text.
        target_lang: Language name (e.g. "Tamil") or locale code
            (e.g. "ta-IN") of the target language.
        device: Unused; kept so existing callers that pass it keep working.

    Returns:
        The translated text. Chunks that fail to translate are reported via
        ``print`` and left out of the result.

    Raises:
        KeyError: If ``target_lang`` is neither a language name known to
            ``lang_select`` nor shaped like a ``xx-YY`` locale code.
    """
    # 1. Spell out numbers so they are translated as words.
    text_to_translate = re.sub(
        r'\d+', lambda match: num2words(int(match.group(0))), text_to_translate
    )

    # 2. Resolve the target language to a locale code.
    requested = target_lang.strip()
    try:
        target_code = lang_select(requested.capitalize())
    except KeyError:
        # The value may already be a locale code such as the default
        # 'ta-IN'; normalise its letter case instead of failing.
        language, separator, region = requested.partition("-")
        if not (separator and language and region):
            raise
        target_code = f"{language.lower()}-{region.upper()}"

    # 3. Split into sentences and group them into chunks under the limit.
    chunks = _chunk_sentences(nltk.sent_tokenize(text_to_translate), max_chars=500)

    # 4. Translate each chunk and combine the results.
    translator = MyMemoryTranslator(source='en-GB', target=target_code)
    translated_chunks = []
    for chunk in chunks:
        try:
            translated = translator.translate(chunk)
        except Exception as e:
            # Best effort: report the failed chunk and carry on with the rest.
            print(f"Could not translate chunk: {chunk}\nError: {e}")
            continue
        if translated:
            translated_chunks.append(translated)
    return " ".join(translated_chunks)
 def main_run(video_path):
     original_audio_file = extract_audio_from_video(video_path)
     original_text = transcribe_audio(original_audio_file)