Spaces:

xTHExBEASTx
/

srt

Sleeping

App Files Files Community

xTHExBEASTx committed on Feb 11

Commit

87aabc7

verified ·

1 Parent(s): 0da9887

Create app.py

Browse files

Files changed (1) hide show

app.py +86 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import gradio as gr
+import torch
+import re
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# 1. Model and device setup
# Use the GPU when PyTorch reports that CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model_name = "facebook/nllb-200-distilled-600M"
print(f"Loading {model_name} on {device}...")

# The tokenizer and the seq2seq model are loaded once, at import time, and
# reused by every translation call; the model is moved to the chosen device.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.to(device)
# Language mapping (expandable): display name shown in the UI -> NLLB-200
# (FLORES-200) language code used as the forced target-language token.
LANG_MAP = {
    "English": "eng_Latn",
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "German": "deu_Latn",
    "Chinese (Simplified)": "zho_Hans",
    "Japanese": "jpn_Jpan",
    # The generic "Arabic" entry targets Modern Standard Arabic (arb_Arab);
    # ary_Arab is the code for the Moroccan Arabic dialect.
    "Arabic": "arb_Arab",
    "Russian": "rus_Cyrl",
}
def translate_text(text, target_lang_name):
    """Translate ``text`` into the language selected by ``target_lang_name``.

    The module-level ``tokenizer`` and ``model`` are used. The tokenizer is
    loaded without an explicit source language, so whatever source-language
    tag it defaults to is applied to ``text``.

    Args:
        text: Text to translate. Empty, ``None`` or whitespace-only input is
            returned unchanged without invoking the model.
        target_lang_name: A display name from ``LANG_MAP``. Names that are not
            in the map fall back to English (``eng_Latn``).

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    if not text or not text.strip():
        return text

    target_code = LANG_MAP.get(target_lang_name, "eng_Latn")
    # Resolve the language tag through the vocabulary instead of the
    # ``lang_code_to_id`` mapping, which newer transformers releases no
    # longer provide on the NLLB tokenizer.
    target_token_id = tokenizer.convert_tokens_to_ids(target_code)

    # truncation=True caps over-long inputs at the tokenizer's configured
    # maximum length instead of feeding an unbounded sequence to the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to(device)
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=target_token_id,
        max_length=256,
    )
    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
def process_srt(file, target_lang):
    """Translate the text of every cue in an SRT file and write a new SRT.

    The input is split into cues on blank lines. A cue with at least three
    lines (index, timestamp, text) whose text contains a letter has its text
    lines joined with spaces and passed to ``translate_text``; every other
    cue is copied through unchanged. Cues are written back separated by a
    single blank line.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string. ``None`` means no upload.
        target_lang: Display name of the target language, forwarded to
            ``translate_text``.

    Returns:
        Path of the translated ``translated_subtitles.srt`` file, or ``None``
        when ``file`` is ``None``.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    if file is None:
        return None

    path = getattr(file, "name", file)
    try:
        # utf-8-sig reads plain UTF-8 as well and drops a leading BOM, which
        # would otherwise stay attached to the first cue index.
        with open(path, "r", encoding="utf-8-sig") as f:
            content = f.read()
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this fallback cannot
        # fail on decoding.
        with open(path, "r", encoding="latin-1") as f:
            content = f.read()

    translated_blocks = []
    # Non-capturing split: the separators must not end up in the block list,
    # otherwise re-joining with blank lines multiplies the empty lines.
    for raw_block in re.split(r"\n\s*\n", content):
        block = raw_block.strip()
        if not block:
            continue

        lines = block.splitlines()
        text_to_translate = " ".join(lines[2:])
        if len(lines) >= 3 and any(c.isalpha() for c in text_to_translate):
            index, timestamp = lines[0], lines[1]
            translated_text = translate_text(text_to_translate, target_lang)
            translated_blocks.append(f"{index}\n{timestamp}\n{translated_text}")
        else:
            # Malformed cues and cues without letters are kept verbatim.
            translated_blocks.append(block)

    # Write into a fresh directory per call so simultaneous requests do not
    # overwrite each other's result, while keeping the download file name.
    output_dir = tempfile.mkdtemp(prefix="srt_translate_")
    output_path = os.path.join(output_dir, "translated_subtitles.srt")
    with open(output_path, "w", encoding="utf-8") as f:
        if translated_blocks:
            f.write("\n\n".join(translated_blocks) + "\n")
    return output_path
# 3. Gradio Interface
# Input widgets: the subtitle upload and a target-language picker whose
# choices are the display names in LANG_MAP.
_srt_upload = gr.File(label="Upload SRT File")
_language_picker = gr.Dropdown(
    choices=list(LANG_MAP),
    value="English",
    label="Target Language",
)
# Output widget: the translated file offered for download.
_translated_download = gr.File(label="Download Translated SRT")

# NOTE(review): `show_api` is intended to avoid an API-docs crash on newer
# Gradio versions; confirm the installed Gradio accepts it on the
# constructor rather than only on `launch()`.
demo = gr.Interface(
    fn=process_srt,
    inputs=[_srt_upload, _language_picker],
    outputs=_translated_download,
    title="SRT Subtitle Translator",
    description="Translates SRT files using NLLB-200. Optimized for Hugging Face Spaces.",
    show_api=False,
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()