xTHExBEASTx committed on
Commit
b1f04ee
·
verified ·
1 Parent(s): 30fca35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -53
app.py CHANGED
@@ -1,88 +1,174 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import srt
4
  import torch
5
  import os
 
 
6
 
7
- # --- Configuration ---
8
- MODEL_CHECKPOINT = "facebook/nllb-200-distilled-1.3B"
9
- SRC_LANG = "eng_Latn"
10
- TGT_LANG = "arb_Arab"
 
11
 
12
- # --- Load Model Directly ---
13
- print("Loading model...")
14
- # We use the tokenizer to convert text to numbers
15
- tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
16
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)
17
- print("Model loaded!")
18
 
19
- def batch_translate(texts, batch_size=8):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  results = []
 
21
 
22
- # Set the source language
23
- tokenizer.src_lang = SRC_LANG
24
-
25
- for i in range(0, len(texts), batch_size):
26
- batch = texts[i : i + batch_size]
27
-
28
- # 1. Tokenize the batch
29
- inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
30
 
31
- # 2. Get the Target Language ID properly (The Fix)
32
- # We use convert_tokens_to_ids() instead of accessing the internal dictionary
33
- forced_bos_token_id = tokenizer.convert_tokens_to_ids(TGT_LANG)
34
 
35
- # 3. Generate translation
36
  with torch.no_grad():
37
- generated_tokens = model.generate(
38
  **inputs,
39
  forced_bos_token_id=forced_bos_token_id,
40
  max_length=512
41
  )
42
 
43
- # 4. Decode the results
44
- batch_results = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
45
  results.extend(batch_results)
46
-
47
  return results
48
 
49
- def process_srt(filepath):
50
- if filepath is None:
51
- return None
52
-
53
  try:
54
  with open(filepath, 'r', encoding='utf-8') as f:
55
  content = f.read()
56
- subtitle_generator = srt.parse(content)
57
- subtitles = list(subtitle_generator)
58
  except Exception as e:
59
- return f"Error parsing SRT: {str(e)}"
60
 
61
- # Translate content
62
- texts_to_translate = [sub.content for sub in subtitles]
63
- translated_texts = batch_translate(texts_to_translate)
64
 
65
- # Update subtitles
66
- for sub, trans_text in zip(subtitles, translated_texts):
67
- sub.content = trans_text
68
 
69
- # Save output
70
- output_path = "translated_subtitles.srt"
71
- with open(output_path, 'w', encoding='utf-8') as f:
72
  f.write(srt.compose(subtitles))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- return output_path
 
 
 
 
 
 
 
75
 
76
- # --- Gradio Interface ---
77
- with gr.Blocks(title="NLLB SRT Translator") as demo:
78
- gr.Markdown("# 🇬🇧 English to 🇸🇦 Arabic SRT Translator")
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- with gr.Row():
81
- input_file = gr.File(label="Upload English SRT", file_types=[".srt"])
82
- output_file = gr.File(label="Download Arabic SRT")
 
 
 
 
 
 
 
 
83
 
84
- btn = gr.Button("Translate", variant="primary")
85
- btn.click(fn=process_srt, inputs=input_file, outputs=output_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  if __name__ == "__main__":
88
  demo.launch()
 
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import srt
import torch
import os
import math
from datetime import timedelta
# NOTE(review): `os` and `math` appear unused in this file — confirm before removing.

# --- Model configuration ---
# 1. Translation model (NLLB)
TRANSLATION_MODEL = "facebook/nllb-200-distilled-1.3B"
# 2. Speech-to-text model (distilled Whisper)
WHISPER_MODEL = "distil-whisper/distil-large-v3"

print("Jari Tahmeel Al-Models... (Loading Models...)")

# --- Load the translation model (NLLB) ---
tokenizer_nllb = AutoTokenizer.from_pretrained(TRANSLATION_MODEL)
model_nllb = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL)

# --- Load the speech model (Whisper) ---
# chunk_length_s splits long audio into 30 s windows; stride_length_s overlaps
# adjacent windows so words at chunk boundaries are not cut off.
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model=WHISPER_MODEL,
    torch_dtype=torch.float32,
    device="cpu",
    chunk_length_s=30,
    stride_length_s=5,
)

print("Tam Tahmeel Al-Models Binajah! (Models Loaded!)")

# ---------------------------------------------------------
# Part 1: translation helpers (NLLB logic)
# ---------------------------------------------------------
37
def batch_translate(texts, src_lang, tgt_lang, batch_size=8, progress=gr.Progress()):
    """Translate a list of strings with NLLB in mini-batches.

    Args:
        texts: list of source-language strings.
        src_lang: NLLB source-language token, e.g. "eng_Latn".
        tgt_lang: NLLB target-language token, e.g. "arb_Arab".
        batch_size: number of strings per generate() call.
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        List of translated strings, same length and order as `texts`.
    """
    results = []
    # NLLB requires the source language to be set on the tokenizer before encoding.
    tokenizer_nllb.src_lang = src_lang

    # Target-language id forces the decoder to start in the target language.
    # Loop-invariant, so it is computed once here instead of per batch.
    forced_bos_token_id = tokenizer_nllb.convert_tokens_to_ids(tgt_lang)

    total_batches = (len(texts) + batch_size - 1) // batch_size  # ceil division

    for i, start_idx in enumerate(range(0, len(texts), batch_size)):
        # BUG FIX: this call was commented out, leaving `total_batches` as dead
        # code and the UI with no feedback during long translations.
        progress(i / max(total_batches, 1), desc=f"Translating batch {i+1}/{total_batches}")
        batch = texts[start_idx : start_idx + batch_size]

        inputs = tokenizer_nllb(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)

        with torch.no_grad():  # inference only — skip autograd bookkeeping
            generated_tokens = model_nllb.generate(
                **inputs,
                forced_bos_token_id=forced_bos_token_id,
                max_length=512
            )

        batch_results = tokenizer_nllb.batch_decode(generated_tokens, skip_special_tokens=True)
        results.extend(batch_results)
    return results
60
 
61
def process_translation(filepath, src_lang_code, tgt_lang_code):
    """Translate an uploaded .srt file and return the path of the result.

    Args:
        filepath: local path of the uploaded SRT file (None if nothing uploaded).
        src_lang_code: NLLB source-language token from the dropdown.
        tgt_lang_code: NLLB target-language token from the dropdown.

    Returns:
        Path of the translated SRT file, or None when no file was given.

    Raises:
        gr.Error: when the file cannot be read or parsed as SRT; Gradio
            displays the message in the UI.
    """
    if filepath is None:
        return None
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        subtitles = list(srt.parse(content))
    except Exception as e:
        # BUG FIX: the original returned the message string, but the output
        # component is gr.File, which expects a file path — raising gr.Error
        # surfaces the problem to the user instead of breaking the component.
        raise gr.Error(f"Error: {str(e)}")

    texts = [sub.content for sub in subtitles]
    translated = batch_translate(texts, src_lang_code, tgt_lang_code)

    # Replace each subtitle's text with its translation, keeping timing intact.
    for sub, trans in zip(subtitles, translated):
        sub.content = trans

    out_path = "translated_subtitles.srt"
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(srt.compose(subtitles))
    return out_path
80
+
81
# ---------------------------------------------------------
# Part 2: speech-to-text helpers (Whisper logic)
# ---------------------------------------------------------
def format_timestamp(seconds):
    """Render a duration in seconds as an SRT timestamp "HH:MM:SS,mmm".

    Args:
        seconds: non-negative duration (int or float).

    Returns:
        Timestamp string, e.g. 3661.5 -> "01:01:01,500".
    """
    # Let timedelta normalise the float into whole seconds + microseconds.
    td = timedelta(seconds=seconds)
    whole_seconds = int(td.total_seconds())
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    millis = td.microseconds // 1000
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
93
+
94
def video_to_srt(video_path, progress=gr.Progress()):
    """Transcribe a video/audio file with Whisper and write an English SRT.

    Args:
        video_path: local path of the uploaded video (None if nothing uploaded).
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        Path of the generated SRT file, or None when no file was given.
    """
    if video_path is None:
        return None

    progress(0.1, desc="Extracting Audio & Transcribing...")

    # Run the Whisper pipeline, asking for chunk-level timestamps.
    # BUG FIX: distil-whisper/distil-large-v3 is an English-only checkpoint;
    # transformers raises when `language` is forced on an English-only model,
    # so the generate_kwargs={"language": "english"} argument is removed.
    outputs = whisper_pipe(video_path, return_timestamps=True)

    chunks = outputs.get("chunks", [])
    if not chunks:
        # Very short clips may come back as a single plain-text result.
        chunks = [{"text": outputs.get("text", ""), "timestamp": (0.0, 5.0)}]

    progress(0.8, desc="Formatting SRT...")

    # Convert Whisper chunks into srt.Subtitle entries.
    srt_subtitles = []
    for i, chunk in enumerate(chunks):
        text = chunk['text'].strip()
        start, end = chunk['timestamp']

        # Whisper can emit None timestamps at clip boundaries — guard both
        # ends (the original only guarded `end`, so a None `start` would
        # crash timedelta below).
        if start is None:
            start = 0.0
        if end is None:
            end = start + 5.0

        srt_subtitles.append(
            srt.Subtitle(index=i+1, start=timedelta(seconds=start), end=timedelta(seconds=end), content=text)
        )

    out_path = "generated_captions.srt"
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(srt.compose(srt_subtitles))

    return out_path
128
+
129
# ---------------------------------------------------------
# User interface (Gradio tabs)
# ---------------------------------------------------------
with gr.Blocks(title="The Ultimate Subtitler") as demo:
    gr.Markdown("# 🎥 The Ultimate Subtitle Tool")

    with gr.Tabs():
        # --- Tab 1: video -> SRT ---
        with gr.TabItem("Step 1: Video to SRT (Whisper)"):
            gr.Markdown("### استخرج ملف الترجمة الإنجليزية من أي فيديو")
            with gr.Row():
                video_input = gr.Video(label="Upload Video")
                srt_output_gen = gr.File(label="Generated English SRT")

            gen_btn = gr.Button("Generate SRT from Video", variant="primary")
            gen_btn.click(video_to_srt, inputs=video_input, outputs=srt_output_gen)

        # --- Tab 2: translate the SRT ---
        with gr.TabItem("Step 2: Translate SRT (NLLB)"):
            gr.Markdown("### ترجم ملف الـ SRT إلى العربية (أو لغات أخرى)")

            with gr.Row():
                srt_input = gr.File(label="Upload SRT File (English)")

                with gr.Column():
                    # Language options so the tool covers more than one pair.
                    src_lang = gr.Dropdown(
                        ["eng_Latn", "spa_Latn", "fra_Latn", "deu_Latn"],
                        label="Source Language", value="eng_Latn"
                    )
                    tgt_lang = gr.Dropdown(
                        # BUG FIX: the original listed the literal string
                        # "arz_Arab (Egyptian)", which is not a valid NLLB
                        # language token and would be handed verbatim to the
                        # tokenizer. A (label, value) pair keeps the friendly
                        # label while passing the real token "arz_Arab".
                        ["arb_Arab", ("arz_Arab (Egyptian)", "arz_Arab"), "eng_Latn", "fra_Latn"],
                        label="Target Language", value="arb_Arab"
                    )

            srt_output_trans = gr.File(label="Translated SRT")

            trans_btn = gr.Button("Translate Subtitles", variant="primary")
            trans_btn.click(
                process_translation,
                inputs=[srt_input, src_lang, tgt_lang],
                outputs=srt_output_trans
            )

if __name__ == "__main__":
    demo.launch()