Spaces:

kader1997
/

AutoCaptionPro

Sleeping

App Files Files Community

kader1997 committed on Dec 23, 2025

Commit

0561398

verified ·

1 Parent(s): c5a8cc6

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -56

app.py CHANGED Viewed

@@ -12,7 +12,9 @@ model = WhisperModel("large-v3", device="cpu", compute_type="int8")
 def process_arabic_text(text):
     # Prepare a caption string for rendering; falsy input (None or "") yields "".
     if not text: return ""
     # reshape() is imported outside this hunk -- presumably arabic_reshaper.reshape; confirm.
     reshaped = reshape(text)
     # The reshaped text is returned with a newline and a single space appended.
     return reshaped + "\n "
 def clean_color(color_str):
@@ -23,32 +25,16 @@ def clean_color(color_str):
             return f'rgb({r},{g},{b})'
     return color_str
-def step_1_extract_with_translation(video_path, progress=gr.Progress()):
     # Old (removed) step 1: transcribe the video twice with the module-level
     # Whisper model and return (DataFrame, status message), one row per word.
     # Returns (None, message) when no video path is supplied.
     # NOTE(review): `progress` is accepted but not used in the visible body.
     if not video_path: return None, "الرجاء رفع فيديو."
-    # Extract the original Arabic text together with its timing
-    segments_ar, _ = model.transcribe(video_path, word_timestamps=True, language="ar")
-    # Request the English translation directly from the model
-    segments_en, _ = model.transcribe(video_path, task="translate")
-    # Merge the English translation with the Arabic words, approximately, based on timing
     words_data = []
-    # Put the English translation segments in a list (done for performance)
-    en_texts = list(segments_en)
-    for segment in segments_ar:
-        # Find the closest English translation for this segment
-        en_translation = ""
-        for en_seg in en_texts:
     # Match rule: the Arabic segment starts inside the English segment, or the
     # English segment starts inside the Arabic one; the first match wins and
     # en_translation stays "" when nothing matches.
-            if (en_seg.start <= segment.start <= en_seg.end) or (segment.start <= en_seg.start <= segment.end):
-                en_translation = en_seg.text.strip()
-                break
         for word in segment.words:
     # Every word of a segment carries the same segment-level translation;
     # start/end times are rounded to 2 decimals.
-            words_data.append([word.word.strip(), en_translation, round(word.start, 2), round(word.end, 2)])
-    return pd.DataFrame(words_data, columns=["العربية", "الترجمة الإنجليزية", "البداية", "النهاية"]), "تم الاستخراج والترجمة!"
 def step_2_render_video(video_path, df_edited, font_selection, text_color, font_size, progress=gr.Progress()):
     # Old step 2: overlay one Arabic clip and one English clip per table row on
     # the source video and write the result to a fixed file name.
     # Returns (output_path, status message), or (None, message) when the video
     # path or the table is None.
     # NOTE(review): a diff hunk boundary follows the guard below, so a line of
     # the original function may be hidden from this view.
     if video_path is None or df_edited is None: return None, "بيانات ناقصة."
@@ -56,66 +42,53 @@ def step_2_render_video(video_path, df_edited, font_selection, text_color, font_
     # clean_color() is only partly visible in this diff; it appears to normalise
     # the colour string -- confirm against the full file.
     safe_color = clean_color(text_color)
     # Use the selected font only if that path exists on disk; otherwise fall back.
     actual_font = font_selection if os.path.exists(font_selection) else "DejaVu-Sans-Bold"
-    output_path = "final_bilingual_video.mp4"
     video = VideoFileClip(video_path)
     w, h = video.size
     # The source video is the first layer; text clips are appended after it.
     clips = [video]
-    data_list = df_edited.values.tolist()
-    for row in data_list:
     # Columns are read by position: 0 Arabic text, 1 English text, 2 start, 3 end.
-        ar_text = str(row[0])
-        en_text = str(row[1])
-        t_start = float(row[2])
-        t_end = float(row[3])
-        if not ar_text.strip(): continue
-        # 1. Arabic text (large)
-        clean_ar = process_arabic_text(ar_text)
-        ar_clip = TextClip(
-            text=clean_ar,
             font_size=int(font_size),
             color=safe_color,
             stroke_color='black',
-            stroke_width=2,
             font=actual_font,
             method='label'
     # Each clip lasts at least 0.1 s; the Arabic line sits at 45% of the frame height.
-        ).with_start(t_start).with_duration(max(0.1, t_end - t_start)).with_position(('center', int(h * 0.45)))
-        # 2. English text (small) at the bottom
-        en_clip = TextClip(
-            text=en_text,
-            font_size=int(font_size * 0.4), # shrink the translation to 40% of the Arabic size
-            color="white",
-            stroke_color='black',
-            stroke_width=1,
-            font="Arial", # simple font for the English text
-            method='caption',
-            size=(int(w * 0.8), None)
-        ).with_start(t_start).with_duration(max(0.1, t_end - t_start)).with_position(('center', int(h * 0.6)))
-        clips.append(ar_clip)
-        clips.append(en_clip)
     final = CompositeVideoClip(clips, size=(w, h))
     # NOTE(review): neither `video` nor `final` is closed in the visible code.
     final.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=video.fps, logger='bar')
-    return output_path, "تم إنتاج الفيديو المترجم بنجاح!"
 # --- UI ---
 # Old Gradio layout: video input/output, Arabic font/colour/size controls, an
 # editable table, and two buttons wired to the step 1 and step 2 handlers.
 with gr.Blocks() as app:
-    gr.Markdown("## 🎬 محرر الفيديو المزدوج (عربي + إنجليزي)")
     with gr.Row():
         v_in = gr.Video(); v_out = gr.Video()
     with gr.Row():
     # The dropdown offers a single font file; step_2_render_video falls back to
     # "DejaVu-Sans-Bold" when that path does not exist.
-        font_opt = gr.Dropdown(choices=["arialbd.ttf"], value="arialbd.ttf", label="الخط العربي")
-        color_opt = gr.ColorPicker(value="#FF8C00", label="لون العربي")
-        size_opt = gr.Slider(30, 200, value=90, label="حجم الخط العربي")
-    btn_1 = gr.Button("1. تحليل وترجمة"); table = gr.Dataframe(interactive=True)
     btn_2 = gr.Button("2. إنتاج الفيديو"); status = gr.Textbox()
     # Button 1 fills the table and status box from the uploaded video.
-    btn_1.click(step_1_extract_with_translation, [v_in], [table, status])
     # Button 2 renders from the (possibly edited) table plus the style controls.
     btn_2.click(step_2_render_video, [v_in, table, font_opt, color_opt, size_opt], [v_out, status])
 app.launch()

 def process_arabic_text(text):
     # Prepare a caption string for rendering; falsy input (None or "") yields "".
     if not text: return ""
+    # 1. Reshape the Arabic letters so they render joined and correct (without the extra • dots)
     # reshape() is imported outside this view -- presumably arabic_reshaper.reshape; confirm.
     reshaped = reshape(text)
+    # 2. Add an empty line at the bottom so the letters' lower dots are not clipped
     return reshaped + "\n "
 def clean_color(color_str):
             return f'rgb({r},{g},{b})'
     return color_str
+def step_1_extract_words(video_path, progress=gr.Progress()):
     # New step 1: transcribe the video in Arabic with word-level timestamps and
     # return (DataFrame, status message), one row per word: word, start, end.
     # Returns (None, message) when no video path is supplied.
     # NOTE(review): `progress` is accepted but not used in the visible body.
     if not video_path: return None, "الرجاء رفع فيديو."
     # `model` is the module-level WhisperModel; the second return value of
     # transcribe() is discarded.
+    segments, _ = model.transcribe(video_path, word_timestamps=True, language="ar")
     words_data = []
+    for segment in segments:
         for word in segment.words:
     # Surrounding whitespace is stripped from each word; times are rounded to 2 decimals.
+            words_data.append([word.word.strip(), round(word.start, 2), round(word.end, 2)])
+    return pd.DataFrame(words_data, columns=["الكلمة", "البداية", "النهاية"]), "تم استخراج الكلمات!"
 def step_2_render_video(video_path, df_edited, font_selection, text_color, font_size, progress=gr.Progress()):
     # New step 2: overlay one text clip per table row on the source video and
     # write the result to a fixed file name.
     # Returns (output_path, status message), or (None, message) when the video
     # path or the table is None.
     # NOTE(review): the diff has a hunk boundary right after the guard below, so
     # a line of the real function may be hidden from this view.
     if video_path is None or df_edited is None: return None, "بيانات ناقصة."
     # clean_color() is only partly visible in this diff; it appears to normalise
     # the colour string -- confirm against the full file.
     safe_color = clean_color(text_color)
     # Use the selected font only if that path exists on disk; otherwise fall back.
     actual_font = font_selection if os.path.exists(font_selection) else "DejaVu-Sans-Bold"
+    output_path = "final_clean_text_video.mp4"
     video = VideoFileClip(video_path)
     w, h = video.size
     # The source video is the first layer; text clips are appended after it.
     clips = [video]
+    words_list = df_edited.values.tolist()
+    for row in words_list:
     # Columns are read by position: 0 word text, 1 start time, 2 end time.
     # NOTE(review): float() raises on a blank or non-numeric cell edited in the table.
+        word_text = str(row[0])
+        t_start = float(row[1])
+        t_end = float(row[2])
     # Rows whose text is empty or whitespace-only are skipped.
+        if not word_text.strip(): continue
+        clean_word = process_arabic_text(word_text)
+        txt = TextClip(
+            text=clean_word,
             font_size=int(font_size),
             color=safe_color,
             stroke_color='black',
+            stroke_width=2.0, # reduce the stroke thickness slightly to suit the smaller text (NOTE(review): the old side of this diff used 2, so the value is unchanged)
             font=actual_font,
             method='label'
     # Each clip lasts at least 0.1 s and is centred horizontally at half the frame height.
+        ).with_start(t_start).with_duration(max(0.1, t_end - t_start)).with_position(('center', int(h * 0.5)))
+        clips.append(txt)
     final = CompositeVideoClip(clips, size=(w, h))
     # NOTE(review): neither `video` nor `final` is closed in the visible code.
     final.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=video.fps, logger='bar')
+    return output_path, "تم إنتاج الفيديو بنجاح!"
 # --- UI ---
 # New Gradio layout: video input/output, font/colour/size controls, an editable
 # table, and two buttons wired to the step 1 and step 2 handlers.
 with gr.Blocks() as app:
+    gr.Markdown("## 🎬 محرر الفيديو: نصوص نظيفة")
     with gr.Row():
         v_in = gr.Video(); v_out = gr.Video()
     with gr.Row():
     # The dropdown offers a single font file; step_2_render_video falls back to
     # "DejaVu-Sans-Bold" when that path does not exist.
+        font_opt = gr.Dropdown(choices=["arialbd.ttf"], value="arialbd.ttf", label="الخط")
+        color_opt = gr.ColorPicker(value="#FF8C00", label="لون ذهبي برتقالي")
+        # The default size was reduced from 130 to 90
     # NOTE(review): the old side of this diff already shows value=90.
+        size_opt = gr.Slider(30, 200, value=90, label="حجم النص")
+    btn_1 = gr.Button("1. تحليل الكلمات"); table = gr.Dataframe(interactive=True)
     btn_2 = gr.Button("2. إنتاج الفيديو"); status = gr.Textbox()
     # Button 1 fills the table and status box from the uploaded video.
+    btn_1.click(step_1_extract_words, [v_in], [table, status])
     # Button 2 renders from the (possibly edited) table plus the style controls.
     btn_2.click(step_2_render_video, [v_in, table, font_opt, color_opt, size_opt], [v_out, status])
 app.launch()