Spaces:

kader1997
/

AutoCaptionPro

Running

App Files Files Community

kader1997 committed on Dec 22, 2025

Commit

cf558fa

verified ·

1 Parent(s): 04fb6eb

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -61

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 os.environ["IMAGE_MAGICK_BINARY"] = "/usr/bin/convert"
 import gradio as gr
 import pandas as pd
 from faster_whisper import WhisperModel
 from moviepy import VideoFileClip, TextClip, CompositeVideoClip
 from arabic_reshaper import reshape
@@ -12,99 +13,77 @@ model = WhisperModel("large-v3", device="cpu", compute_type="int8")
 def process_arabic_text(text):  # Reshape Arabic text with arabic_reshaper's reshape() before it is rendered
     return reshape(text) + "\n "  # a newline and a space are appended; presumably padding so the caption is not clipped — TODO confirm
 def step_1_extract_words(video_path, progress=gr.Progress()):  # Transcribe the video; returns (word table, status message)
-    if not video_path:  # guard: no video was provided
-        return None, "الرجاء رفع فيديو أولاً."
-    progress(0, desc="جاري استخراج الكلمات...")  # report 0 progress with a description
     segments, _ = model.transcribe(video_path, word_timestamps=True, language="ar")  # language fixed to "ar", per-word timestamps requested
-    words_data = []
-    for segment in segments:
-        for word in segment.words:
-            words_data.append([word.word.strip(), round(word.start, 2), round(word.end, 2)])  # one row per word; times rounded to 2 decimals
-    df = pd.DataFrame(words_data, columns=["الكلمة", "البداية", "النهاية"])  # three columns: word text, start time, end time
-    return df, "تم الاستخراج!"
 def step_2_render_video(video_path, df_edited, font_selection, text_color, font_size, progress=gr.Progress()):  # Overlay the word table on the video as captions; returns (output path, status message)
-    if video_path is None or df_edited is None or df_edited.empty:  # guard: missing video, missing table, or empty table
-        return None, "بيانات ناقصة."
-    # Check the font path: if the uploaded file is not found, fall back to the default Arial
-    actual_font = font_selection if os.path.exists(font_selection) else "Arial-Bold"
-    output_path = "output_final.mp4"  # fixed output filename
     video = VideoFileClip(video_path)
-    w, h = int(video.w), int(video.h)
     clips = [video]  # base video first; caption clips are appended after it
     words_list = df_edited.values.tolist()
     chunk_size = 3  # number of table rows (words) shown together in one caption
-    progress(0.1, desc="جاري معالجة النصوص بالإعدادات الجديدة...")
     for i in range(0, len(words_list), chunk_size):
-        current_chunk = words_list[i : i + chunk_size]
-        sentence = " ".join([str(r[0]) for r in current_chunk])  # column 0 holds the word text
         clean_sentence = process_arabic_text(sentence)
-        c_start = float(current_chunk[0][1])  # start time of the first word in the chunk
-        c_end = float(current_chunk[-1][2])  # end time of the last word in the chunk
-        duration = max(0.1, c_end - c_start)  # never shorter than 0.1, even if the edited times are equal or reversed
-        # Create the clip, making sure the color and the font are passed explicitly
         txt = TextClip(
             text=clean_sentence,
             font_size=int(font_size),
-            color=text_color, # takes the color straight from the ColorPicker
             stroke_color='black',
             stroke_width=1.5,
             font=actual_font,
             method='caption',
-            size=(int(w * 0.85), None),  # width is 85% of the video width; height left as None
             text_align='center'
-        ).with_start(c_start).with_duration(duration).with_position(('center', int(h * 0.65)))  # horizontally centered, y at 65% of the video height
         clips.append(txt)
-    final_video = CompositeVideoClip(clips, size=(w, h))
-    # Improve processing speed and reduce problems
-    final_video.write_videofile(
-        output_path,
-        codec="libx264",
-        audio_codec="aac",
-        fps=video.fps,
-        logger='bar',
-        threads=4 # use multithreading to speed up the process
-    )
-    return output_path, f"تم الحفظ! اللون: {text_color}، الخط: {actual_font}"
 # --- Interface ---
 with gr.Blocks() as app:
-    gr.Markdown("### 🎬 Caption Pro: تخصيص كامل")
     with gr.Row():
-        v_in = gr.Video()  # source video, passed to both steps
-        v_out = gr.Video()  # receives the rendered video from step 2
     with gr.Row():
-        # Make sure these files are uploaded to Hugging Face (e.g. Cairo-Bold.ttf)
-        font_opt = gr.Dropdown(
-            choices=["arialbd.ttf", "Cairo-Bold.ttf", "Almarai-Bold.ttf"],
-            value="arialbd.ttf",
-            label="نوع الخط"
-        )
-        color_opt = gr.ColorPicker(value="#FFFF00", label="لون النص")
-        size_opt = gr.Slider(30, 150, value=70, label="حجم الخط")
-    btn_1 = gr.Button("1. استخراج")
-    table = gr.Dataframe(headers=["الكلمة", "البداية", "النهاية"], interactive=True)  # editable table filled by step 1 and read by step 2
-    btn_2 = gr.Button("2. إنتاج الفيديو")
-    status = gr.Textbox(label="الحالة")  # shows the status message returned by either step
-    btn_1.click(step_1_extract_words, inputs=[v_in], outputs=[table, status])
-    btn_2.click(step_2_render_video, inputs=[v_in, table, font_opt, color_opt, size_opt], outputs=[v_out, status])
 app.launch()

 os.environ["IMAGE_MAGICK_BINARY"] = "/usr/bin/convert"
 import gradio as gr
 import pandas as pd
+import re  # نحتاجه لتنظيف الألوان
 from faster_whisper import WhisperModel
 from moviepy import VideoFileClip, TextClip, CompositeVideoClip
 from arabic_reshaper import reshape
 def process_arabic_text(text):  # Reshape Arabic text with arabic_reshaper's reshape() before it is rendered
     return reshape(text) + "\n "  # a newline and a space are appended; presumably padding so the caption is not clipped — TODO confirm
+def clean_color(color_str):  # Rewrite an 'rgba...' color string as 'rgb(r,g,b)'; any other string is returned unchanged
+    """دالة لتحويل أي صيغة لون غريبة إلى صيغة يفهمها البرنامج"""  # English: convert any unusual color format into one the program understands
+    if color_str.startswith('rgba'):  # only strings starting with 'rgba' are rewritten; assumes color_str is a str
+        # Extract only the numbers and convert them to integers
+        nums = re.findall(r"\d+\.?\d*", color_str)
+        if len(nums) >= 3:
+            r, g, b = int(float(nums[0])), int(float(nums[1])), int(float(nums[2]))  # fractional parts are truncated; a 4th number (alpha) is ignored
+            return f'rgb({r},{g},{b})'
+    return color_str  # also reached when fewer than three numbers were found
 def step_1_extract_words(video_path, progress=gr.Progress()):  # Transcribe the video; returns (word table, status message). `progress` is not used in this version
+    if not video_path: return None, "الرجاء رفع فيديو."  # guard: no video was provided
     segments, _ = model.transcribe(video_path, word_timestamps=True, language="ar")  # language fixed to "ar", per-word timestamps requested
+    words_data = [[w.word.strip(), round(w.start, 2), round(w.end, 2)] for s in segments for w in s.words]  # one row per word; times rounded to 2 decimals
+    return pd.DataFrame(words_data, columns=["الكلمة", "البداية", "النهاية"]), "تم الاستخراج!"  # three columns: word text, start time, end time
 def step_2_render_video(video_path, df_edited, font_selection, text_color, font_size, progress=gr.Progress()):  # Overlay the word table on the video as captions; returns (output path, status message)
+    if video_path is None or df_edited is None: return None, "بيانات ناقصة."  # NOTE(review): an empty table is not rejected by this guard
+    # Clean the color before using it, to prevent a ValueError
+    safe_color = clean_color(text_color)
+    # Make sure the font file exists
+    actual_font = font_selection if os.path.exists(font_selection) else "DejaVu-Sans-Bold"  # fallback font name when the path does not exist
+    output_path = "final_fixed_video.mp4"  # fixed output filename
     video = VideoFileClip(video_path)
+    w, h = video.size
     clips = [video]  # base video first; caption clips are appended after it
     words_list = df_edited.values.tolist()
     chunk_size = 3  # number of table rows (words) shown together in one caption
     for i in range(0, len(words_list), chunk_size):
+        chunk = words_list[i : i + chunk_size]
+        sentence = " ".join([str(r[0]) for r in chunk])  # column 0 holds the word text
         clean_sentence = process_arabic_text(sentence)
+        t_start, t_end = float(chunk[0][1]), float(chunk[-1][2])  # start of the first word, end of the last word in the chunk
         txt = TextClip(
             text=clean_sentence,
             font_size=int(font_size),
+            color=safe_color, # the cleaned color goes here
             stroke_color='black',
             stroke_width=1.5,
             font=actual_font,
             method='caption',
+            size=(int(w * 0.8), None),  # width is 80% of the video width; height left as None
             text_align='center'
+        ).with_start(t_start).with_duration(max(0.1, t_end - t_start)).with_position(('center', int(h * 0.65)))  # duration floored at 0.1; horizontally centered, y at 65% of the video height
         clips.append(txt)
+    final = CompositeVideoClip(clips, size=(w, h))
+    final.write_videofile(output_path, codec="libx264", audio_codec="aac", fps=video.fps, logger='bar')  # fps taken from the source video
+    return output_path, f"تم بنجاح! اللون المستخدم: {safe_color}"
 # --- Interface ---
 with gr.Blocks() as app:
+    gr.Markdown("## 🎬 Caption Pro: Stable Version")
     with gr.Row():
+        v_in = gr.Video(); v_out = gr.Video()  # v_in: source video for both steps; v_out: receives the rendered video from step 2
     with gr.Row():
+        font_opt = gr.Dropdown(choices=["arialbd.ttf", "Cairo-Bold.ttf", "Almarai-Bold.ttf"], value="arialbd.ttf", label="الخط")  # selected value is passed as font_selection
+        color_opt = gr.ColorPicker(value="#FFFF00", label="اللون")  # selected value is passed as text_color
+        size_opt = gr.Slider(30, 150, value=70, label="الحجم")  # selected value is passed as font_size
+    btn_1 = gr.Button("1. استخراج"); table = gr.Dataframe(interactive=True)  # editable table filled by step 1 and read by step 2
+    btn_2 = gr.Button("2. إنتاج الفيديو"); status = gr.Textbox()  # status shows the message returned by either step
+    btn_1.click(step_1_extract_words, [v_in], [table, status])
+    btn_2.click(step_2_render_video, [v_in, table, font_opt, color_opt, size_opt], [v_out, status])
 app.launch()