Rus_Video_captionning

Sleeping

App Files Files Community

AndreySokolov01 committed on Jan 21

Commit

23f1f1e

verified ·

1 Parent(s): 9334e1f

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -45

app.py CHANGED Viewed

@@ -25,7 +25,7 @@ PRESETS = {
     "Viral Shorts (TOP)": {
         "Alignment": 8,
         "FontName": "Arial Black",
-        "FontSize": 12,
         "PrimaryColour": "#FFFF00",
         "Outline": 4,
         "OutlineColour": "#000000",
@@ -38,7 +38,7 @@ PRESETS = {
     "Минимал низ": {
         "Alignment": 2,
         "FontName": "Montserrat",
-        "FontSize": 12,
         "PrimaryColour": "#17FC03",
         "Outline": 1,
         "OutlineColour": "#000000",
@@ -82,24 +82,44 @@ def format_subtitle_text(text, font_size, bold):
         text = text.upper()
     else:
         text = text.capitalize()
-    wrap_width = 18 if font_size >= 48 else 36
     return "\n".join(wrap(text, wrap_width))
-# === Модели (без изменений) ===
 def get_whisper():
-    if "whisper" in _cache: return _cache["whisper"]
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16 if device == "cuda" else torch.float32
     processor = WhisperProcessor.from_pretrained(WHISPER_MODEL)
-    model = WhisperForConditionalGeneration.from_pretrained(WHISPER_MODEL, torch_dtype=dtype, low_cpu_mem_usage=True)
-    pipe = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer,
-                    feature_extractor=processor.feature_extractor, return_timestamps=True,
-                    chunk_length_s=CHUNK_LENGTH, device=0 if device == "cuda" else -1)
     _cache["whisper"] = pipe
     return pipe
 def get_faster_whisper():
-    if "faster" in _cache: return _cache["faster"]
     device = "cuda" if torch.cuda.is_available() else "cpu"
     compute = "float16" if device == "cuda" else "int8"
     model = FasterWhisperModel(FASTERW_MODEL, device=device, compute_type=compute)
@@ -107,13 +127,15 @@ def get_faster_whisper():
     return model
 def get_parakeet():
-    if "parakeet" in _cache: return _cache["parakeet"]
     model = EncDecRNNTBPEModel.from_pretrained(PARAKEET_MODEL)
     model.eval()
     model = model.to("cuda" if torch.cuda.is_available() else "cpu")
     _cache["parakeet"] = model
     return model
 def transcribe(audio, backend):
     if backend == "Whisper":
         pipe = get_whisper()
@@ -130,11 +152,15 @@ def transcribe(audio, backend):
     step = 6
     for i in range(0, len(out), step):
         g = out[i:i+step]
-        chunks.append({"start": g[0]["start"], "end": g[-1]["end"], "text": " ".join(w["word"] for w in g)})
     return chunks
-# === НОВАЯ ФУНКЦИЯ: preview первого кадра с субтитрами ===
-def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, margin):
     if not video_path:
         return None
@@ -143,7 +169,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
     ass_file = os.path.join(tmp, "preview.ass")
     try:
-        # 1. Извлекаем первый кадр
         subprocess.run([
             'ffmpeg', '-y', '-i', video_path, '-vframes', '1', '-q:v', '2', frame
         ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
@@ -151,20 +177,11 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
         if not os.path.exists(frame):
             return None
-        # 2. Извлекаем аудио и транскрибируем (минимально)
-        wav = os.path.join(tmp, "audio.wav")
-        subprocess.run([
-            'ffmpeg', '-y', '-i', video_path, '-vn', '-ac', '1', '-ar', '16000', wav
-        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-        segs = transcribe(wav, backend)
-        if not segs:
-            return None
-        first_seg = segs[0]
-        styled_text = format_subtitle_text(first_seg["text"], size, bold)
-        # 3. Генерируем .ass стиль
         style = {
             "FontName": font,
             "FontSize": int(size),
@@ -172,7 +189,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
             "BackColour": bg,
             "Bold": int(bold),
             "MarginV": int(margin),
-            "Alignment": 2,  # bottom center — можно менять
             "Outline": 1,
             "OutlineColour": "&H00000000",
             "BorderStyle": 1,
@@ -180,7 +197,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
         }
         style_str = style_to_force(style)
-        # 4. Создаём .ass файл
         with open(ass_file, "w", encoding="utf-8") as f:
             f.write("[Script Info]\n")
             f.write("ScriptType: v4.00+\n")
@@ -191,10 +208,9 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
             f.write(f"Style: Default,{style_str}\n\n")
             f.write("[Events]\n")
             f.write("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")
-            # Используем длительность 5 секунд для preview
             f.write(f"Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{styled_text}\n")
-        # 5. Накладываем субтитры на кадр
         preview_img = os.path.join(tmp, "preview.jpg")
         safe_ass = ass_file.replace("\\", "/").replace(":", "\\:")
         subprocess.run([
@@ -210,7 +226,7 @@ def preview_subtitle_style(video_path, backend, font, size, color, bg, bold, mar
     return None
-# === Основная обработка (без изменений, кроме импорта) ===
 def process(video, backend, preset, font, size, color, bg, bold, margin):
     if not video:
         return "❌ Нет видео", None, None, "", None
@@ -260,9 +276,9 @@ def process(video, backend, preset, font, size, color, bg, bold, margin):
     except Exception as e:
         return f"❌ Ошибка: {str(e)}", None, None, "", None
-# === Интерфейс ===
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎬 Автосубтитры + LIVE preview текста + Превью стиля")
     with gr.Row():
         with gr.Column():
@@ -272,34 +288,32 @@ with gr.Blocks() as demo:
             gr.Markdown("### 🎨 Ручная настройка")
             font = gr.Textbox("Montserrat", label="Шрифт")
-            size = gr.Slider(5, 72, 32, label="Размер")
             color = gr.ColorPicker("#FFFFFF", label="Цвет текста")
             bg = gr.ColorPicker("#80000000", label="Фон")
             bold = gr.Checkbox(True, label="Bold")
-            margin = gr.Slider(10, 100, 40, label="Отступ")
             with gr.Row():
-                run_btn = gr.Button("🚀 Сгенерировать")
                 preview_btn = gr.Button("👁️ Превью стиля")
         with gr.Column():
             status = gr.Markdown()
-            preview = gr.Textbox(label="LIVE preview текста", lines=8)
-            preview_img = gr.Image(label="Превью субтитров на кадре", type="filepath")
-            out_video = gr.Video()
-            out_srt = gr.File()
-    # Обработка полного видео
     run_btn.click(
         process,
         inputs=[video, backend, preset, font, size, color, bg, bold, margin],
         outputs=[status, out_video, out_srt, preview, preview_img]
     )
-    # Превью стиля (быстро!)
     preview_btn.click(
         preview_subtitle_style,
-        inputs=[video, backend, font, size, color, bg, bold, margin],
         outputs=[preview_img]
     )

     "Viral Shorts (TOP)": {
         "Alignment": 8,
         "FontName": "Arial Black",
+        "FontSize": 64,
         "PrimaryColour": "#FFFF00",
         "Outline": 4,
         "OutlineColour": "#000000",
     "Минимал низ": {
         "Alignment": 2,
         "FontName": "Montserrat",
+        "FontSize": 28,
         "PrimaryColour": "#17FC03",
         "Outline": 1,
         "OutlineColour": "#000000",
         text = text.upper()
     else:
         text = text.capitalize()
+    # Адаптивная ширина переноса
+    if font_size >= 60:
+        wrap_width = 12
+    elif font_size >= 48:
+        wrap_width = 16
+    elif font_size >= 36:
+        wrap_width = 24
+    else:
+        wrap_width = 36
     return "\n".join(wrap(text, wrap_width))
+# === Загрузка моделей ===
 def get_whisper():
+    if "whisper" in _cache:
+        return _cache["whisper"]
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16 if device == "cuda" else torch.float32
     processor = WhisperProcessor.from_pretrained(WHISPER_MODEL)
+    model = WhisperForConditionalGeneration.from_pretrained(
+        WHISPER_MODEL, torch_dtype=dtype, low_cpu_mem_usage=True
+    )
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=model,
+        tokenizer=processor.tokenizer,
+        feature_extractor=processor.feature_extractor,
+        return_timestamps=True,
+        chunk_length_s=CHUNK_LENGTH,
+        device=0 if device == "cuda" else -1,
+    )
     _cache["whisper"] = pipe
     return pipe
 def get_faster_whisper():
+    if "faster" in _cache:
+        return _cache["faster"]
     device = "cuda" if torch.cuda.is_available() else "cpu"
     compute = "float16" if device == "cuda" else "int8"
     model = FasterWhisperModel(FASTERW_MODEL, device=device, compute_type=compute)
     return model
 def get_parakeet():
+    if "parakeet" in _cache:
+        return _cache["parakeet"]
     model = EncDecRNNTBPEModel.from_pretrained(PARAKEET_MODEL)
     model.eval()
     model = model.to("cuda" if torch.cuda.is_available() else "cpu")
     _cache["parakeet"] = model
     return model
+# === Транскрибация ===
 def transcribe(audio, backend):
     if backend == "Whisper":
         pipe = get_whisper()
     step = 6
     for i in range(0, len(out), step):
         g = out[i:i+step]
+        chunks.append({
+            "start": g[0]["start"],
+            "end": g[-1]["end"],
+            "text": " ".join(w["word"] for w in g)
+        })
     return chunks
+# === Превью стиля (без транскрибации!) ===
+def preview_subtitle_style(video_path, font, size, color, bg, bold, margin):
     if not video_path:
         return None
     ass_file = os.path.join(tmp, "preview.ass")
     try:
+        # Извлекаем первый кадр
         subprocess.run([
             'ffmpeg', '-y', '-i', video_path, '-vframes', '1', '-q:v', '2', frame
         ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         if not os.path.exists(frame):
             return None
+        # Фиксированный текст для превью
+        example_text = "Тут ваши субтитры"
+        styled_text = format_subtitle_text(example_text, size, bold)
+        # Стиль
         style = {
             "FontName": font,
             "FontSize": int(size),
             "BackColour": bg,
             "Bold": int(bold),
             "MarginV": int(margin),
+            "Alignment": 2,
             "Outline": 1,
             "OutlineColour": "&H00000000",
             "BorderStyle": 1,
         }
         style_str = style_to_force(style)
+        # Создаём .ass
         with open(ass_file, "w", encoding="utf-8") as f:
             f.write("[Script Info]\n")
             f.write("ScriptType: v4.00+\n")
             f.write(f"Style: Default,{style_str}\n\n")
             f.write("[Events]\n")
             f.write("Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")
             f.write(f"Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{styled_text}\n")
+        # Накладываем на кадр
         preview_img = os.path.join(tmp, "preview.jpg")
         safe_ass = ass_file.replace("\\", "/").replace(":", "\\:")
         subprocess.run([
     return None
+# === Полная обработка видео ===
 def process(video, backend, preset, font, size, color, bg, bold, margin):
     if not video:
         return "❌ Нет видео", None, None, "", None
     except Exception as e:
         return f"❌ Ошибка: {str(e)}", None, None, "", None
+# === Интерфейс Gradio ===
 with gr.Blocks() as demo:
+    gr.Markdown("## 🎬 Автосубтитры (Whisper / FasterWhisper / NeMo) + LIVE preview + Превью стиля")
     with gr.Row():
         with gr.Column():
             gr.Markdown("### 🎨 Ручная настройка")
             font = gr.Textbox("Montserrat", label="Шрифт")
+            size = gr.Slider(minimum=10, maximum=96, value=32, step=1, label="Размер шрифта")
             color = gr.ColorPicker("#FFFFFF", label="Цвет текста")
             bg = gr.ColorPicker("#80000000", label="Фон")
             bold = gr.Checkbox(True, label="Bold")
+            margin = gr.Slider(10, 100, 40, label="Отступ снизу")
             with gr.Row():
+                run_btn = gr.Button("🚀 Сгенерировать субтитры")
                 preview_btn = gr.Button("👁️ Превью стиля")
         with gr.Column():
             status = gr.Markdown()
+            preview = gr.Textbox(label="LIVE preview текста субтитров", lines=8)
+            preview_img = gr.Image(label="Превью стиля на кадре", type="filepath")
+            out_video = gr.Video(label="Видео с субтитрами")
+            out_srt = gr.File(label="SRT файл")
     run_btn.click(
         process,
         inputs=[video, backend, preset, font, size, color, bg, bold, margin],
         outputs=[status, out_video, out_srt, preview, preview_img]
     )
     preview_btn.click(
         preview_subtitle_style,
+        inputs=[video, font, size, color, bg, bold, margin],
         outputs=[preview_img]
     )