Spaces:

ulduldp
/

test-ffmpeg

Running

App Files Files Community

ulduldp committed 13 days ago

Commit

9782092

verified ·

1 Parent(s): 3beecbd

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -35

app.py CHANGED Viewed

@@ -2,8 +2,6 @@ from flask import Flask, render_template_string, request, jsonify
 import os
 import uuid
 import subprocess
-import tempfile
-import shutil
 import textwrap
 from werkzeug.utils import secure_filename
 from faster_whisper import WhisperModel
@@ -18,7 +16,7 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
-# Load model once
 model = WhisperModel(
     "base",
     device="cpu",
@@ -180,9 +178,9 @@ form.addEventListener("submit", async (e)=>{
     const formData = new FormData(form);
     try{
-        const response = await fetch("/generate",{
-            method:"POST",
-            body:formData
         });
         const data = await response.json();
@@ -198,7 +196,6 @@ form.addEventListener("submit", async (e)=>{
         }else{
             alert(data.error || "Failed");
         }
     }catch(err){
         loading.style.display = "none";
         alert("Server Error");
@@ -218,12 +215,16 @@ def ass_time(seconds: float) -> str:
     return f"{h}:{m:02d}:{s:05.2f}"
 def ass_escape(text: str) -> str:
-    # Escape ASS special chars
     text = text.replace("\\", "\\\\")
     text = text.replace("{", "\\{").replace("}", "\\}")
     text = text.replace("\n", " ")
     return text
 def make_ass_subtitles(segments, ass_path):
     header = """[Script Info]
 ScriptType: v4.00+
@@ -244,11 +245,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         start = ass_time(seg["start"])
         end = ass_time(seg["end"])
-        text = textwrap.fill(seg["text"].strip(), width=36)
-        text = ass_escape(text).replace("\n", r"\N")
         lines.append(
-            f"Dialogue: 0,{start},{end},Default,,0,0,0,,{text}\n"
         )
     with open(ass_path, "w", encoding="utf-8") as f:
@@ -274,19 +280,17 @@ def generate():
     image_name = secure_filename(image.filename)
     audio_name = secure_filename(audio.filename)
-    image_path = os.path.join(UPLOAD_FOLDER, uid + "_" + image_name)
-    audio_path = os.path.join(UPLOAD_FOLDER, uid + "_" + audio_name)
-    output_filename = uid + ".mp4"
     output_path = os.path.join(OUTPUT_FOLDER, output_filename)
-    ass_path = os.path.join(SUBTITLE_FOLDER, uid + ".ass")
     image.save(image_path)
     audio.save(audio_path)
     try:
-        # 1) Transcribe audio
         segments_iter, info = model.transcribe(
             audio_path,
             beam_size=5,
@@ -308,20 +312,19 @@ def generate():
             })
             full_text_parts.append(text)
-        full_text = " ".join(full_text_parts).strip()
-        # 2) Create ASS subtitles
         make_ass_subtitles(transcript, ass_path)
-        # 3) FFmpeg command with smooth animation + subtitles
-        # Subtle zoom/pan effect + subtitles at bottom
-        filter_complex = (
-            f"[0:v]scale=1280:720:force_original_aspect_ratio=increase,"
-            f"crop=1280:720,"
-            f"zoompan=z='min(zoom+0.0008,1.08)':"
-            f"x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':"
-            f"d=1:s=1280x720:fps=30,"
-            f"subtitles='{ass_path.replace(\"'\", \"\\\\'\")}'[v]"
         )
         cmd = [
@@ -330,9 +333,7 @@ def generate():
             "-loop", "1",
             "-i", image_path,
             "-i", audio_path,
-            "-filter_complex", filter_complex,
-            "-map", "[v]",
-            "-map", "1:a:0",
             "-c:v", "libx264",
             "-pix_fmt", "yuv420p",
             "-c:a", "aac",
@@ -341,7 +342,7 @@ def generate():
             output_path
         ]
-        subprocess.run(
             cmd,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
@@ -351,7 +352,7 @@ def generate():
         return jsonify({
             "video_url": f"/static/videos/{output_filename}",
             "transcript": transcript,
-            "full_text": full_text,
             "language": getattr(info, "language", None)
         })

 import os
 import uuid
 import subprocess
 import textwrap
 from werkzeug.utils import secure_filename
 from faster_whisper import WhisperModel
 os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
+# Load Whisper once
 model = WhisperModel(
     "base",
     device="cpu",
     const formData = new FormData(form);
     try{
+        const response = await fetch("/generate", {
+            method: "POST",
+            body: formData
         });
         const data = await response.json();
         }else{
             alert(data.error || "Failed");
         }
     }catch(err){
         loading.style.display = "none";
         alert("Server Error");
     return f"{h}:{m:02d}:{s:05.2f}"
 def ass_escape(text: str) -> str:
     """Return *text* made safe for the Text field of an ASS Dialogue line.

     Each backslash is doubled, each curly brace is prefixed with a
     backslash, and every newline character is turned into a single space.

     NOTE(review): because newlines are flattened to spaces here, any
     newline-based line wrapping applied before this call does not survive
     it; a caller that wants explicit subtitle line breaks has to handle
     them separately.
     """
     # One pass over the input: every source character is mapped on its
     # own, so the backslashes introduced for braces are never re-escaped.
     substitutions = str.maketrans({
         "\\": "\\\\",
         "{": "\\{",
         "}": "\\}",
         "\n": " ",
     })
     return text.translate(substitutions)
+def escape_ffmpeg_path(path: str) -> str:
+    # Escape for FFmpeg subtitles filter
+    # Works well for local Linux paths
+    return path.replace("\\", "\\\\").replace(":", "\\:").replace("'", r"\'")
 def make_ass_subtitles(segments, ass_path):
     header = """[Script Info]
 ScriptType: v4.00+
         start = ass_time(seg["start"])
         end = ass_time(seg["end"])
+        text = seg["text"].strip()
+        if not text:
+            continue
+        # Wrap long subtitles nicely
+        wrapped = textwrap.fill(text, width=36)
+        wrapped = ass_escape(wrapped).replace("\n", r"\N")
         lines.append(
+            f"Dialogue: 0,{start},{end},Default,,0,0,0,,{wrapped}\n"
         )
     with open(ass_path, "w", encoding="utf-8") as f:
     image_name = secure_filename(image.filename)
     audio_name = secure_filename(audio.filename)
+    image_path = os.path.join(UPLOAD_FOLDER, f"{uid}_{image_name}")
+    audio_path = os.path.join(UPLOAD_FOLDER, f"{uid}_{audio_name}")
+    output_filename = f"{uid}.mp4"
     output_path = os.path.join(OUTPUT_FOLDER, output_filename)
+    ass_path = os.path.join(SUBTITLE_FOLDER, f"{uid}.ass")
     image.save(image_path)
     audio.save(audio_path)
     try:
+        # Transcribe audio
         segments_iter, info = model.transcribe(
             audio_path,
             beam_size=5,
             })
             full_text_parts.append(text)
         make_ass_subtitles(transcript, ass_path)
+        safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
+        # Smooth zoom animation + subtitles
+        vf = (
+            "scale=1280:720:force_original_aspect_ratio=increase,"
+            "crop=1280:720,"
+            "zoompan=z='min(zoom+0.0008,1.08)':"
+            "x='iw/2-(iw/zoom/2)':"
+            "y='ih/2-(ih/zoom/2)':"
+            "d=999999:s=1280x720:fps=30,"
+            f"subtitles='{safe_ass_path}'"
         )
         cmd = [
             "-loop", "1",
             "-i", image_path,
             "-i", audio_path,
+            "-vf", vf,
             "-c:v", "libx264",
             "-pix_fmt", "yuv420p",
             "-c:a", "aac",
             output_path
         ]
+        result = subprocess.run(
             cmd,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
         return jsonify({
             "video_url": f"/static/videos/{output_filename}",
             "transcript": transcript,
+            "full_text": " ".join(full_text_parts).strip(),
             "language": getattr(info, "language", None)
         })