Spaces:

ulduldp
/

test-ffmpeg

Running

App Files Community

ulduldp committed on 12 days ago

Commit

e502d42

verified ·

1 Parent(s): 0d51b0d

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -31

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
-# Smallest + fastest Whisper model for CPU
 model = WhisperModel(
     "tiny",
     device="cpu",
@@ -148,13 +148,10 @@ video{
 <body>
 <div class="container">
     <h1>Photo + Audio → Video</h1>
     <form id="form">
         <div class="upload-box">
             <label>Select Photo</label>
             <input type="file" id="image" name="image" accept="image/*" required>
@@ -162,11 +159,9 @@ video{
             <label>Select Audio (mp3/wav)</label>
             <input type="file" name="audio" accept="audio/*" required>
         </div>
         <button type="submit">Generate Video</button>
     </form>
     <div id="loading">Generating Video...</div>
@@ -176,7 +171,6 @@ video{
     <div class="download-btn" id="downloadDiv">
         <a id="downloadBtn" download>Download Video</a>
     </div>
 </div>
 <script>
@@ -211,7 +205,6 @@ form.addEventListener("submit", async (e)=>{
         });
         const data = await response.json();
         loading.style.display = "none";
         if(data.video_url){
@@ -222,11 +215,12 @@ form.addEventListener("submit", async (e)=>{
             downloadDiv.style.display = "block";
         }else{
             alert(data.error || "Failed");
         }
     }catch(err){
         loading.style.display = "none";
         alert("Server Error");
     }
 });
 </script>
@@ -251,7 +245,6 @@ def ass_escape(text: str) -> str:
     return text
 def escape_ffmpeg_path(path: str) -> str:
-    # For ffmpeg filter strings
     return (
         path
         .replace("\\", "\\\\")
@@ -262,25 +255,34 @@ def escape_ffmpeg_path(path: str) -> str:
 def wrap_caption_text(text: str) -> str:
     text = text.strip()
-    # Smaller wrap widths to prevent left/right crop
-    if len(text) <= 20:
-        width = 12
-    elif len(text) <= 40:
-        width = 15
-    elif len(text) <= 70:
         width = 18
-    elif len(text) <= 110:
-        width = 22
     else:
-        width = 24
-    return textwrap.fill(
         text,
         width=width,
         break_long_words=False,
         break_on_hyphens=False
     )
 def make_ass_subtitles(segments, ass_path):
     header = """[Script Info]
 ScriptType: v4.00+
@@ -292,7 +294,7 @@ WrapStyle: 2
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-Style: Default,Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,3,0,0,2,90,90,170,1
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
@@ -301,21 +303,20 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     lines = [header]
     for seg in segments:
-        start = ass_time(seg["start"])
-        end = ass_time(seg["end"])
         text = seg["text"].strip()
         if not text:
             continue
         wrapped = wrap_caption_text(text)
-        wrapped = ass_escape(wrapped)
-        wrapped = wrapped.replace("\n", r"\N")
-        # Solid black background, white text
         dialogue = (
             f"Dialogue: 0,{start},{end},Default,,0,0,0,,"
-            r"{\bord0\shad0\blur0\1c&HFFFFFF&\3c&H000000&\4c&H000000&\4a&H00}"
             f"{wrapped}\n"
         )
@@ -369,7 +370,6 @@ def generate():
     audio.save(audio_path)
     try:
-        # Fast low CPU transcription
         segments_iter, info = model.transcribe(
             audio_path,
             beam_size=1,
@@ -394,7 +394,6 @@ def generate():
         make_ass_subtitles(transcript, ass_path)
         safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
-        # Scale/crop image to 9:16 + burn subtitles
         vf = (
             "scale=1080:1920:force_original_aspect_ratio=increase,"
             "crop=1080:1920,"
@@ -423,7 +422,7 @@ def generate():
             output_path
         ]
-        result = subprocess.run(
             cmd,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,

 os.makedirs(OUTPUT_FOLDER, exist_ok=True)
 os.makedirs(SUBTITLE_FOLDER, exist_ok=True)
+# Fast CPU model
 model = WhisperModel(
     "tiny",
     device="cpu",
 <body>
 <div class="container">
     <h1>Photo + Audio → Video</h1>
     <form id="form">
         <div class="upload-box">
             <label>Select Photo</label>
             <input type="file" id="image" name="image" accept="image/*" required>
             <label>Select Audio (mp3/wav)</label>
             <input type="file" name="audio" accept="audio/*" required>
         </div>
         <button type="submit">Generate Video</button>
     </form>
     <div id="loading">Generating Video...</div>
     <div class="download-btn" id="downloadDiv">
         <a id="downloadBtn" download>Download Video</a>
     </div>
 </div>
 <script>
         });
         const data = await response.json();
         loading.style.display = "none";
         if(data.video_url){
             downloadDiv.style.display = "block";
         }else{
             alert(data.error || "Failed");
+            console.log(data.details || "");
         }
     }catch(err){
         loading.style.display = "none";
         alert("Server Error");
+        console.error(err);
     }
 });
 </script>
     return text
 def escape_ffmpeg_path(path: str) -> str:
     return (
         path
         .replace("\\", "\\\\")
 def wrap_caption_text(text: str) -> str:
     text = text.strip()
+    # tighter wrapping to prevent crop on 9:16 frame
+    if len(text) <= 18:
+        width = 10
+    elif len(text) <= 35:
+        width = 13
+    elif len(text) <= 55:
+        width = 16
+    elif len(text) <= 80:
         width = 18
+    elif len(text) <= 120:
+        width = 20
     else:
+        width = 22
+    wrapped = textwrap.fill(
         text,
         width=width,
         break_long_words=False,
         break_on_hyphens=False
     )
+    # keep captions from getting too tall
+    lines = wrapped.splitlines()
+    if len(lines) > 4:
+        wrapped = "\n".join(lines[:4])
+    return wrapped
 def make_ass_subtitles(segments, ass_path):
     header = """[Script Info]
 ScriptType: v4.00+
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,Arial,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,3,0,0,2,140,140,260,1
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
     lines = [header]
     for seg in segments:
         text = seg["text"].strip()
         if not text:
             continue
+        start = ass_time(seg["start"])
+        end = ass_time(seg["end"])
         wrapped = wrap_caption_text(text)
+        wrapped = ass_escape(wrapped).replace("\n", r"\N")
+        # Opaque black background box + white text
         dialogue = (
             f"Dialogue: 0,{start},{end},Default,,0,0,0,,"
+            r"{\bord0\shad0\blur0\be0\1c&HFFFFFF&\3c&H000000&\4c&H000000&\3a&H00&\4a&H00}"
             f"{wrapped}\n"
         )
     audio.save(audio_path)
     try:
         segments_iter, info = model.transcribe(
             audio_path,
             beam_size=1,
         make_ass_subtitles(transcript, ass_path)
         safe_ass_path = escape_ffmpeg_path(os.path.abspath(ass_path))
         vf = (
             "scale=1080:1920:force_original_aspect_ratio=increase,"
             "crop=1080:1920,"
             output_path
         ]
+        subprocess.run(
             cmd,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,