sreepathi-ravikumar committed on
Commit
c4b79fe
·
verified ·
1 Parent(s): 1eece32

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +36 -84
video2.py CHANGED
@@ -1,4 +1,5 @@
1
  from moviepy.editor import *
 
2
  from PIL import Image
3
  import pytesseract
4
  import numpy as np
@@ -15,20 +16,17 @@ import asyncio
15
  import cv2
16
  import numpy as np
17
  import subprocess, shlex, os, time
18
- # from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
19
  import math
20
-
21
  # Use /app/data which we created with proper permissions
22
  BASE_DIR = "/app/data"
23
  IMAGE_DIR = "/tmp/images"
24
  os.makedirs(IMAGE_DIR, exist_ok=True)
25
  AUDIO_DIR = os.path.join(BASE_DIR, "sound")
26
  CLIPS_DIR = os.path.join(BASE_DIR, "video")
27
-
28
  # Create directories (no chmod needed)
29
  for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
30
  Path(path).mkdir(parents=True, exist_ok=True)
31
-
32
  async def generate_tts(id, lines):
33
  voice = "en-US-JennyNeural"
34
  audio_name = f"audio{id}.mp3"
@@ -38,57 +36,52 @@ async def generate_tts(id, lines):
38
  #lang = listf[1].strip()
39
  communicate = edge_tts.Communicate(text=lines[id], voice=voice, rate="+0%")
40
  await communicate.save(audio_path)
41
-
42
  if os.path.exists(audio_path):
43
  audio = MP3(audio_path)
44
  duration = audio.info.length
45
  return duration, audio_path
46
  return None, None
47
-
48
  def audio_func(id, lines):
49
  return asyncio.run(generate_tts(id, lines))
50
-
51
  def video_func(id, lines):
52
  duration, audio_path = audio_func(id, lines)
53
  if not duration or not audio_path:
54
  print("Failed to generate audio.")
55
  return None
56
-
57
  #listf = lines.split("&&&")
58
  #TEXT = listf[0].strip()
59
  TEXT=lines[id]
60
-
61
  SKIP_SPACES = False
62
-
63
- FPS = 30 # Increased for smoother animation
64
  ANIMATION_FRAMES_PER_CHAR = 3 # Number of sub-frames for pen movement per character
65
- WIDTH, HEIGHT = 1280, 720 # Keep as is
66
  MARGIN_X, MARGIN_Y = 40, 60
67
- LINE_SPACING = 8 # additional px between lines
68
  FONT = cv2.FONT_HERSHEY_SIMPLEX
69
- FONT_SCALE = 1.0 # tweak for desired size
70
  THICKNESS = 2
71
- TEXT_COLOR = (0, 0, 0) # BGR
72
- BG_COLOR = (255, 255, 255) # BGR
73
  silent_video_name = f"silent_video{id}.mp4"
74
  silent_video_path = os.path.join(CLIPS_DIR, silent_video_name)
75
- FFMPEG_PRESET = "ultrafast" # fastest encode
76
- CRF = 23 # For faster encoding
77
  # Pen settings
78
- PEN_COLOR = (0, 0, 255) # Red pen for visibility (BGR)
79
- PEN_TIP_RADIUS = 5 # Size of pen tip circle
80
- PEN_LENGTH = 20 # Length of pen line
81
- PEN_THICKNESS = 2 # Thickness of pen line
82
- PEN_BASE_ANGLE = 45 # Base angle of pen (degrees)
83
- PEN_MOVEMENT_AMPLITUDE = 10 # How much the pen moves up/down (pixels)
84
  # ===================================
85
-
86
  # Helper: wrap text by pixel width using cv2.getTextSize
87
  def wrap_text_cv(text, font, font_scale, thickness, max_width):
88
  wrapped_lines = []
89
  for para in text.splitlines():
90
  if para == "":
91
- wrapped_lines.append("") # preserve blank line
92
  continue
93
  words = para.split(" ")
94
  cur = ""
@@ -120,40 +113,26 @@ def video_func(id, lines):
120
  if cur != "":
121
  wrapped_lines.append(cur)
122
  return wrapped_lines
123
-
124
  # Pre-wrap text
125
  text_area_width = WIDTH - 2 * MARGIN_X
126
  wrapped_lines = wrap_text_cv(TEXT, FONT, FONT_SCALE, THICKNESS, text_area_width)
127
  full_text = "\n".join(wrapped_lines)
128
  if not full_text:
129
  full_text = ""
130
-
131
  # Visible indices
132
  if SKIP_SPACES:
133
  visible_indices = [i for i, ch in enumerate(full_text) if (ch != ' ' and ch != '\n' and ch != '\t')]
134
  else:
135
  visible_indices = list(range(len(full_text)))
136
-
137
  total_glyphs = len(visible_indices)
138
  print(f"Wrapped lines: {len(wrapped_lines)} lines, total glyphs (counted): {total_glyphs}")
139
-
140
  if total_glyphs == 0:
141
  print("No text to animate.")
142
  return None
143
-
144
- # Calculate REPEAT_FRAMES_PER_CHAR to approximate audio duration
145
- desired_frames = math.ceil(duration * FPS)
146
  min_frames = total_glyphs * ANIMATION_FRAMES_PER_CHAR
147
- extra_frames = desired_frames - min_frames
148
- if extra_frames > 0:
149
- REPEAT_FRAMES_PER_CHAR = math.floor(extra_frames / total_glyphs)
150
- remaining_frames = extra_frames % total_glyphs
151
- else:
152
- REPEAT_FRAMES_PER_CHAR = 0
153
- remaining_frames = 0
154
-
155
- # But we'll add remaining as hold at end if needed, but since later we use subclip, it's ok.
156
-
157
  # Pre-calc line heights and y_positions
158
  line_heights = []
159
  for line in wrapped_lines:
@@ -162,13 +141,11 @@ def video_func(id, lines):
162
  else:
163
  (w, h), baseline = cv2.getTextSize(line, FONT, FONT_SCALE, THICKNESS)
164
  line_heights.append(h + baseline + LINE_SPACING)
165
-
166
  y_positions = []
167
  y = MARGIN_Y
168
  for lh in line_heights:
169
  y_positions.append(y)
170
  y += lh
171
-
172
  # Prepare ffmpeg
173
  ffmpeg_cmd = (
174
  f'ffmpeg -y '
@@ -178,9 +155,8 @@ def video_func(id, lines):
178
  f'{silent_video_path}'
179
  )
180
  print("FFMPEG CMD:", ffmpeg_cmd)
181
-
182
  proc = subprocess.Popen(shlex.split(ffmpeg_cmd), stdin=subprocess.PIPE, bufsize=10**8)
183
-
184
  # Render function, modified: if pen_x <= 0, no pen
185
  def render_frame(visible_text, pen_x, pen_y, anim_offset):
186
  img = np.full((HEIGHT, WIDTH, 3), BG_COLOR, dtype=np.uint8)
@@ -192,8 +168,7 @@ def video_func(id, lines):
192
  y_draw = y + h
193
  if line != "":
194
  cv2.putText(img, line, (x, y_draw), FONT, FONT_SCALE, TEXT_COLOR, THICKNESS, lineType=cv2.LINE_AA)
195
-
196
- if pen_x > 0: # Only draw pen if pen_x > 0
197
  offset_y = int(PEN_MOVEMENT_AMPLITUDE * math.sin(anim_offset * math.pi))
198
  pen_tip_y = pen_y + offset_y
199
  angle_rad = math.radians(PEN_BASE_ANGLE)
@@ -201,9 +176,8 @@ def video_func(id, lines):
201
  pen_end_y = pen_tip_y - int(PEN_LENGTH * math.sin(angle_rad))
202
  cv2.line(img, (pen_x, pen_tip_y), (pen_end_x, pen_end_y), PEN_COLOR, PEN_THICKNESS)
203
  cv2.circle(img, (pen_x, pen_tip_y), PEN_TIP_RADIUS, PEN_COLOR, -1)
204
-
205
  return img
206
-
207
  t0 = time.time()
208
  frames_sent = 0
209
  prev_visible_sub = ""
@@ -211,7 +185,6 @@ def video_func(id, lines):
211
  last_pen_y = 0
212
  for rank, idx_in_full in enumerate(visible_indices):
213
  visible_sub = full_text[:idx_in_full + 1]
214
-
215
  if visible_sub != prev_visible_sub:
216
  lines = visible_sub.split("\n")
217
  last_line = lines[-1]
@@ -221,58 +194,37 @@ def video_func(id, lines):
221
  pen_y = y_positions[line_idx] + h // 2
222
  last_pen_x = pen_x
223
  last_pen_y = pen_y
224
-
225
  for anim_step in range(ANIMATION_FRAMES_PER_CHAR):
226
  frame_img = render_frame(visible_sub, pen_x, pen_y, anim_step / ANIMATION_FRAMES_PER_CHAR)
227
  proc.stdin.write(frame_img.tobytes())
228
  frames_sent += 1
229
-
230
  prev_visible_sub = visible_sub
231
-
232
- for r in range(REPEAT_FRAMES_PER_CHAR):
233
- frame_img = render_frame(visible_sub, pen_x, pen_y, 0)
234
- proc.stdin.write(frame_img.tobytes())
235
- frames_sent += 1
236
-
237
- # Add remaining frames as hold with pen (or without, but keep consistent)
238
- for _ in range(remaining_frames):
239
- frame_img = render_frame(full_text, last_pen_x, last_pen_y, 0)
240
- proc.stdin.write(frame_img.tobytes())
241
- frames_sent += 1
242
-
243
- # To pad if still short (but shouldn't be), but we can skip since approximate.
244
-
245
  proc.stdin.close()
246
  proc.wait()
247
  elapsed = time.time() - t0
248
  print(f"Frames sent: {frames_sent}, elapsed time: {elapsed:.3f} seconds")
249
-
250
  if not os.path.exists(silent_video_path):
251
  print("Silent video generation failed.")
252
  return None
253
-
254
- # Now combine with audio using MoviePy
255
  final_video_name = f"clip{id}.mp4"
256
  final_video_path = os.path.join(CLIPS_DIR, final_video_name)
257
-
258
  video_clip = VideoFileClip(silent_video_path)
259
- audio_clip = AudioFileClip(audio_path)
260
-
261
- # Set video duration to exactly match audio (speed up/slow down if necessary, but since we adjusted, should be close)
262
- # If video longer, subclip to audio duration; if shorter, it will pad silence but since we padded, likely longer or equal.
263
- final_clip = video_clip.set_duration(duration).set_audio(audio_clip)
264
-
 
265
  # Write final video
266
- final_clip.write_videofile(final_video_path, codec='libx264', audio_codec='aac', preset='ultrafast')
267
-
268
  # Print the final video file name
269
  print(f"Final video saved at: {final_video_path}")
270
-
271
  # For notebook display (comment out if not needed in HF Spaces)
272
  # if os.path.exists(final_video_path):
273
- # display(Video(final_video_path, embed=True, width=WIDTH, height=HEIGHT))
274
-
275
  # Clean up silent video if not needed
276
  os.remove(silent_video_path)
277
-
278
  return final_video_path
 
1
  from moviepy.editor import *
2
+ from moviepy.video.fx.all import speedx
3
  from PIL import Image
4
  import pytesseract
5
  import numpy as np
 
16
  import cv2
17
  import numpy as np
18
  import subprocess, shlex, os, time
19
+ # from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
20
  import math
 
21
# Writable locations for generated assets.
# /app/data was created with proper permissions at image build time;
# /tmp/images is a scratch area that is always writable on Hugging Face Spaces.
BASE_DIR = "/app/data"
IMAGE_DIR = "/tmp/images"
AUDIO_DIR = os.path.join(BASE_DIR, "sound")
CLIPS_DIR = os.path.join(BASE_DIR, "video")

# Create every output directory up front through one code path (no chmod
# needed).  Previously IMAGE_DIR used os.makedirs while the rest used
# Path.mkdir; a single loop keeps the behavior identical and consistent.
for path in (BASE_DIR, IMAGE_DIR, AUDIO_DIR, CLIPS_DIR):
    Path(path).mkdir(parents=True, exist_ok=True)
 
30
  async def generate_tts(id, lines):
31
  voice = "en-US-JennyNeural"
32
  audio_name = f"audio{id}.mp3"
 
36
  #lang = listf[1].strip()
37
  communicate = edge_tts.Communicate(text=lines[id], voice=voice, rate="+0%")
38
  await communicate.save(audio_path)
 
39
  if os.path.exists(audio_path):
40
  audio = MP3(audio_path)
41
  duration = audio.info.length
42
  return duration, audio_path
43
  return None, None
 
44
def audio_func(id, lines):
    """Blocking wrapper around the async TTS generator.

    Drives generate_tts(id, lines) to completion on a fresh asyncio event
    loop and returns its result: a (duration, audio_path) tuple on success,
    or (None, None) when the audio file was not produced.
    """
    result = asyncio.run(generate_tts(id, lines))
    return result
 
46
  def video_func(id, lines):
47
  duration, audio_path = audio_func(id, lines)
48
  if not duration or not audio_path:
49
  print("Failed to generate audio.")
50
  return None
 
51
  #listf = lines.split("&&&")
52
  #TEXT = listf[0].strip()
53
  TEXT=lines[id]
 
54
  SKIP_SPACES = False
55
+
56
+ FPS = 30 # Increased for smoother animation
57
  ANIMATION_FRAMES_PER_CHAR = 3 # Number of sub-frames for pen movement per character
58
+ WIDTH, HEIGHT = 1280, 720 # Keep as is
59
  MARGIN_X, MARGIN_Y = 40, 60
60
+ LINE_SPACING = 8 # additional px between lines
61
  FONT = cv2.FONT_HERSHEY_SIMPLEX
62
+ FONT_SCALE = 1.0 # tweak for desired size
63
  THICKNESS = 2
64
+ TEXT_COLOR = (0, 0, 0) # BGR
65
+ BG_COLOR = (255, 255, 255) # BGR
66
  silent_video_name = f"silent_video{id}.mp4"
67
  silent_video_path = os.path.join(CLIPS_DIR, silent_video_name)
68
+ FFMPEG_PRESET = "ultrafast" # fastest encode
69
+ CRF = 23 # For faster encoding
70
  # Pen settings
71
+ PEN_COLOR = (0, 0, 255) # Red pen for visibility (BGR)
72
+ PEN_TIP_RADIUS = 5 # Size of pen tip circle
73
+ PEN_LENGTH = 20 # Length of pen line
74
+ PEN_THICKNESS = 2 # Thickness of pen line
75
+ PEN_BASE_ANGLE = 45 # Base angle of pen (degrees)
76
+ PEN_MOVEMENT_AMPLITUDE = 10 # How much the pen moves up/down (pixels)
77
  # ===================================
78
+
79
  # Helper: wrap text by pixel width using cv2.getTextSize
80
  def wrap_text_cv(text, font, font_scale, thickness, max_width):
81
  wrapped_lines = []
82
  for para in text.splitlines():
83
  if para == "":
84
+ wrapped_lines.append("") # preserve blank line
85
  continue
86
  words = para.split(" ")
87
  cur = ""
 
113
  if cur != "":
114
  wrapped_lines.append(cur)
115
  return wrapped_lines
 
116
  # Pre-wrap text
117
  text_area_width = WIDTH - 2 * MARGIN_X
118
  wrapped_lines = wrap_text_cv(TEXT, FONT, FONT_SCALE, THICKNESS, text_area_width)
119
  full_text = "\n".join(wrapped_lines)
120
  if not full_text:
121
  full_text = ""
 
122
  # Visible indices
123
  if SKIP_SPACES:
124
  visible_indices = [i for i, ch in enumerate(full_text) if (ch != ' ' and ch != '\n' and ch != '\t')]
125
  else:
126
  visible_indices = list(range(len(full_text)))
127
+
128
  total_glyphs = len(visible_indices)
129
  print(f"Wrapped lines: {len(wrapped_lines)} lines, total glyphs (counted): {total_glyphs}")
 
130
  if total_glyphs == 0:
131
  print("No text to animate.")
132
  return None
133
+ # Always render the minimal animation frames for the full text (no repeats or padding during rendering)
 
 
134
  min_frames = total_glyphs * ANIMATION_FRAMES_PER_CHAR
135
+ print(f"Rendering {min_frames} minimal frames for full text animation.")
 
 
 
 
 
 
 
 
 
136
  # Pre-calc line heights and y_positions
137
  line_heights = []
138
  for line in wrapped_lines:
 
141
  else:
142
  (w, h), baseline = cv2.getTextSize(line, FONT, FONT_SCALE, THICKNESS)
143
  line_heights.append(h + baseline + LINE_SPACING)
 
144
  y_positions = []
145
  y = MARGIN_Y
146
  for lh in line_heights:
147
  y_positions.append(y)
148
  y += lh
 
149
  # Prepare ffmpeg
150
  ffmpeg_cmd = (
151
  f'ffmpeg -y '
 
155
  f'{silent_video_path}'
156
  )
157
  print("FFMPEG CMD:", ffmpeg_cmd)
158
+
159
  proc = subprocess.Popen(shlex.split(ffmpeg_cmd), stdin=subprocess.PIPE, bufsize=10**8)
 
160
  # Render function, modified: if pen_x <= 0, no pen
161
  def render_frame(visible_text, pen_x, pen_y, anim_offset):
162
  img = np.full((HEIGHT, WIDTH, 3), BG_COLOR, dtype=np.uint8)
 
168
  y_draw = y + h
169
  if line != "":
170
  cv2.putText(img, line, (x, y_draw), FONT, FONT_SCALE, TEXT_COLOR, THICKNESS, lineType=cv2.LINE_AA)
171
+ if pen_x > 0: # Only draw pen if pen_x > 0
 
172
  offset_y = int(PEN_MOVEMENT_AMPLITUDE * math.sin(anim_offset * math.pi))
173
  pen_tip_y = pen_y + offset_y
174
  angle_rad = math.radians(PEN_BASE_ANGLE)
 
176
  pen_end_y = pen_tip_y - int(PEN_LENGTH * math.sin(angle_rad))
177
  cv2.line(img, (pen_x, pen_tip_y), (pen_end_x, pen_end_y), PEN_COLOR, PEN_THICKNESS)
178
  cv2.circle(img, (pen_x, pen_tip_y), PEN_TIP_RADIUS, PEN_COLOR, -1)
 
179
  return img
180
+
181
  t0 = time.time()
182
  frames_sent = 0
183
  prev_visible_sub = ""
 
185
  last_pen_y = 0
186
  for rank, idx_in_full in enumerate(visible_indices):
187
  visible_sub = full_text[:idx_in_full + 1]
 
188
  if visible_sub != prev_visible_sub:
189
  lines = visible_sub.split("\n")
190
  last_line = lines[-1]
 
194
  pen_y = y_positions[line_idx] + h // 2
195
  last_pen_x = pen_x
196
  last_pen_y = pen_y
 
197
  for anim_step in range(ANIMATION_FRAMES_PER_CHAR):
198
  frame_img = render_frame(visible_sub, pen_x, pen_y, anim_step / ANIMATION_FRAMES_PER_CHAR)
199
  proc.stdin.write(frame_img.tobytes())
200
  frames_sent += 1
 
201
  prev_visible_sub = visible_sub
202
+ # No repeat or remaining frames added during rendering - full minimal animation only
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  proc.stdin.close()
204
  proc.wait()
205
  elapsed = time.time() - t0
206
  print(f"Frames sent: {frames_sent}, elapsed time: {elapsed:.3f} seconds")
 
207
  if not os.path.exists(silent_video_path):
208
  print("Silent video generation failed.")
209
  return None
210
+ # Now combine with audio using MoviePy: always render full text animation, then adjust speed to match audio duration
 
211
  final_video_name = f"clip{id}.mp4"
212
  final_video_path = os.path.join(CLIPS_DIR, final_video_name)
 
213
  video_clip = VideoFileClip(silent_video_path)
214
+ rendered_duration = video_clip.duration
215
+ print(f"Rendered video duration: {rendered_duration:.3f}s, Audio duration: {duration:.3f}s")
216
+ if rendered_duration > 0 and duration > 0:
217
+ speed_factor = rendered_duration / duration
218
+ print(f"Adjusting video speed by factor: {speed_factor:.3f}")
219
+ video_clip = video_clip.fx(speedx, speed_factor)
220
+ final_clip = video_clip.set_audio(AudioFileClip(audio_path))
221
  # Write final video
222
+ final_clip.write_videofile(final_video_path, codec='libx264', audio_codec='aac', preset='ultrafast', verbose=False, logger=None)
 
223
  # Print the final video file name
224
  print(f"Final video saved at: {final_video_path}")
 
225
  # For notebook display (comment out if not needed in HF Spaces)
226
  # if os.path.exists(final_video_path):
227
+ # display(Video(final_video_path, embed=True, width=WIDTH, height=HEIGHT))
 
228
  # Clean up silent video if not needed
229
  os.remove(silent_video_path)
 
230
  return final_video_path