sreepathi-ravikumar committed on
Commit
eb0f122
·
verified ·
1 Parent(s): cb8ee6b

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +237 -41
video2.py CHANGED
@@ -12,6 +12,11 @@ import rust_highlight
12
  import rust_combiner
13
  import shutil
14
  import asyncio
 
 
 
 
 
15
 
16
  # Use /app/data which we created with proper permissions
17
  BASE_DIR = "/app/data"
@@ -24,11 +29,13 @@ CLIPS_DIR = os.path.join(BASE_DIR, "video")
24
  for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
25
  Path(path).mkdir(parents=True, exist_ok=True)
26
 
27
- async def generate_tts(id,lines):
28
- voice = "en-US-GuyNeural"
29
  audio_name = f"audio{id}.mp3"
30
  audio_path = os.path.join(AUDIO_DIR, audio_name)
31
-
 
 
32
  communicate = edge_tts.Communicate(text=lines[id], voice=voice, rate="+0%")
33
  await communicate.save(audio_path)
34
 
@@ -38,44 +45,233 @@ async def generate_tts(id,lines):
38
  return duration, audio_path
39
  return None, None
40
 
41
- def audio_func(id,lines):
42
- return asyncio.run(generate_tts(id,lines))
43
-
44
- # --- CONFIGURATION ---
45
  def video_func(id, lines):
46
  duration, audio_path = audio_func(id, lines)
47
- image_path = os.path.join(IMAGE_DIR, f"slide{id}.png")
48
- img = Image.open(image_path)
49
- data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
50
-
51
- words = []
52
- for i in range(len(data['text'])):
53
- txt = data['text'][i].strip()
54
- if txt and int(data['conf'][i]) > 60:
55
- box = (
56
- data['left'][i],
57
- data['top'][i],
58
- data['width'][i],
59
- data['height'][i],
60
- )
61
- words.append((txt, box))
62
-
63
- clip_file = rust_highlight.render_video(
64
- id=id,
65
- image_path=image_path,
66
- audio_path=audio_path,
67
- duration=duration,
68
- words=words,
69
- output_dir=CLIPS_DIR # Add your output directory here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  )
71
- print(f"Created {clip_file}")
72
-
73
- def video_com(lines):
74
- video_path = f"/tmp/video_{uuid.uuid4().hex}.mp4"
75
- clips = []
76
- for id in range(len(lines)):
77
- clip = f"/app/data/video/clip{id}.mp4"
78
- clips.append(clip)
79
-
80
- video_path = rust_combiner.combine_clips(clips)
81
- return video_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  import rust_combiner
13
  import shutil
14
  import asyncio
15
+ import cv2
16
+ import numpy as np
17
+ import subprocess, shlex, os, time
18
+ # from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
19
+ import math
20
 
21
  # Use /app/data which we created with proper permissions
22
  BASE_DIR = "/app/data"
 
29
  for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
30
  Path(path).mkdir(parents=True, exist_ok=True)
31
 
32
+ async def generate_tts(id, lines):
33
+ voice = "en-US-JennyNeural"
34
  audio_name = f"audio{id}.mp3"
35
  audio_path = os.path.join(AUDIO_DIR, audio_name)
36
+ #listf = lines.split("&&&")
37
+ #text = listf[0].strip()
38
+ #lang = listf[1].strip()
39
  communicate = edge_tts.Communicate(text=lines[id], voice=voice, rate="+0%")
40
  await communicate.save(audio_path)
41
 
 
45
  return duration, audio_path
46
  return None, None
47
 
48
def audio_func(id, lines):
    """Blocking wrapper around the async TTS generator.

    Drives the `generate_tts` coroutine to completion on a fresh event
    loop and returns its `(duration, audio_path)` result.
    """
    tts_coro = generate_tts(id, lines)
    return asyncio.run(tts_coro)
50
+
 
51
def video_func(id, lines):
    """Render a typewriter-style "handwriting" video for item `id`, then mux it with TTS audio.

    Pipeline:
      1. Generate narration audio via `audio_func` (edge-tts).
      2. Wrap the text to the canvas width, then stream one raw BGR frame per
         animation step into an ffmpeg subprocess (silent video).
      3. Combine the silent video with the audio using MoviePy.

    Returns the path of the final mp4 on success, or None on failure.
    """
    duration, audio_path = audio_func(id, lines)
    if not duration or not audio_path:
        print("Failed to generate audio.")
        return None

    # NOTE(review): `lines.split("&&&")` implies `lines` is a string here, but
    # generate_tts indexes it as `lines[id]` — confirm the caller's actual type.
    listf = lines.split("&&&")
    TEXT = listf[0].strip()

    # When True, whitespace characters would not count as animated glyphs.
    SKIP_SPACES = False

    # --- Rendering configuration ---
    FPS = 30  # Increased for smoother animation
    ANIMATION_FRAMES_PER_CHAR = 3  # Number of sub-frames for pen movement per character
    WIDTH, HEIGHT = 1280, 720  # Keep as is
    MARGIN_X, MARGIN_Y = 40, 60
    LINE_SPACING = 8  # additional px between lines
    FONT = cv2.FONT_HERSHEY_SIMPLEX
    FONT_SCALE = 1.0  # tweak for desired size
    THICKNESS = 2
    TEXT_COLOR = (0, 0, 0)  # BGR
    BG_COLOR = (255, 255, 255)  # BGR
    silent_video_name = f"silent_video{id}.mp4"
    silent_video_path = os.path.join(CLIPS_DIR, silent_video_name)
    FFMPEG_PRESET = "ultrafast"  # fastest encode
    CRF = 23  # For faster encoding
    # Pen settings
    PEN_COLOR = (0, 0, 255)  # Red pen for visibility (BGR)
    PEN_TIP_RADIUS = 5  # Size of pen tip circle
    PEN_LENGTH = 20  # Length of pen line
    PEN_THICKNESS = 2  # Thickness of pen line
    PEN_BASE_ANGLE = 45  # Base angle of pen (degrees)
    PEN_MOVEMENT_AMPLITUDE = 10  # How much the pen moves up/down (pixels)
    # ===================================

    # Helper: wrap text by pixel width using cv2.getTextSize.
    # Preserves blank paragraphs; words wider than a line are broken per character.
    def wrap_text_cv(text, font, font_scale, thickness, max_width):
        wrapped_lines = []
        for para in text.splitlines():
            if para == "":
                wrapped_lines.append("")  # preserve blank line
                continue
            words = para.split(" ")
            cur = ""
            for w in words:
                candidate = w if cur == "" else cur + " " + w
                (w_w, w_h), _ = cv2.getTextSize(candidate, font, font_scale, thickness)
                if w_w <= max_width:
                    cur = candidate
                else:
                    # Current line is full: flush it, then place `w`.
                    if cur != "":
                        wrapped_lines.append(cur)
                    (single_w, _), _ = cv2.getTextSize(w, font, font_scale, thickness)
                    if single_w > max_width:
                        # Word alone exceeds the line: split it character by character.
                        chunk = ""
                        for ch in w:
                            cand2 = chunk + ch
                            (c_w, _), _ = cv2.getTextSize(cand2, font, font_scale, thickness)
                            if c_w <= max_width:
                                chunk = cand2
                            else:
                                wrapped_lines.append(chunk)
                                chunk = ch
                        # The trailing chunk starts the next line.
                        if chunk:
                            cur = chunk
                        else:
                            cur = ""
                    else:
                        cur = w
            if cur != "":
                wrapped_lines.append(cur)
        return wrapped_lines

    # Pre-wrap text
    text_area_width = WIDTH - 2 * MARGIN_X
    wrapped_lines = wrap_text_cv(TEXT, FONT, FONT_SCALE, THICKNESS, text_area_width)
    full_text = "\n".join(wrapped_lines)
    if not full_text:
        full_text = ""  # redundant guard (already falsy-empty), kept as-is

    # Visible indices: which character positions of full_text get an animation step.
    if SKIP_SPACES:
        visible_indices = [i for i, ch in enumerate(full_text) if (ch != ' ' and ch != '\n' and ch != '\t')]
    else:
        visible_indices = list(range(len(full_text)))

    total_glyphs = len(visible_indices)
    print(f"Wrapped lines: {len(wrapped_lines)} lines, total glyphs (counted): {total_glyphs}")

    if total_glyphs == 0:
        print("No text to animate.")
        return None

    # Calculate REPEAT_FRAMES_PER_CHAR to approximate audio duration:
    # stretch the typing animation so total frames ≈ duration * FPS.
    desired_frames = math.ceil(duration * FPS)
    min_frames = total_glyphs * ANIMATION_FRAMES_PER_CHAR
    extra_frames = desired_frames - min_frames
    if extra_frames > 0:
        REPEAT_FRAMES_PER_CHAR = math.floor(extra_frames / total_glyphs)
        remaining_frames = extra_frames % total_glyphs
    else:
        # Animation already at least as long as the audio; no hold frames needed.
        REPEAT_FRAMES_PER_CHAR = 0
        remaining_frames = 0

    # But we'll add remaining as hold at end if needed, but since later we use subclip, it's ok.

    # Pre-calc line heights and y_positions (top y of each wrapped line).
    line_heights = []
    for line in wrapped_lines:
        if line == "":
            # Use a representative glyph pair to give blank lines a nonzero height.
            (w, h), baseline = cv2.getTextSize("Ay", FONT, FONT_SCALE, THICKNESS)
        else:
            (w, h), baseline = cv2.getTextSize(line, FONT, FONT_SCALE, THICKNESS)
        line_heights.append(h + baseline + LINE_SPACING)

    y_positions = []
    y = MARGIN_Y
    for lh in line_heights:
        y_positions.append(y)
        y += lh
    # NOTE(review): no check that the last line stays within HEIGHT — very long
    # text will be drawn off-canvas; confirm inputs are short enough.

    # Prepare ffmpeg: consume raw BGR frames on stdin, encode to H.264.
    # NOTE(review): paths are interpolated into a shell-style string and re-split
    # with shlex — safe only while CLIPS_DIR contains no spaces/quotes.
    ffmpeg_cmd = (
        f'ffmpeg -y '
        f'-f rawvideo -pix_fmt bgr24 -s {WIDTH}x{HEIGHT} -r {FPS} -i - '
        f'-an '
        f'-c:v libx264 -preset {FFMPEG_PRESET} -crf {CRF} -pix_fmt yuv420p '
        f'{silent_video_path}'
    )
    print("FFMPEG CMD:", ffmpeg_cmd)

    proc = subprocess.Popen(shlex.split(ffmpeg_cmd), stdin=subprocess.PIPE, bufsize=10**8)

    # Render function, modified: if pen_x <= 0, no pen.
    # Draws the currently-visible text and (optionally) an animated pen whose tip
    # bobs vertically by a sine of anim_offset in [0, 1).
    def render_frame(visible_text, pen_x, pen_y, anim_offset):
        img = np.full((HEIGHT, WIDTH, 3), BG_COLOR, dtype=np.uint8)
        lines = visible_text.split("\n")  # NOTE: shadows the outer `lines` parameter
        for idx, line in enumerate(lines):
            x = MARGIN_X
            y = y_positions[idx]
            (w, h), baseline = cv2.getTextSize(line, FONT, FONT_SCALE, THICKNESS)
            y_draw = y + h  # putText expects the text baseline, not the top
            if line != "":
                cv2.putText(img, line, (x, y_draw), FONT, FONT_SCALE, TEXT_COLOR, THICKNESS, lineType=cv2.LINE_AA)

        if pen_x > 0:  # Only draw pen if pen_x > 0
            offset_y = int(PEN_MOVEMENT_AMPLITUDE * math.sin(anim_offset * math.pi))
            pen_tip_y = pen_y + offset_y
            angle_rad = math.radians(PEN_BASE_ANGLE)
            pen_end_x = pen_x + int(PEN_LENGTH * math.cos(angle_rad))
            pen_end_y = pen_tip_y - int(PEN_LENGTH * math.sin(angle_rad))
            cv2.line(img, (pen_x, pen_tip_y), (pen_end_x, pen_end_y), PEN_COLOR, PEN_THICKNESS)
            cv2.circle(img, (pen_x, pen_tip_y), PEN_TIP_RADIUS, PEN_COLOR, -1)

        return img

    # --- Main animation loop: one prefix of full_text per visible glyph ---
    t0 = time.time()
    frames_sent = 0
    prev_visible_sub = ""
    last_pen_x = 0
    last_pen_y = 0
    for rank, idx_in_full in enumerate(visible_indices):
        visible_sub = full_text[:idx_in_full + 1]

        if visible_sub != prev_visible_sub:
            # Recompute the pen position: just right of the last visible character.
            lines = visible_sub.split("\n")  # NOTE: shadows the outer `lines` parameter
            last_line = lines[-1]
            line_idx = len(lines) - 1
            (w, h), baseline = cv2.getTextSize(last_line, FONT, FONT_SCALE, THICKNESS)
            pen_x = MARGIN_X + w + 5
            pen_y = y_positions[line_idx] + h // 2
            last_pen_x = pen_x
            last_pen_y = pen_y

        # Sub-frames animating the pen bob for this character.
        for anim_step in range(ANIMATION_FRAMES_PER_CHAR):
            frame_img = render_frame(visible_sub, pen_x, pen_y, anim_step / ANIMATION_FRAMES_PER_CHAR)
            proc.stdin.write(frame_img.tobytes())
            frames_sent += 1

        prev_visible_sub = visible_sub

        # Static hold frames that stretch the animation toward the audio length.
        for r in range(REPEAT_FRAMES_PER_CHAR):
            frame_img = render_frame(visible_sub, pen_x, pen_y, 0)
            proc.stdin.write(frame_img.tobytes())
            frames_sent += 1

    # Add remaining frames as hold with pen (or without, but keep consistent)
    for _ in range(remaining_frames):
        frame_img = render_frame(full_text, last_pen_x, last_pen_y, 0)
        proc.stdin.write(frame_img.tobytes())
        frames_sent += 1

    # To pad if still short (but shouldn't be), but we can skip since approximate.

    # Close stdin so ffmpeg finishes encoding, then wait for it to exit.
    proc.stdin.close()
    proc.wait()
    elapsed = time.time() - t0
    print(f"Frames sent: {frames_sent}, elapsed time: {elapsed:.3f} seconds")

    if not os.path.exists(silent_video_path):
        print("Silent video generation failed.")
        return None

    # Now combine with audio using MoviePy
    final_video_name = f"final_video{id}.mp4"
    final_video_path = os.path.join(CLIPS_DIR, final_video_name)

    # NOTE(review): these clips are never .close()d, leaking ffmpeg reader
    # processes; also set_duration/set_audio is the MoviePy 1.x API — confirm
    # the installed MoviePy version.
    video_clip = VideoFileClip(silent_video_path)
    audio_clip = AudioFileClip(audio_path)

    # Set video duration to exactly match audio (speed up/slow down if necessary, but since we adjusted, should be close)
    # If video longer, subclip to audio duration; if shorter, it will pad silence but since we padded, likely longer or equal.
    final_clip = video_clip.set_duration(duration).set_audio(audio_clip)

    # Write final video
    final_clip.write_videofile(final_video_path, codec='libx264', audio_codec='aac', preset='ultrafast')

    # Print the final video file name
    print(f"Final video saved at: {final_video_path}")

    # For notebook display (comment out if not needed in HF Spaces)
    # if os.path.exists(final_video_path):
    #     display(Video(final_video_path, embed=True, width=WIDTH, height=HEIGHT))

    # Clean up silent video if not needed
    os.remove(silent_video_path)

    return final_video_path