sreepathi-ravikumar committed on
Commit
c4b79fe
·
verified ·
1 Parent(s): 1eece32

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +36 -84
video2.py CHANGED
@@ -1,4 +1,5 @@
1
  from moviepy.editor import *
 
2
  from PIL import Image
3
  import pytesseract
4
  import numpy as np
@@ -15,20 +16,17 @@ import asyncio
15
  import cv2
16
  import numpy as np
17
  import subprocess, shlex, os, time
18
- # from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
19
  import math
20
-
21
  # Use /app/data which we created with proper permissions
22
  BASE_DIR = "/app/data"
23
  IMAGE_DIR = "/tmp/images"
24
  os.makedirs(IMAGE_DIR, exist_ok=True)
25
  AUDIO_DIR = os.path.join(BASE_DIR, "sound")
26
  CLIPS_DIR = os.path.join(BASE_DIR, "video")
27
-
28
  # Create directories (no chmod needed)
29
  for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
30
  Path(path).mkdir(parents=True, exist_ok=True)
31
-
32
  async def generate_tts(id, lines):
33
  voice = "en-US-JennyNeural"
34
  audio_name = f"audio{id}.mp3"
@@ -38,57 +36,52 @@ async def generate_tts(id, lines):
38
  #lang = listf[1].strip()
39
  communicate = edge_tts.Communicate(text=lines[id], voice=voice, rate="+0%")
40
  await communicate.save(audio_path)
41
-
42
  if os.path.exists(audio_path):
43
  audio = MP3(audio_path)
44
  duration = audio.info.length
45
  return duration, audio_path
46
  return None, None
47
-
48
  def audio_func(id, lines):
49
  return asyncio.run(generate_tts(id, lines))
50
-
51
  def video_func(id, lines):
52
  duration, audio_path = audio_func(id, lines)
53
  if not duration or not audio_path:
54
  print("Failed to generate audio.")
55
  return None
56
-
57
  #listf = lines.split("&&&")
58
  #TEXT = listf[0].strip()
59
  TEXT=lines[id]
60
-
61
  SKIP_SPACES = False
62
-
63
- FPS = 30 # Increased for smoother animation
64
  ANIMATION_FRAMES_PER_CHAR = 3 # Number of sub-frames for pen movement per character
65
- WIDTH, HEIGHT = 1280, 720 # Keep as is
66
  MARGIN_X, MARGIN_Y = 40, 60
67
- LINE_SPACING = 8 # additional px between lines
68
  FONT = cv2.FONT_HERSHEY_SIMPLEX
69
- FONT_SCALE = 1.0 # tweak for desired size
70
  THICKNESS = 2
71
- TEXT_COLOR = (0, 0, 0) # BGR
72
- BG_COLOR = (255, 255, 255) # BGR
73
  silent_video_name = f"silent_video{id}.mp4"
74
  silent_video_path = os.path.join(CLIPS_DIR, silent_video_name)
75
- FFMPEG_PRESET = "ultrafast" # fastest encode
76
- CRF = 23 # For faster encoding
77
  # Pen settings
78
- PEN_COLOR = (0, 0, 255) # Red pen for visibility (BGR)
79
- PEN_TIP_RADIUS = 5 # Size of pen tip circle
80
- PEN_LENGTH = 20 # Length of pen line
81
- PEN_THICKNESS = 2 # Thickness of pen line
82
- PEN_BASE_ANGLE = 45 # Base angle of pen (degrees)
83
- PEN_MOVEMENT_AMPLITUDE = 10 # How much the pen moves up/down (pixels)
84
  # ===================================
85
-
86
  # Helper: wrap text by pixel width using cv2.getTextSize
87
  def wrap_text_cv(text, font, font_scale, thickness, max_width):
88
  wrapped_lines = []
89
  for para in text.splitlines():
90
  if para == "":
91
- wrapped_lines.append("") # preserve blank line
92
  continue
93
  words = para.split(" ")
94
  cur = ""
@@ -120,40 +113,26 @@ def video_func(id, lines):
120
  if cur != "":
121
  wrapped_lines.append(cur)
122
  return wrapped_lines
123
-
124
  # Pre-wrap text
125
  text_area_width = WIDTH - 2 * MARGIN_X
126
  wrapped_lines = wrap_text_cv(TEXT, FONT, FONT_SCALE, THICKNESS, text_area_width)
127
  full_text = "\n".join(wrapped_lines)
128
  if not full_text:
129
  full_text = ""
130
-
131
  # Visible indices
132
  if SKIP_SPACES:
133
  visible_indices = [i for i, ch in enumerate(full_text) if (ch != ' ' and ch != '\n' and ch != '\t')]
134
  else:
135
  visible_indices = list(range(len(full_text)))
136
-
137
  total_glyphs = len(visible_indices)
138
  print(f"Wrapped lines: {len(wrapped_lines)} lines, total glyphs (counted): {total_glyphs}")
139
-
140
  if total_glyphs == 0:
141
  print("No text to animate.")
142
  return None
143
-
144
- # Calculate REPEAT_FRAMES_PER_CHAR to approximate audio duration
145
- desired_frames = math.ceil(duration * FPS)
146
  min_frames = total_glyphs * ANIMATION_FRAMES_PER_CHAR
147
- extra_frames = desired_frames - min_frames
148
- if extra_frames > 0:
149
- REPEAT_FRAMES_PER_CHAR = math.floor(extra_frames / total_glyphs)
150
- remaining_frames = extra_frames % total_glyphs
151
- else:
152
- REPEAT_FRAMES_PER_CHAR = 0
153
- remaining_frames = 0
154
-
155
- # But we'll add remaining as hold at end if needed, but since later we use subclip, it's ok.
156
-
157
  # Pre-calc line heights and y_positions
158
  line_heights = []
159
  for line in wrapped_lines:
@@ -162,13 +141,11 @@ def video_func(id, lines):
162
  else:
163
  (w, h), baseline = cv2.getTextSize(line, FONT, FONT_SCALE, THICKNESS)
164
  line_heights.append(h + baseline + LINE_SPACING)
165
-
166
  y_positions = []
167
  y = MARGIN_Y
168
  for lh in line_heights:
169
  y_positions.append(y)
170
  y += lh
171
-
172
  # Prepare ffmpeg
173
  ffmpeg_cmd = (
174
  f'ffmpeg -y '
@@ -178,9 +155,8 @@ def video_func(id, lines):
178
  f'{silent_video_path}'
179
  )
180
  print("FFMPEG CMD:", ffmpeg_cmd)
181
-
182
  proc = subprocess.Popen(shlex.split(ffmpeg_cmd), stdin=subprocess.PIPE, bufsize=10**8)
183
-
184
  # Render function, modified: if pen_x <= 0, no pen
185
  def render_frame(visible_text, pen_x, pen_y, anim_offset):
186
  img = np.full((HEIGHT, WIDTH, 3), BG_COLOR, dtype=np.uint8)
@@ -192,8 +168,7 @@ def video_func(id, lines):
192
  y_draw = y + h
193
  if line != "":
194
  cv2.putText(img, line, (x, y_draw), FONT, FONT_SCALE, TEXT_COLOR, THICKNESS, lineType=cv2.LINE_AA)
195
-
196
- if pen_x > 0: # Only draw pen if pen_x > 0
197
  offset_y = int(PEN_MOVEMENT_AMPLITUDE * math.sin(anim_offset * math.pi))
198
  pen_tip_y = pen_y + offset_y
199
  angle_rad = math.radians(PEN_BASE_ANGLE)
@@ -201,9 +176,8 @@ def video_func(id, lines):
201
  pen_end_y = pen_tip_y - int(PEN_LENGTH * math.sin(angle_rad))
202
  cv2.line(img, (pen_x, pen_tip_y), (pen_end_x, pen_end_y), PEN_COLOR, PEN_THICKNESS)
203
  cv2.circle(img, (pen_x, pen_tip_y), PEN_TIP_RADIUS, PEN_COLOR, -1)
204
-
205
  return img
206
-
207
  t0 = time.time()
208
  frames_sent = 0
209
  prev_visible_sub = ""
@@ -211,7 +185,6 @@ def video_func(id, lines):
211
  last_pen_y = 0
212
  for rank, idx_in_full in enumerate(visible_indices):
213
  visible_sub = full_text[:idx_in_full + 1]
214
-
215
  if visible_sub != prev_visible_sub:
216
  lines = visible_sub.split("\n")
217
  last_line = lines[-1]
@@ -221,58 +194,37 @@ def video_func(id, lines):
221
  pen_y = y_positions[line_idx] + h // 2
222
  last_pen_x = pen_x
223
  last_pen_y = pen_y
224
-
225
  for anim_step in range(ANIMATION_FRAMES_PER_CHAR):
226
  frame_img = render_frame(visible_sub, pen_x, pen_y, anim_step / ANIMATION_FRAMES_PER_CHAR)
227
  proc.stdin.write(frame_img.tobytes())
228
  frames_sent += 1
229
-
230
  prev_visible_sub = visible_sub
231
-
232
- for r in range(REPEAT_FRAMES_PER_CHAR):
233
- frame_img = render_frame(visible_sub, pen_x, pen_y, 0)
234
- proc.stdin.write(frame_img.tobytes())
235
- frames_sent += 1
236
-
237
- # Add remaining frames as hold with pen (or without, but keep consistent)
238
- for _ in range(remaining_frames):
239
- frame_img = render_frame(full_text, last_pen_x, last_pen_y, 0)
240
- proc.stdin.write(frame_img.tobytes())
241
- frames_sent += 1
242
-
243
- # To pad if still short (but shouldn't be), but we can skip since approximate.
244
-
245
  proc.stdin.close()
246
  proc.wait()
247
  elapsed = time.time() - t0
248
  print(f"Frames sent: {frames_sent}, elapsed time: {elapsed:.3f} seconds")
249
-
250
  if not os.path.exists(silent_video_path):
251
  print("Silent video generation failed.")
252
  return None
253
-
254
- # Now combine with audio using MoviePy
255
  final_video_name = f"clip{id}.mp4"
256
  final_video_path = os.path.join(CLIPS_DIR, final_video_name)
257
-
258
  video_clip = VideoFileClip(silent_video_path)
259
- audio_clip = AudioFileClip(audio_path)
260
-
261
- # Set video duration to exactly match audio (speed up/slow down if necessary, but since we adjusted, should be close)
262
- # If video longer, subclip to audio duration; if shorter, it will pad silence but since we padded, likely longer or equal.
263
- final_clip = video_clip.set_duration(duration).set_audio(audio_clip)
264
-
 
265
  # Write final video
266
- final_clip.write_videofile(final_video_path, codec='libx264', audio_codec='aac', preset='ultrafast')
267
-
268
  # Print the final video file name
269
  print(f"Final video saved at: {final_video_path}")
270
-
271
  # For notebook display (comment out if not needed in HF Spaces)
272
  # if os.path.exists(final_video_path):
273
- # display(Video(final_video_path, embed=True, width=WIDTH, height=HEIGHT))
274
-
275
  # Clean up silent video if not needed
276
  os.remove(silent_video_path)
277
-
278
  return final_video_path
 
1
  from moviepy.editor import *
2
+ from moviepy.video.fx.all import speedx
3
  from PIL import Image
4
  import pytesseract
5
  import numpy as np
 
16
  import cv2
17
  import numpy as np
18
  import subprocess, shlex, os, time
19
+ # from IPython.display import Video, display, HTML # Commented out for Hugging Face Spaces compatibility
20
  import math
 
21
# Writable locations for generated assets.
# /app/data was created with proper permissions at image build time;
# /tmp/images is a scratch area that is always writable on Hugging Face Spaces.
BASE_DIR = "/app/data"
IMAGE_DIR = "/tmp/images"
AUDIO_DIR = os.path.join(BASE_DIR, "sound")
CLIPS_DIR = os.path.join(BASE_DIR, "video")

# Create every output directory up front through one code path (no chmod
# needed).  Previously IMAGE_DIR used os.makedirs while the rest used
# Path.mkdir; a single loop keeps the behavior identical and consistent.
for path in (BASE_DIR, IMAGE_DIR, AUDIO_DIR, CLIPS_DIR):
    Path(path).mkdir(parents=True, exist_ok=True)
 
30
  async def generate_tts(id, lines):
31
  voice = "en-US-JennyNeural"
32
  audio_name = f"audio{id}.mp3"
 
36
  #lang = listf[1].strip()
37
  communicate = edge_tts.Communicate(text=lines[id], voice=voice, rate="+0%")
38
  await communicate.save(audio_path)
 
39
  if os.path.exists(audio_path):
40
  audio = MP3(audio_path)
41
  duration = audio.info.length
42
  return duration, audio_path
43
  return None, None
 
44
def audio_func(id, lines):
    """Blocking wrapper around the async TTS generator.

    Drives generate_tts(id, lines) to completion on a fresh asyncio event
    loop and returns its result: a (duration, audio_path) tuple on success,
    or (None, None) when the audio file was not produced.
    """
    result = asyncio.run(generate_tts(id, lines))
    return result
 
46
  def video_func(id, lines):
47
  duration, audio_path = audio_func(id, lines)
48
  if not duration or not audio_path:
49
  print("Failed to generate audio.")
50
  return None
 
51
  #listf = lines.split("&&&")
52
  #TEXT = listf[0].strip()
53
  TEXT=lines[id]
 
54
  SKIP_SPACES = False
55
+
56
+ FPS = 30 # Increased for smoother animation
57
  ANIMATION_FRAMES_PER_CHAR = 3 # Number of sub-frames for pen movement per character
58
+ WIDTH, HEIGHT = 1280, 720 # Keep as is
59
  MARGIN_X, MARGIN_Y = 40, 60
60
+ LINE_SPACING = 8 # additional px between lines
61
  FONT = cv2.FONT_HERSHEY_SIMPLEX
62
+ FONT_SCALE = 1.0 # tweak for desired size
63
  THICKNESS = 2
64
+ TEXT_COLOR = (0, 0, 0) # BGR
65
+ BG_COLOR = (255, 255, 255) # BGR
66
  silent_video_name = f"silent_video{id}.mp4"
67
  silent_video_path = os.path.join(CLIPS_DIR, silent_video_name)
68
+ FFMPEG_PRESET = "ultrafast" # fastest encode
69
+ CRF = 23 # For faster encoding
70
  # Pen settings
71
+ PEN_COLOR = (0, 0, 255) # Red pen for visibility (BGR)
72
+ PEN_TIP_RADIUS = 5 # Size of pen tip circle
73
+ PEN_LENGTH = 20 # Length of pen line
74
+ PEN_THICKNESS = 2 # Thickness of pen line
75
+ PEN_BASE_ANGLE = 45 # Base angle of pen (degrees)
76
+ PEN_MOVEMENT_AMPLITUDE = 10 # How much the pen moves up/down (pixels)
77
  # ===================================
78
+
79
  # Helper: wrap text by pixel width using cv2.getTextSize
80
  def wrap_text_cv(text, font, font_scale, thickness, max_width):
81
  wrapped_lines = []
82
  for para in text.splitlines():
83
  if para == "":
84
+ wrapped_lines.append("") # preserve blank line
85
  continue
86
  words = para.split(" ")
87
  cur = ""
 
113
  if cur != "":
114
  wrapped_lines.append(cur)
115
  return wrapped_lines
 
116
  # Pre-wrap text
117
  text_area_width = WIDTH - 2 * MARGIN_X
118
  wrapped_lines = wrap_text_cv(TEXT, FONT, FONT_SCALE, THICKNESS, text_area_width)
119
  full_text = "\n".join(wrapped_lines)
120
  if not full_text:
121
  full_text = ""
 
122
  # Visible indices
123
  if SKIP_SPACES:
124
  visible_indices = [i for i, ch in enumerate(full_text) if (ch != ' ' and ch != '\n' and ch != '\t')]
125
  else:
126
  visible_indices = list(range(len(full_text)))
127
+
128
  total_glyphs = len(visible_indices)
129
  print(f"Wrapped lines: {len(wrapped_lines)} lines, total glyphs (counted): {total_glyphs}")
 
130
  if total_glyphs == 0:
131
  print("No text to animate.")
132
  return None
133
+ # Always render the minimal animation frames for the full text (no repeats or padding during rendering)
 
 
134
  min_frames = total_glyphs * ANIMATION_FRAMES_PER_CHAR
135
+ print(f"Rendering {min_frames} minimal frames for full text animation.")
 
 
 
 
 
 
 
 
 
136
  # Pre-calc line heights and y_positions
137
  line_heights = []
138
  for line in wrapped_lines:
 
141
  else:
142
  (w, h), baseline = cv2.getTextSize(line, FONT, FONT_SCALE, THICKNESS)
143
  line_heights.append(h + baseline + LINE_SPACING)
 
144
  y_positions = []
145
  y = MARGIN_Y
146
  for lh in line_heights:
147
  y_positions.append(y)
148
  y += lh
 
149
  # Prepare ffmpeg
150
  ffmpeg_cmd = (
151
  f'ffmpeg -y '
 
155
  f'{silent_video_path}'
156
  )
157
  print("FFMPEG CMD:", ffmpeg_cmd)
158
+
159
  proc = subprocess.Popen(shlex.split(ffmpeg_cmd), stdin=subprocess.PIPE, bufsize=10**8)
 
160
  # Render function, modified: if pen_x <= 0, no pen
161
  def render_frame(visible_text, pen_x, pen_y, anim_offset):
162
  img = np.full((HEIGHT, WIDTH, 3), BG_COLOR, dtype=np.uint8)
 
168
  y_draw = y + h
169
  if line != "":
170
  cv2.putText(img, line, (x, y_draw), FONT, FONT_SCALE, TEXT_COLOR, THICKNESS, lineType=cv2.LINE_AA)
171
+ if pen_x > 0: # Only draw pen if pen_x > 0
 
172
  offset_y = int(PEN_MOVEMENT_AMPLITUDE * math.sin(anim_offset * math.pi))
173
  pen_tip_y = pen_y + offset_y
174
  angle_rad = math.radians(PEN_BASE_ANGLE)
 
176
  pen_end_y = pen_tip_y - int(PEN_LENGTH * math.sin(angle_rad))
177
  cv2.line(img, (pen_x, pen_tip_y), (pen_end_x, pen_end_y), PEN_COLOR, PEN_THICKNESS)
178
  cv2.circle(img, (pen_x, pen_tip_y), PEN_TIP_RADIUS, PEN_COLOR, -1)
 
179
  return img
180
+
181
  t0 = time.time()
182
  frames_sent = 0
183
  prev_visible_sub = ""
 
185
  last_pen_y = 0
186
  for rank, idx_in_full in enumerate(visible_indices):
187
  visible_sub = full_text[:idx_in_full + 1]
 
188
  if visible_sub != prev_visible_sub:
189
  lines = visible_sub.split("\n")
190
  last_line = lines[-1]
 
194
  pen_y = y_positions[line_idx] + h // 2
195
  last_pen_x = pen_x
196
  last_pen_y = pen_y
 
197
  for anim_step in range(ANIMATION_FRAMES_PER_CHAR):
198
  frame_img = render_frame(visible_sub, pen_x, pen_y, anim_step / ANIMATION_FRAMES_PER_CHAR)
199
  proc.stdin.write(frame_img.tobytes())
200
  frames_sent += 1
 
201
  prev_visible_sub = visible_sub
202
+ # No repeat or remaining frames added during rendering - full minimal animation only
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  proc.stdin.close()
204
  proc.wait()
205
  elapsed = time.time() - t0
206
  print(f"Frames sent: {frames_sent}, elapsed time: {elapsed:.3f} seconds")
 
207
  if not os.path.exists(silent_video_path):
208
  print("Silent video generation failed.")
209
  return None
210
+ # Now combine with audio using MoviePy: always render full text animation, then adjust speed to match audio duration
 
211
  final_video_name = f"clip{id}.mp4"
212
  final_video_path = os.path.join(CLIPS_DIR, final_video_name)
 
213
  video_clip = VideoFileClip(silent_video_path)
214
+ rendered_duration = video_clip.duration
215
+ print(f"Rendered video duration: {rendered_duration:.3f}s, Audio duration: {duration:.3f}s")
216
+ if rendered_duration > 0 and duration > 0:
217
+ speed_factor = rendered_duration / duration
218
+ print(f"Adjusting video speed by factor: {speed_factor:.3f}")
219
+ video_clip = video_clip.fx(speedx, speed_factor)
220
+ final_clip = video_clip.set_audio(AudioFileClip(audio_path))
221
  # Write final video
222
+ final_clip.write_videofile(final_video_path, codec='libx264', audio_codec='aac', preset='ultrafast', verbose=False, logger=None)
 
223
  # Print the final video file name
224
  print(f"Final video saved at: {final_video_path}")
 
225
  # For notebook display (comment out if not needed in HF Spaces)
226
  # if os.path.exists(final_video_path):
227
+ # display(Video(final_video_path, embed=True, width=WIDTH, height=HEIGHT))
 
228
  # Clean up silent video if not needed
229
  os.remove(silent_video_path)
 
230
  return final_video_path