# NotestoVid / vid.py
# Last commit (f4694ea, verified) by Rahul-Sainy: "font path changed for hf"
import os
import cv2
from PIL import Image, ImageDraw, ImageFont
from pydub import AudioSegment
import numpy as np
import moviepy.editor as mp
def get_audio_duration(audio_file):
    """Return the length of *audio_file* in seconds, minus a small margin.

    The length is shortened by 10 ms so that clips built from it stop just
    before the real end of the audio, avoiding edge-case read errors.

    Any failure while loading the file is reported on stdout and signalled
    to the caller by returning ``0.0``.
    """
    try:
        # len() of the loaded segment is in milliseconds.
        length_ms = len(AudioSegment.from_file(audio_file))
    except Exception as e:
        print(f"Error loading audio file {audio_file}: {e}")
        return 0.0  # Sentinel: callers treat a non-positive duration as invalid
    seconds = length_ms / 1000.0
    return seconds - 0.01
def resize_image(image, target_width, target_height):
    """Letterbox *image* onto a white ``target_width`` x ``target_height`` canvas.

    The image is scaled to the largest size that fits inside the target
    while keeping its aspect ratio, then centred on a white 3-channel
    ``uint8`` canvas.

    Args:
        image: NumPy image array whose first two axes are (height, width).
            A 2-D (single-channel) array is accepted and replicated across
            the three canvas channels.
        target_width: Width of the returned canvas in pixels.
        target_height: Height of the returned canvas in pixels.

    Returns:
        A ``(target_height, target_width, 3)`` ``uint8`` array.
    """
    height, width = image.shape[:2]

    # Resize image while maintaining aspect ratio
    aspect_ratio = width / height
    if aspect_ratio > (target_width / target_height):
        new_width = target_width
        new_height = int(new_width / aspect_ratio)
    else:
        new_height = target_height
        new_width = int(new_height * aspect_ratio)

    # int() truncation can yield 0 for extremely wide/tall inputs, which
    # cv2.resize rejects; float rounding must also never exceed the canvas.
    new_width = min(max(new_width, 1), target_width)
    new_height = min(max(new_height, 1), target_height)

    resized_image = cv2.resize(image, (new_width, new_height))
    if resized_image.ndim == 2:
        # Add a channel axis so the grayscale data broadcasts over all 3 channels.
        resized_image = resized_image[:, :, np.newaxis]

    # Create a blank white image of the target size
    result = np.ones((target_height, target_width, 3), dtype=np.uint8) * 255

    # Centre the resized image on the canvas
    x_offset = (target_width - new_width) // 2
    y_offset = (target_height - new_height) // 2
    result[y_offset:y_offset + new_height, x_offset:x_offset + new_width] = resized_image
    return result
def wrap_text(text, max_width, font):
    """Greedily wrap *text* into lines no wider than *max_width* pixels.

    Words are separated on single spaces. A word that is wider than
    *max_width* on its own is placed on a line by itself (it is never split).

    Args:
        text: The string to wrap.
        max_width: Maximum rendered line width in pixels.
        font: Font object used for measuring. ``getbbox`` is used when
            available (``getsize`` was removed in Pillow 10); otherwise the
            legacy ``getsize`` method is used.

    Returns:
        A list of line strings. Always contains at least one element; for
        empty *text* the result is ``['']``.
    """
    def measure(candidate):
        # getbbox()[2] equals the width legacy getsize() used to report.
        if hasattr(font, "getbbox"):
            return font.getbbox(candidate)[2]
        return font.getsize(candidate)[0]

    lines = []
    line = ''
    for word in text.split(' '):
        if not word:
            # Consecutive/trailing spaces produce empty words; they add nothing.
            continue
        test_line = f"{line} {word}".strip()
        if measure(test_line) <= max_width:
            line = test_line
        else:
            # Only flush a non-empty line, so an over-long first word does not
            # produce a spurious blank line ahead of it.
            if line:
                lines.append(line)
            line = word
    lines.append(line)
    return lines
def _measure_text(font, text):
    """Return ``(width, height)`` of *text*, compatible with legacy ``getsize``."""
    if hasattr(font, "getbbox"):
        # getsize() was removed in Pillow 10; the right/bottom edges of the
        # bounding box are the values it used to return.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def add_text_and_images_to_frame(frame, slide, screen_width, screen_height, title_font, content_font, bg_image_path, slide_image_path):
    """Compose one slide: background, centred title, left-hand text, right-hand image.

    Args:
        frame: NumPy image array used as the base canvas.
        slide: Mapping with a ``"title"`` string and a ``"content"`` iterable
            of strings.
        screen_width: Output width in pixels.
        screen_height: Output height in pixels.
        title_font: Font used for the title lines.
        content_font: Font used for the content lines.
        bg_image_path: Path of the background image; it is resized to the
            screen size and pasted at the origin.
        slide_image_path: Path of the slide illustration. If it is empty or
            the file does not exist, the slide is rendered without it.

    Returns:
        The composed frame as a NumPy array.
    """
    # Convert frame to PIL Image
    frame_pil = Image.fromarray(frame)

    # Load background image; the context manager releases the file handle
    # as soon as the resized copy has been produced.
    with Image.open(bg_image_path) as bg_file:
        bg_image = bg_file.resize((screen_width, screen_height))
    frame_pil.paste(bg_image, (0, 0))

    draw = ImageDraw.Draw(frame_pil)

    # Colors
    title_color = (0, 0, 255)
    content_color = (0, 0, 0)

    # Layout: title spans the full width, content is limited to the left half
    padding = 50
    title_max_width = screen_width - 2 * padding
    content_max_width = screen_width // 2 - 2 * padding

    # Draw title, each line centred horizontally
    y = padding
    for line in wrap_text(slide["title"], title_max_width, title_font):
        line_width, line_height = _measure_text(title_font, line)
        x = (screen_width - line_width) // 2
        draw.text((x, y), line, font=title_font, fill=title_color)
        y += line_height + 10  # Vertical space after each title line
    y += 70  # Additional space between title and content

    # Draw content, left-aligned at the padding
    for content in slide["content"]:
        for line in wrap_text(content, content_max_width, content_font):
            draw.text((padding, y), line, font=content_font, fill=content_color)
            y += _measure_text(content_font, line)[1] + 20  # Vertical space after each content line

    # Paste the slide image (quarter of the screen area) on the right half,
    # vertically centred. A missing image is skipped rather than aborting.
    if slide_image_path and os.path.exists(slide_image_path):
        with Image.open(slide_image_path) as slide_file:
            slide_image = slide_file.resize((screen_width // 2, screen_height // 2))
        frame_pil.paste(slide_image, (screen_width // 2, (screen_height - slide_image.height) // 2))

    # Convert back to NumPy array
    return np.array(frame_pil)
def create(narrations, slides, basedir, output_file, selected_bg):
    """Render *slides* with their narration audio into a single video file.

    For slide number ``i`` (1-based) the function reads
    ``<basedir>/images/image_<i>.webp`` and
    ``<basedir>/narrations/narration_<i>.wav``. Slides whose narration cannot
    be loaded (non-positive duration) are skipped. Every slide clip fades in
    and out over 0.5 s, and ``end_screen.mp4`` (relative to the working
    directory) is appended at the end.

    Args:
        narrations: Not used by this function; audio is read from disk.
        slides: Sequence of slide mappings passed to
            ``add_text_and_images_to_frame``.
        basedir: Directory containing ``images/`` and ``narrations/``; the
            output video is written here too.
        output_file: Output file name, joined onto *basedir*.
        selected_bg: Path of the background image used for every slide.
    """
    width, height = 1920, 1080  # Video resolution (1920x1080)
    frame_rate = 30

    # Pre-load fonts
    font_path = "WorkSans-Bold.ttf"
    title_font = ImageFont.truetype(font_path, size=70)
    content_font = ImageFont.truetype(font_path, size=30)

    # File-backed clips own ffmpeg readers; remember them so they can be
    # closed even when rendering fails part-way through.
    open_sources = []
    try:
        # Load end screen video clip
        end_screen_source = mp.VideoFileClip("end_screen.mp4")
        open_sources.append(end_screen_source)
        end_screen_clip = end_screen_source.set_fps(frame_rate)
        # NOTE(review): moviepy's resize appears to honour only `height` when
        # both width and height are given -- confirm if exact 1920x1080 is needed.
        end_screen_clip = end_screen_clip.resize(width=width, height=height)

        video_clips = []
        for i, slide in enumerate(slides):
            img_path = os.path.join(basedir, f"images/image_{i+1}.webp")

            # cv2.imread returns None when the file cannot be decoded, so
            # treat that the same as a missing file and use a white panel.
            image = cv2.imread(img_path) if os.path.exists(img_path) else None
            if image is not None:
                image = resize_image(image, width // 2, height)  # Fit right half
            else:
                image = np.ones((height, width // 2, 3), dtype=np.uint8) * 255

            frame = np.ones((height, width, 3), dtype=np.uint8) * 255  # White background
            frame[:, width // 2:] = image  # Paste resized image on the right side
            frame = add_text_and_images_to_frame(
                frame, slide, width, height, title_font, content_font, selected_bg, img_path
            )

            # Use narration file directly
            audio_path = os.path.join(basedir, f"narrations/narration_{i+1}.wav")
            narration_duration = get_audio_duration(audio_path)

            # Validate narration duration
            if narration_duration <= 0.0:
                print(f"Skipping invalid audio file: {audio_path}")
                continue

            audio_source = mp.AudioFileClip(audio_path)
            open_sources.append(audio_source)
            audio = audio_source.set_duration(narration_duration)

            clip = mp.ImageClip(frame).set_duration(narration_duration)
            clip = clip.set_audio(audio)
            clip = clip.fx(mp.vfx.fadein, duration=0.5).fx(mp.vfx.fadeout, duration=0.5)
            video_clips.append(clip)

        # Concatenate all video clips including end screen
        video_clips.append(end_screen_clip)
        final_clip = mp.concatenate_videoclips(video_clips, method="compose")

        # Set final clip properties
        final_clip = final_clip.set_fps(frame_rate)
        final_clip = final_clip.resize(width=width, height=height)

        # Save final video
        vid_file = os.path.join(basedir, output_file)
        final_clip.write_videofile(vid_file, fps=frame_rate, codec='libx264')
    finally:
        for source in open_sources:
            source.close()