|
|
import json |
|
|
import os |
|
|
import requests |
|
|
from urllib.parse import urlparse |
|
|
import cv2 |
|
|
import numpy as np |
|
|
from pydub import AudioSegment |
|
|
import random |
|
|
import subprocess |
|
|
from urllib.request import urlretrieve |
|
|
from openai import OpenAI |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
# Load configuration from a local .env file (e.g. OPENAI_API_KEY) into os.environ.
load_dotenv()








# Module-level OpenAI client; used by transcribe_audio() for Whisper transcription.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
def download_images(json_file_name, folder_name):
    """Download every URL listed under 'images' in a JSON manifest.

    Both json_file_name and folder_name are resolved relative to this
    script's directory. Files are saved as image_1, image_2, ... with each
    URL's original extension (may be empty if the URL path has none).
    Failed downloads are logged and skipped; the rest continue.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(base_dir, json_file_name)
    dest_dir = os.path.join(base_dir, folder_name)

    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    with open(json_path, 'r') as fh:
        manifest = json.load(fh)

    for index, image_url in enumerate(manifest.get('images', [])):
        extension = os.path.splitext(urlparse(image_url).path)[1]
        file_name = f"image_{index + 1}{extension}"
        destination = os.path.join(dest_dir, file_name)

        try:
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            with open(destination, 'wb') as fh:
                fh.write(response.content)
            print(f"Downloaded: {file_name}")
        except requests.exceptions.RequestException as e:
            print(f"Failed to download: {image_url}. Error: {e}")
|
|
|
|
|
def resize_image(image, target_width, target_height, overlay_opacity=0.1):
    """Letterbox an image onto a target_width x target_height black canvas.

    The image is scaled to fit while preserving aspect ratio, centered on a
    black canvas, then darkened by alpha-blending with an all-black overlay
    of strength overlay_opacity. Returns a uint8 HxWx3 array.
    """
    src_h, src_w = image.shape[:2]
    src_aspect = src_w / src_h

    # Fit to width when the source is wider than the target, else fit to height.
    if src_aspect > target_width / target_height:
        scaled_w = target_width
        scaled_h = int(scaled_w / src_aspect)
    else:
        scaled_h = target_height
        scaled_w = int(scaled_h * src_aspect)

    scaled = cv2.resize(image, (scaled_w, scaled_h), interpolation=cv2.INTER_AREA)

    # Center the scaled image on a black canvas of the target size.
    canvas = np.zeros((target_height, target_width, 3), dtype=np.uint8)
    top = (target_height - scaled_h) // 2
    left = (target_width - scaled_w) // 2
    canvas[top:top + scaled_h, left:left + scaled_w] = scaled

    # Blend with a black overlay to slightly darken the whole frame.
    black = np.zeros_like(canvas)
    return cv2.addWeighted(canvas, 1 - overlay_opacity, black, overlay_opacity, 0)
|
|
|
|
|
def apply_zoom(image, zoom_factor):
    """Center-crop an image by zoom_factor and scale the crop back to full size.

    zoom_factor 1.0 is a no-op; larger values crop tighter (zoom in).
    """
    height, width = image.shape[:2]

    # Size of the central crop; keeping the original h * (1 / z) expression
    # so integer truncation matches exactly.
    crop_h = int(height * (1 / zoom_factor))
    crop_w = int(width * (1 / zoom_factor))

    top = (height - crop_h) // 2
    left = (width - crop_w) // 2

    cropped = image[top:top + crop_h, left:left + crop_w]
    return cv2.resize(cropped, (width, height), interpolation=cv2.INTER_LINEAR)
|
|
|
|
|
def apply_fade(image1, image2, progress):
    """Cross-fade between two frames; progress 0.0 -> image1, 1.0 -> image2."""
    alpha = 1 - progress
    return cv2.addWeighted(image1, alpha, image2, progress, 0)
|
|
|
|
|
def create_video(images_folder, audio_file, output_file, width=1080, height=1920, fps=30, overlay_opacity=0.1):
    """Build a slideshow video from still images and mux in an MP3 soundtrack.

    Each image is shown for an equal share of the audio's duration with a
    random zoom effect, a 0.5 s cross-fade between slides, and a final fade
    to black. The silent video is written with OpenCV, then ffmpeg muxes the
    audio in.

    Args:
        images_folder: directory containing .png/.jpg/.jpeg stills.
        audio_file: MP3 soundtrack; its length sets the per-image duration.
        output_file: path of the finished video (mp4v video + AAC audio).
        width, height: output resolution (default portrait 1080x1920).
        fps: output frame rate.
        overlay_opacity: darkening strength forwarded to resize_image.

    Raises:
        ValueError: if images_folder contains no supported image files.
        subprocess.CalledProcessError: if the ffmpeg audio mux fails.
    """
    image_files = sorted([f for f in os.listdir(images_folder) if f.endswith(('.png', '.jpg', '.jpeg'))])
    if not image_files:
        # Without this guard the duration math below divides by zero and
        # `transition_frames` would be referenced before assignment.
        raise ValueError(f"No images found in {images_folder}")

    audio = AudioSegment.from_mp3(audio_file)
    audio_duration = len(audio) / 1000  # pydub reports length in milliseconds

    image_duration = audio_duration / len(image_files)
    frames_per_image = int(image_duration * fps)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))

    transition_frames = int(fps * 0.5)  # 0.5 s cross-fade between slides
    prev_img = None
    for img_file in image_files:
        img = cv2.imread(os.path.join(images_folder, img_file))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = resize_image(img, width, height, overlay_opacity)

        if prev_img is None:
            # First slide fades in from black.
            prev_img = np.zeros_like(img)

        effect = random.choice(['zoom_in', 'zoom_out', 'none'])
        max_zoom = 1.1 if effect != 'none' else 1.0

        for frame in range(frames_per_image):
            progress = frame / frames_per_image

            if effect == 'zoom_in':
                zoom_factor = 1 + (max_zoom - 1) * progress
            elif effect == 'zoom_out':
                zoom_factor = max_zoom - (max_zoom - 1) * progress
            else:
                zoom_factor = 1

            zoomed_img = apply_zoom(img, zoom_factor)

            if frame < transition_frames:
                # Cross-fade from the previous slide's last frame.
                fade_progress = frame / transition_frames
                frame_img = apply_fade(prev_img, zoomed_img, fade_progress)
            else:
                frame_img = zoomed_img

            out.write(cv2.cvtColor(frame_img, cv2.COLOR_RGB2BGR))

        prev_img = zoomed_img

    # Fade the final slide out to black.
    black_frame = np.zeros_like(prev_img)
    for frame in range(transition_frames):
        progress = frame / transition_frames
        frame_img = apply_fade(prev_img, black_frame, progress)
        out.write(cv2.cvtColor(frame_img, cv2.COLOR_RGB2BGR))

    out.release()

    # Mux the soundtrack. The temp file lives next to output_file (not the
    # CWD) so concurrent sessions cannot clobber each other's renders.
    temp_output = output_file + '.noaudio.mp4'
    os.rename(output_file, temp_output)
    try:
        # Argument-list invocation (no shell) is safe for paths with spaces;
        # -y prevents an interactive overwrite prompt; check=True raises on
        # encoder failure instead of silently discarding the video.
        subprocess.run(
            ["ffmpeg", "-y", "-i", temp_output, "-i", audio_file,
             "-c:v", "copy", "-c:a", "aac", output_file],
            check=True,
        )
    finally:
        # Only delete the silent render once the mux has produced output;
        # on failure, restore it so the work is not lost.
        if os.path.exists(output_file):
            os.remove(temp_output)
        else:
            os.rename(temp_output, output_file)
|
|
|
|
|
def transcribe_audio(audio_file):
    """Transcribe an audio file with OpenAI Whisper and return plain text.

    Uses the module-level OpenAI client; response_format="text" makes the
    API return the transcript as a string rather than a JSON object.
    """
    with open(audio_file, "rb") as audio:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            response_format="text",
        )
|
|
|
|
|
def split_into_chunks(text, chunk_size=3):
    """Split text into space-joined groups of at most chunk_size words.

    Returns an empty list for empty/whitespace-only text; the final chunk
    may hold fewer than chunk_size words.
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(' '.join(words[start:start + chunk_size]))
    return chunks
|
|
|
|
|
def create_ass_file(chunks, chunk_duration, output_ass, video_width, video_height):
    """Write an ASS subtitle file showing each text chunk for chunk_duration seconds.

    Captions use the Impact font, centered (alignment 5), sized to 5% of the
    video height. The font file is fetched once into the working directory so
    ffmpeg's fontsdir option can resolve it.
    """
    font_size = int(video_height * 0.05)  # scale caption size with the video

    impact_font_url = "https://picfy.xyz/uploads/impact.ttf"
    impact_font_path = "impact.ttf"
    if not os.path.exists(impact_font_path):
        # NOTE(review): font is pulled from a third-party host — confirm the
        # URL is still trusted and reachable.
        urlretrieve(impact_font_url, impact_font_path)

    with open(output_ass, 'w') as f:
        # Script header: playback resolution must match the video.
        f.write(f"[Script Info]\nScriptType: v4.00+\nPlayResX: {video_width}\nPlayResY: {video_height}\n\n")
        f.write("[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n")
        f.write(f"Style: Default,Impact,{font_size},&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,0,5,10,10,10,1\n\n")
        f.write("[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")

        # One Dialogue line per chunk, back-to-back with no gaps.
        for i, chunk in enumerate(chunks):
            start = format_time(i * chunk_duration)
            end = format_time((i + 1) * chunk_duration)
            f.write(f"Dialogue: 0,{start},{end},Default,,0,0,0,,{chunk}\n")
|
|
|
|
|
def format_time(seconds):
    """Convert a duration in seconds to an ASS timestamp: H:MM:SS.cc."""
    remaining_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(int(remaining_minutes), 60)
    return f"{hours:01d}:{minutes:02d}:{secs:05.2f}"
|
|
|
|
|
def add_captions_to_video(video_file, audio_file, output_file):
    """Transcribe the audio and burn word-chunk captions into the video.

    The transcript is split into 3-word chunks spread evenly across the
    video's duration, written to a temporary ASS file, then rendered into
    the output with ffmpeg (re-encoding video to H.264, audio to AAC).

    Args:
        video_file: source video to caption.
        audio_file: audio track to transcribe and mux.
        output_file: path of the captioned result.

    Raises:
        ValueError: if the transcript is empty (nothing to caption).
        subprocess.CalledProcessError: if ffprobe/ffmpeg fails.
    """
    transcript = transcribe_audio(audio_file)
    chunks = split_into_chunks(transcript)
    if not chunks:
        # An empty transcript would make chunk_duration divide by zero below.
        raise ValueError(f"Empty transcript for {audio_file}; nothing to caption")

    # Probe resolution and duration. Argument-list invocation (no shell)
    # is safe for paths containing spaces or shell metacharacters.
    video_info = subprocess.check_output([
        "ffprobe", "-v", "error", "-select_streams", "v:0", "-count_packets",
        "-show_entries", "stream=width,height,duration",
        "-of", "csv=p=0", video_file,
    ]).decode().strip().split(',')
    video_width, video_height, video_duration = map(float, video_info)

    # Spread the chunks evenly across the full runtime.
    chunk_duration = video_duration / len(chunks)

    ass_file = "subtitles.ass"
    create_ass_file(chunks, chunk_duration, ass_file, int(video_width), int(video_height))

    try:
        # -y prevents ffmpeg from blocking on an interactive overwrite
        # prompt; check=True surfaces encoding failures to the caller.
        subprocess.run([
            "ffmpeg", "-y", "-i", video_file, "-i", audio_file,
            "-vf", f"ass={ass_file}:fontsdir=.",
            "-c:a", "aac", "-c:v", "libx264", output_file,
        ], check=True)
    finally:
        # Remove the temporary subtitle file even when ffmpeg fails.
        os.remove(ass_file)
|
|
|
|
|
def generate_video(session_id):
    """Run the full pipeline for one session and return the output path.

    Expects temp_<session_id>/ to already contain data.json (image URL
    manifest) and voice.mp3 (narration). Produces
    temp_<session_id>/output_video.mp4.
    """
    temp_dir = f'temp_{session_id}'
    json_file_name = f'{temp_dir}/data.json'
    images_folder = f'{temp_dir}/images'
    audio_file = f'{temp_dir}/voice.mp3'
    initial_video = f'{temp_dir}/video.mp4'
    final_video = f'{temp_dir}/output_video.mp4'

    # Step 1: fetch every image listed in the session's JSON manifest.
    download_images(json_file_name, images_folder)

    # Step 2: assemble the slideshow; a heavier overlay (0.3) darkens the
    # imagery so the burned-in captions stay readable.
    create_video(images_folder, audio_file, initial_video, overlay_opacity=0.3)

    # Step 3: transcribe the narration and burn the captions in.
    add_captions_to_video(initial_video, audio_file, final_video)

    print("Video processing complete. Output saved as", final_video)
    return final_video