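"""video_script.py

Turn a list of product image URLs plus an mp3 narration into a vertical
(1080x1920) slideshow video, then burn in Whisper-generated captions.
"""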
import json
import os
import requests
from urllib.parse import urlparse
import cv2
import numpy as np
from pydub import AudioSegment
import random
import subprocess
from urllib.request import urlretrieve
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def download_images(json_file_name, folder_name):
    """Download every image URL listed under 'images' in the JSON file."""
    # Note: paths are resolved relative to this script's directory, not the CWD
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_file_path = os.path.join(script_dir, json_file_name)
    images_folder_path = os.path.join(script_dir, folder_name)
    os.makedirs(images_folder_path, exist_ok=True)

    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)

    images = data.get('images', [])
    for index, image_url in enumerate(images):
        # Keep the original extension so OpenCV can decode the file later
        parsed_url = urlparse(image_url)
        file_extension = os.path.splitext(parsed_url.path)[1]
        file_name = f"image_{index + 1}{file_extension}"
        file_path = os.path.join(images_folder_path, file_name)
        try:
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            with open(file_path, 'wb') as image_file:
                image_file.write(response.content)
            print(f"Downloaded: {file_name}")
        except requests.exceptions.RequestException as e:
            print(f"Failed to download: {image_url}. Error: {e}")
def resize_image(image, target_width, target_height, overlay_opacity=0.1):
    """Letterbox the image onto a target-size canvas and dim it slightly."""
    h, w = image.shape[:2]
    aspect = w / h
    # Fit the longer side to the canvas while preserving aspect ratio
    if aspect > target_width / target_height:
        new_w = target_width
        new_h = int(new_w / aspect)
    else:
        new_h = target_height
        new_w = int(new_h * aspect)
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Center the resized image on a black canvas
    canvas = np.zeros((target_height, target_width, 3), dtype=np.uint8)
    y_offset = (target_height - new_h) // 2
    x_offset = (target_width - new_w) // 2
    canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized

    # Blend in a black overlay to dim the frame by overlay_opacity
    overlay = np.zeros_like(canvas)
    canvas = cv2.addWeighted(canvas, 1 - overlay_opacity, overlay, overlay_opacity, 0)
    return canvas
def apply_zoom(image, zoom_factor):
    """Zoom into the image center by cropping, then scale back to full size."""
    h, w = image.shape[:2]
    crop_h = int(h / zoom_factor)
    crop_w = int(w / zoom_factor)
    y1 = (h - crop_h) // 2
    x1 = (w - crop_w) // 2
    zoomed = image[y1:y1 + crop_h, x1:x1 + crop_w]
    return cv2.resize(zoomed, (w, h), interpolation=cv2.INTER_LINEAR)
def apply_fade(image1, image2, progress):
    """Cross-fade between two equally sized frames (progress in [0, 1])."""
    return cv2.addWeighted(image1, 1 - progress, image2, progress, 0)
def create_video(images_folder, audio_file, output_file, width=1080, height=1920, fps=30, overlay_opacity=0.1):
    """Build a slideshow video from the images, timed to the audio length."""
    image_files = sorted([f for f in os.listdir(images_folder) if f.endswith(('.png', '.jpg', '.jpeg'))])
    if not image_files:
        raise ValueError(f"No images found in {images_folder}")

    # Split the audio duration evenly across the images
    audio = AudioSegment.from_mp3(audio_file)
    audio_duration = len(audio) / 1000  # pydub reports length in milliseconds
    image_duration = audio_duration / len(image_files)
    frames_per_image = int(image_duration * fps)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
    prev_img = None
    transition_frames = int(fps * 0.5)  # half-second cross-fade between images

    for img_file in image_files:
        # OpenCV reads and writes BGR throughout, so no color conversion is needed
        img = cv2.imread(os.path.join(images_folder, img_file))
        if img is None:
            print(f"Skipping unreadable image: {img_file}")
            continue
        img = resize_image(img, width, height, overlay_opacity)
        if prev_img is None:
            prev_img = np.zeros_like(img)

        # Pick a random Ken Burns-style effect for this image
        effect = random.choice(['zoom_in', 'zoom_out', 'none'])
        max_zoom = 1.1 if effect != 'none' else 1.0

        for frame in range(frames_per_image):
            progress = frame / frames_per_image
            if effect == 'zoom_in':
                zoom_factor = 1 + (max_zoom - 1) * progress
            elif effect == 'zoom_out':
                zoom_factor = max_zoom - (max_zoom - 1) * progress
            else:
                zoom_factor = 1
            zoomed_img = apply_zoom(img, zoom_factor)

            # Cross-fade from the previous image at the start of each clip
            if frame < transition_frames:
                fade_progress = frame / transition_frames
                frame_img = apply_fade(prev_img, zoomed_img, fade_progress)
            else:
                frame_img = zoomed_img
            out.write(frame_img)
        prev_img = zoomed_img

    # Fade to black at the end
    black_frame = np.zeros_like(prev_img)
    for frame in range(transition_frames):
        progress = frame / transition_frames
        out.write(apply_fade(prev_img, black_frame, progress))
    out.release()

    # Mux the audio track onto the silent video with ffmpeg
    temp_output = output_file + '.tmp.mp4'
    os.rename(output_file, temp_output)
    subprocess.run(
        ["ffmpeg", "-y", "-i", temp_output, "-i", audio_file, "-c:v", "copy", "-c:a", "aac", output_file],
        check=True,
    )
    os.remove(temp_output)
def transcribe_audio(audio_file):
    """Transcribe the narration with OpenAI's Whisper API."""
    with open(audio_file, "rb") as f:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text"
        )
    return transcript
def split_into_chunks(text, chunk_size=3):
    """Split the transcript into caption chunks of chunk_size words."""
    words = text.split()
    return [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
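# Example (hypothetical input): split_into_chunks("a quick brown fox jumps")
# returns ['a quick brown', 'fox jumps'].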
def create_ass_file(chunks, chunk_duration, output_ass, video_width, video_height):
    """Write an ASS subtitle file showing each chunk for chunk_duration seconds."""
    font_size = int(video_height * 0.05)

    # Download the Impact font if it is not already present
    impact_font_url = "https://picfy.xyz/uploads/impact.ttf"
    impact_font_path = "impact.ttf"
    if not os.path.exists(impact_font_path):
        urlretrieve(impact_font_url, impact_font_path)

    with open(output_ass, 'w') as f:
        f.write("[Script Info]\nScriptType: v4.00+\nPlayResX: {}\nPlayResY: {}\n\n".format(video_width, video_height))
        f.write("[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n")
        # White Impact text with a black outline, centered on screen (Alignment 5)
        f.write("Style: Default,Impact,{},&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,0,5,10,10,10,1\n\n".format(font_size))
        f.write("[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")
        for i, chunk in enumerate(chunks):
            # Each chunk occupies an equal share of the video's duration
            start_time = i * chunk_duration
            end_time = (i + 1) * chunk_duration
            f.write("Dialogue: 0,{},{},Default,,0,0,0,,{}\n".format(
                format_time(start_time),
                format_time(end_time),
                chunk
            ))
def format_time(seconds):
    """Format seconds as an ASS timestamp (H:MM:SS.cc)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = seconds % 60
    return f"{hours:01d}:{minutes:02d}:{seconds:05.2f}"
def add_captions_to_video(video_file, audio_file, output_file):
    """Transcribe the audio and burn word-chunk captions into the video."""
    transcript = transcribe_audio(audio_file)
    chunks = split_into_chunks(transcript)

    # Read the video's dimensions and duration with ffprobe
    ffprobe_cmd = [
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream=width,height,duration",
        "-of", "csv=p=0", video_file,
    ]
    video_info = subprocess.check_output(ffprobe_cmd).decode().strip().split(',')
    video_width, video_height, video_duration = map(float, video_info)

    # Spread the caption chunks evenly across the video
    chunk_duration = video_duration / len(chunks)
    ass_file = "subtitles.ass"
    create_ass_file(chunks, chunk_duration, ass_file, int(video_width), int(video_height))

    # Re-encode with the subtitles burned in; map video from the first input
    # and audio from the mp3 so the stream selection is explicit
    ffmpeg_cmd = [
        "ffmpeg", "-y", "-i", video_file, "-i", audio_file,
        "-map", "0:v:0", "-map", "1:a:0",
        "-vf", f"ass={ass_file}:fontsdir=.",
        "-c:a", "aac", "-c:v", "libx264", output_file,
    ]
    subprocess.run(ffmpeg_cmd, check=True)
    os.remove(ass_file)
def generate_video(session_id):
    """Run the full pipeline: download images, build the video, add captions."""
    temp_dir = f'temp_{session_id}'
    json_file_name = f'{temp_dir}/data.json'
    images_folder = f'{temp_dir}/images'
    audio_file = f'{temp_dir}/voice.mp3'
    initial_video = f'{temp_dir}/video.mp4'
    final_video = f'{temp_dir}/output_video.mp4'

    # Step 1: Download images
    download_images(json_file_name, images_folder)

    # Step 2: Create initial video
    create_video(images_folder, audio_file, initial_video, overlay_opacity=0.3)

    # Step 3: Add captions to the video
    add_captions_to_video(initial_video, audio_file, final_video)

    print("Video processing complete. Output saved as", final_video)
    return final_video
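# A minimal usage sketch. It assumes a temp_<session_id>/ directory already
# exists next to this script, containing data.json (with an "images" URL list)
# and voice.mp3 (the narration); the session id "demo" here is hypothetical.
if __name__ == "__main__":
    output_path = generate_video("demo")
    print(f"Final video written to {output_path}")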