Update TARA to latest Tarsier2 checkpoint and runnable demo.

7daf628 about 1 month ago

24.7 kB

	import subprocess
	import os
	from pathlib import Path

	def create_side_by_side_gif(video_paths, output_gif, gap_width=20, fps=10, scale_height=240, gap_color="white", verbose=False):
	    """
	    Create a single GIF with multiple videos placed side by side.

	    Every input is scaled to ``scale_height`` (width follows the aspect
	    ratio), the scaled streams are joined with FFmpeg's ``hstack`` filter,
	    optionally separated by solid-colour gaps, and the result is written as
	    a looping GIF using a palette generated from the stacked frames.

	    Args:
	        video_paths (list): List of paths (str or path-like) to input MP4 files
	        output_gif (str): Path (str or path-like) for output GIF file
	        gap_width (int): Width of gap between videos in pixels. A value of
	            0 (or less) stacks the videos without any gap.
	        fps (int): Frame rate for output GIF
	        scale_height (int): Height to scale all videos to (maintains aspect ratio)
	        gap_color (str): Color for gaps between videos (e.g., "white", "black", "red", "#FF0000")
	        verbose (bool): Whether to print FFmpeg commands and processing messages

	    Returns:
	        bool: True if FFmpeg completed successfully; False if FFmpeg
	        reported an error or the ``ffmpeg`` executable was not found.

	    Raises:
	        ValueError: If ``video_paths`` is empty.
	        FileNotFoundError: If any input file does not exist.
	    """
	    if not video_paths:
	        raise ValueError("No video paths provided")

	    # Normalise path-like arguments to plain strings so the command list
	    # can be joined for verbose printing as well as handed to subprocess.
	    video_paths = [os.fspath(path) for path in video_paths]
	    output_gif = os.fspath(output_gif)

	    # Verify all input files exist
	    for path in video_paths:
	        if not os.path.exists(path):
	            raise FileNotFoundError(f"Video file not found: {path}")

	    num_videos = len(video_paths)

	    # Scale each video to the same height while maintaining aspect ratio
	    scaled_inputs = [f"[v{i}]" for i in range(num_videos)]
	    filter_parts = [
	        f"[{i}:v]scale=-1:{scale_height}{label}"
	        for i, label in enumerate(scaled_inputs)
	    ]

	    if num_videos == 1:
	        # Nothing to stack: pass the single scaled stream through
	        filter_parts.append(f"{scaled_inputs[0]}copy[stacked]")
	    else:
	        # A zero-sized colour source is not a valid FFmpeg input, so gaps
	        # are only generated when they have a positive width.
	        use_gaps = bool(gap_width) and gap_width > 0
	        hstack_inputs = []
	        for i, label in enumerate(scaled_inputs):
	            hstack_inputs.append(label)
	            if use_gaps and i < num_videos - 1:  # gap after each video except the last
	                filter_parts.append(f"color={gap_color}:{gap_width}x{scale_height}:d=1[gap{i}]")
	                hstack_inputs.append(f"[gap{i}]")
	        filter_parts.append(f"{''.join(hstack_inputs)}hstack=inputs={len(hstack_inputs)}[stacked]")

	    stacked_filter = ";".join(filter_parts)

	    # Build FFmpeg command; the palette is generated and applied in one graph
	    cmd = ["ffmpeg", "-y"]  # -y to overwrite output file
	    for path in video_paths:
	        cmd.extend(["-i", path])
	    cmd.extend([
	        "-filter_complex", f"{stacked_filter};[stacked]split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse=dither=bayer:bayer_scale=3",
	        "-r", str(fps),  # Set frame rate
	        "-loop", "0",  # Infinite loop
	        output_gif,
	    ])

	    if verbose:
	        print("Running FFmpeg command:")
	        print(" ".join(cmd))
	        print("\nProcessing...")

	    try:
	        subprocess.run(cmd, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        if verbose:
	            print(f"✗ FFmpeg error: {e.stderr}")
	        return False
	    except FileNotFoundError:
	        # Raised by subprocess when the ffmpeg executable cannot be found
	        if verbose:
	            print("✗ FFmpeg not found. Please install FFmpeg first.")
	            print(" - Windows: Download from https://ffmpeg.org/download.html")
	            print(" - macOS: brew install ffmpeg")
	            print(" - Linux: sudo apt install ffmpeg (Ubuntu/Debian)")
	        return False

	    if verbose:
	        print(f"✓ Successfully created GIF: {output_gif}")
	    return True

	def create_top_to_bottom_gif(video_paths, output_gif, gap_height=20, fps=10, scale_width=320, gap_color="white", verbose=False):
	    """
	    Create a single GIF with multiple videos stacked vertically (top to bottom).

	    Every input is scaled to ``scale_width`` (height follows the aspect
	    ratio), the scaled streams are joined with FFmpeg's ``vstack`` filter,
	    optionally separated by solid-colour gaps, and the result is written as
	    a looping GIF using a palette generated from the stacked frames.

	    Args:
	        video_paths (list): List of paths (str or path-like) to input MP4 files
	        output_gif (str): Path (str or path-like) for output GIF file
	        gap_height (int): Height of gap between videos in pixels. A value
	            of 0 (or less) stacks the videos without any gap.
	        fps (int): Frame rate for output GIF
	        scale_width (int): Width to scale all videos to (maintains aspect ratio)
	        gap_color (str): Color for gaps between videos (e.g., "white", "black", "red", "#FF0000")
	        verbose (bool): Whether to print FFmpeg commands and processing messages

	    Returns:
	        bool: True if FFmpeg completed successfully; False if FFmpeg
	        reported an error or the ``ffmpeg`` executable was not found.

	    Raises:
	        ValueError: If ``video_paths`` is empty.
	        FileNotFoundError: If any input file does not exist.
	    """
	    if not video_paths:
	        raise ValueError("No video paths provided")

	    # Normalise path-like arguments to plain strings so the command list
	    # can be joined for verbose printing as well as handed to subprocess.
	    video_paths = [os.fspath(path) for path in video_paths]
	    output_gif = os.fspath(output_gif)

	    # Verify all input files exist
	    for path in video_paths:
	        if not os.path.exists(path):
	            raise FileNotFoundError(f"Video file not found: {path}")

	    num_videos = len(video_paths)

	    # Scale each video to the same width while maintaining aspect ratio
	    scaled_inputs = [f"[v{i}]" for i in range(num_videos)]
	    filter_parts = [
	        f"[{i}:v]scale={scale_width}:-1{label}"
	        for i, label in enumerate(scaled_inputs)
	    ]

	    if num_videos == 1:
	        # Nothing to stack: pass the single scaled stream through
	        filter_parts.append(f"{scaled_inputs[0]}copy[stacked]")
	    else:
	        # A zero-sized colour source is not a valid FFmpeg input, so gaps
	        # are only generated when they have a positive height.
	        use_gaps = bool(gap_height) and gap_height > 0
	        vstack_inputs = []
	        for i, label in enumerate(scaled_inputs):
	            vstack_inputs.append(label)
	            if use_gaps and i < num_videos - 1:  # gap after each video except the last
	                filter_parts.append(f"color={gap_color}:{scale_width}x{gap_height}:d=1[gap{i}]")
	                vstack_inputs.append(f"[gap{i}]")
	        filter_parts.append(f"{''.join(vstack_inputs)}vstack=inputs={len(vstack_inputs)}[stacked]")

	    stacked_filter = ";".join(filter_parts)

	    # Build FFmpeg command; the palette is generated and applied in one graph
	    cmd = ["ffmpeg", "-y"]  # -y to overwrite output file
	    for path in video_paths:
	        cmd.extend(["-i", path])
	    cmd.extend([
	        "-filter_complex", f"{stacked_filter};[stacked]split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse=dither=bayer:bayer_scale=3",
	        "-r", str(fps),  # Set frame rate
	        "-loop", "0",  # Infinite loop
	        output_gif,
	    ])

	    if verbose:
	        print("Running FFmpeg command:")
	        print(" ".join(cmd))
	        print("\nProcessing...")

	    try:
	        subprocess.run(cmd, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        if verbose:
	            print(f"✗ FFmpeg error: {e.stderr}")
	        return False
	    except FileNotFoundError:
	        # Raised by subprocess when the ffmpeg executable cannot be found
	        if verbose:
	            print("✗ FFmpeg not found. Please install FFmpeg first.")
	            print(" - Windows: Download from https://ffmpeg.org/download.html")
	            print(" - macOS: brew install ffmpeg")
	            print(" - Linux: sudo apt install ffmpeg (Ubuntu/Debian)")
	        return False

	    if verbose:
	        print(f"✓ Successfully created GIF: {output_gif}")
	    return True

	def reverse_video(input_video_path, output_filename=None, verbose=False):
	    """
	    Reverse a video file and save it to /tmp directory.

	    The video frames are reversed with FFmpeg's ``reverse`` filter and the
	    audio with ``areverse``; the result is re-encoded with libx264 / AAC.

	    Args:
	        input_video_path (str): Path (str or path-like) to input MP4 file
	        output_filename (str, optional): Name for output file. If None, generates
	            ``<input stem>_reversed.mp4``. A ``.mp4`` suffix is appended when missing.
	        verbose (bool): Whether to print FFmpeg commands and processing messages

	    Returns:
	        str: Path to the reversed video file in /tmp, or None if FFmpeg
	        failed or the ``ffmpeg`` executable was not found

	    Raises:
	        FileNotFoundError: If ``input_video_path`` does not exist.
	    """
	    # Accept path-like objects: the command list must contain plain strings
	    # so that it can be joined for verbose printing.
	    input_video_path = os.fspath(input_video_path)

	    if not os.path.exists(input_video_path):
	        raise FileNotFoundError(f"Input video file not found: {input_video_path}")

	    # Generate output filename if not provided
	    if output_filename is None:
	        output_filename = f"{Path(input_video_path).stem}_reversed.mp4"
	    else:
	        output_filename = os.fspath(output_filename)

	    # Ensure output filename has .mp4 extension
	    if not output_filename.endswith('.mp4'):
	        output_filename += '.mp4'

	    # Create output path in /tmp
	    output_path = os.path.join('/tmp', output_filename)

	    # Build FFmpeg command to reverse video
	    cmd = [
	        "ffmpeg", "-y",  # -y to overwrite output file
	        "-i", input_video_path,
	        "-vf", "reverse",  # Video filter to reverse frames
	        "-af", "areverse",  # Audio filter to reverse audio
	        "-c:v", "libx264",  # Video codec
	        "-c:a", "aac",  # Audio codec
	        output_path,
	    ]

	    if verbose:
	        print("Running FFmpeg command to reverse video:")
	        print(" ".join(cmd))
	        print("\nProcessing...")

	    try:
	        subprocess.run(cmd, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        if verbose:
	            print(f"✗ FFmpeg error: {e.stderr}")
	        return None
	    except FileNotFoundError:
	        # Raised by subprocess when the ffmpeg executable cannot be found
	        if verbose:
	            print("✗ FFmpeg not found. Please install FFmpeg first.")
	            print(" - Windows: Download from https://ffmpeg.org/download.html")
	            print(" - macOS: brew install ffmpeg")
	            print(" - Linux: sudo apt install ffmpeg (Ubuntu/Debian)")
	        return None

	    if verbose:
	        print(f"✓ Successfully created reversed video: {output_path}")
	    return output_path

	def add_text_overlay(input_video_path, text, output_filename=None, font_size=12, font_color="white",
	                     background_color="black", position="top", margin=10, duration=None, verbose=False):
	    """
	    Add a text overlay to a video with a background title bar.

	    The bar is drawn with FFmpeg's ``drawbox`` filter (70% opacity) and the
	    text with ``drawtext``; video is re-encoded with libx264 and audio is
	    copied unchanged.

	    Args:
	        input_video_path (str): Path (str or path-like) to input MP4 file
	        text (str): Text to display. It is escaped so that characters such as
	            ``'``, ``:``, ``%`` and ``\\`` are rendered literally.
	        output_filename (str, optional): Name for output file. If None, generates
	            ``<input stem>_with_text.mp4``. A ``.mp4`` suffix is appended when missing.
	        font_size (int): Font size for the text (default: 12)
	        font_color (str): Color of the text (default: "white")
	        background_color (str): Color of the background bar (default: "black").
	            Pass "transparent" to draw no bar.
	        position (str): Position of text bar - "top", "bottom", "center" (default: "top").
	            Any other value is treated as "top".
	        margin (int): Margin from edge in pixels (default: 10)
	        duration (float, optional): Duration to show text (and its bar) in seconds.
	            If None, shows for entire video
	        verbose (bool): Whether to print FFmpeg commands and processing messages

	    Returns:
	        str: Path to the video with text overlay in /tmp, or None if FFmpeg
	        failed or the ``ffmpeg`` executable was not found

	    Raises:
	        FileNotFoundError: If ``input_video_path`` does not exist.
	    """

	    def _escape_drawtext(raw):
	        """Escape *raw* so drawtext renders it literally inside text='...'."""
	        # drawtext text expansion: "\" escapes the next character and "%"
	        # introduces a %{...} expansion.
	        escaped = raw.replace("\\", "\\\\").replace("%", "\\%")
	        # Filter option parsing: "\", "'" and ":" are special.
	        escaped = escaped.replace("\\", "\\\\").replace("'", "\\'").replace(":", "\\:")
	        # Filtergraph parsing: the value sits inside single quotes, so a
	        # literal quote must close the quoting, be backslash-escaped, and
	        # then reopen the quoting.
	        return escaped.replace("'", "'\\''")

	    # Accept path-like objects: the command list must contain plain strings
	    # so that it can be joined for verbose printing.
	    input_video_path = os.fspath(input_video_path)

	    if not os.path.exists(input_video_path):
	        raise FileNotFoundError(f"Input video file not found: {input_video_path}")

	    # Generate output filename if not provided
	    if output_filename is None:
	        output_filename = f"{Path(input_video_path).stem}_with_text.mp4"
	    else:
	        output_filename = os.fspath(output_filename)

	    # Ensure output filename has .mp4 extension
	    if not output_filename.endswith('.mp4'):
	        output_filename += '.mp4'

	    # Create output path in /tmp
	    output_path = os.path.join('/tmp', output_filename)

	    # Estimate text height as approximately 1.2 * font_size, plus padding
	    box_height = int(font_size * 1.2) + 10

	    # Vertical placement of the text and of its background bar.
	    # NOTE: in drawtext "h" is the frame height, but in drawbox "h" is the
	    # height of the box itself, so the bar must use "ih" (input height).
	    if position == "bottom":
	        text_y = f"h-th-{margin+5}"  # small offset for background box
	        box_y = f"ih-{box_height}-{margin}"
	    elif position == "center":
	        text_y = "(h-th)/2"
	        box_y = f"(ih-{box_height})/2"
	    else:  # "top", and the fallback for unrecognised values
	        text_y = f"{margin+5}"  # small offset for background box
	        box_y = f"{margin}"

	    # Limit both the text and its bar to the requested time window
	    enable_clause = f":enable='between(t,0,{duration})'" if duration is not None else ""

	    overlay_filter = (
	        f"drawtext=text='{_escape_drawtext(str(text))}':fontsize={font_size}"
	        f":fontcolor={font_color}:x={margin}:y={text_y}{enable_clause}"
	    )

	    # Add semi-transparent background box if needed
	    if background_color != "transparent":
	        box_filter = (
	            f"drawbox=x={margin-5}:y={box_y}:w=iw-{2*(margin-5)}:h={box_height}"
	            f":color={background_color}@0.7:t=fill{enable_clause}"
	        )
	        overlay_filter = f"{box_filter},{overlay_filter}"

	    # Build FFmpeg command
	    cmd = [
	        "ffmpeg", "-y",  # -y to overwrite output file
	        "-i", input_video_path,
	        "-vf", overlay_filter,
	        "-c:v", "libx264",  # Video codec
	        "-c:a", "copy",  # Copy audio without re-encoding
	        output_path,
	    ]

	    if verbose:
	        print("Running FFmpeg command to add text overlay:")
	        print(" ".join(cmd))
	        print("\nProcessing...")

	    try:
	        subprocess.run(cmd, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        if verbose:
	            print(f"✗ FFmpeg error: {e.stderr}")
	        return None
	    except FileNotFoundError:
	        # Raised by subprocess when the ffmpeg executable cannot be found
	        if verbose:
	            print("✗ FFmpeg not found. Please install FFmpeg first.")
	            print(" - Windows: Download from https://ffmpeg.org/download.html")
	            print(" - macOS: brew install ffmpeg")
	            print(" - Linux: sudo apt install ffmpeg (Ubuntu/Debian)")
	        return None

	    if verbose:
	        print(f"✓ Successfully created video with text overlay: {output_path}")
	    return output_path

	def add_text_strip(input_video_path, text, output_filename=None, font_size=16, font_color="white",
	                   background_color="black", position="top", text_padding=20, max_width_ratio=0.9, verbose=False):
	    """
	    Add a text strip/bar to a video (increases video height) rather than overlaying text.

	    The frame is extended with FFmpeg's ``pad`` filter and the (word-wrapped)
	    text is drawn centred in the padded area with ``drawtext``. Video is
	    re-encoded with libx264; audio, when present, is copied unchanged.

	    Args:
	        input_video_path (str): Path (str or path-like) to input MP4 file
	        text (str): Text to display in the strip. Lines longer than roughly
	            ``2 * font_size`` characters are wrapped at spaces. The text is
	            escaped so ``'``, ``:``, ``%`` and ``\\`` are rendered literally.
	        output_filename (str, optional): Name for output file. If None, generates
	            ``<input stem>_with_strip.mp4``. A ``.mp4`` suffix is appended when missing.
	        font_size (int): Font size for the text (default: 16)
	        font_color (str): Color of the text (default: "white")
	        background_color (str): Color of the background strip (default: "black")
	        position (str): Position of text strip - "top" or "bottom" (default: "top").
	            Any value other than "top" is treated as "bottom".
	        text_padding (int): Padding around text in pixels (default: 20)
	        max_width_ratio (float): Maximum width of text as ratio of video width (default: 0.9).
	            NOTE: currently not used; wrapping depends on ``font_size`` only.
	        verbose (bool): Whether to print FFmpeg commands and processing messages

	    Returns:
	        str: Path to the video with text strip in /tmp, or None if FFmpeg
	        failed or the ``ffmpeg`` executable was not found

	    Raises:
	        FileNotFoundError: If ``input_video_path`` does not exist.
	    """

	    def _escape_drawtext(raw):
	        """Escape *raw* so drawtext renders it literally inside text='...'."""
	        # drawtext text expansion: "\" escapes the next character and "%"
	        # introduces a %{...} expansion.
	        escaped = raw.replace("\\", "\\\\").replace("%", "\\%")
	        # Filter option parsing: "\", "'" and ":" are special.
	        escaped = escaped.replace("\\", "\\\\").replace("'", "\\'").replace(":", "\\:")
	        # Filtergraph parsing: the value sits inside single quotes, so a
	        # literal quote must close the quoting, be backslash-escaped, and
	        # then reopen the quoting.
	        return escaped.replace("'", "'\\''")

	    # Accept path-like objects: the command list must contain plain strings
	    # so that it can be joined for verbose printing.
	    input_video_path = os.fspath(input_video_path)

	    if not os.path.exists(input_video_path):
	        raise FileNotFoundError(f"Input video file not found: {input_video_path}")

	    # Generate output filename if not provided
	    if output_filename is None:
	        output_filename = f"{Path(input_video_path).stem}_with_strip.mp4"
	    else:
	        output_filename = os.fspath(output_filename)

	    # Ensure output filename has .mp4 extension
	    if not output_filename.endswith('.mp4'):
	        output_filename += '.mp4'

	    # Create output path in /tmp
	    output_path = os.path.join('/tmp', output_filename)

	    # Estimate characters per line based on font size (roughly 2 characters per font size pixel)
	    max_chars = int(font_size * 2)

	    # Greedy word wrap: keep adding words to the current line while it fits
	    wrapped_lines = []
	    for line in str(text).split('\n'):
	        if len(line) <= max_chars:
	            wrapped_lines.append(line)
	            continue
	        current = ""
	        for word in line.split(' '):
	            candidate = f"{current} {word}" if current else word
	            if not current or len(candidate) <= max_chars:
	                current = candidate
	            else:
	                wrapped_lines.append(current)
	                current = word
	        if current:
	            wrapped_lines.append(current)

	    # Calculate strip height based on number of lines
	    line_gap = 5  # extra spacing between lines
	    line_height = font_size + line_gap
	    strip_height = (len(wrapped_lines) * line_height) + (2 * text_padding)
	    # libx264 with 4:2:0 chroma needs even frame dimensions; keep the added
	    # height even so an even-height input stays encodable.
	    strip_height += strip_height % 2

	    # "top": shift the video down and write into the space above it;
	    # otherwise leave the video at the top and write into the space below.
	    if position == "top":
	        video_y = strip_height
	        text_y = f"{text_padding}"
	    else:  # bottom
	        video_y = 0
	        text_y = f"h-th-{text_padding}"

	    # drawtext's line_spacing is added on top of the glyph height, so only
	    # the extra gap is passed (not the full line height).
	    text_strip_filter = (
	        f"[0:v]pad=iw:ih+{strip_height}:0:{video_y}:{background_color}[padded];"
	        f"[padded]drawtext=text='{_escape_drawtext(chr(10).join(wrapped_lines))}'"
	        f":fontsize={font_size}:fontcolor={font_color}"
	        f":x=(w-tw)/2:y={text_y}:line_spacing={line_gap}[stacked]"
	    )

	    # Build FFmpeg command
	    cmd = [
	        "ffmpeg", "-y",  # -y to overwrite output file
	        "-i", input_video_path,
	        "-filter_complex", text_strip_filter,
	        "-map", "[stacked]",  # Map the processed video
	        "-map", "0:a?",  # Map the original audio if there is any
	        "-c:v", "libx264",  # Video codec
	        "-c:a", "copy",  # Copy audio without re-encoding
	        output_path,
	    ]

	    if verbose:
	        print("Running FFmpeg command to add text strip:")
	        print(" ".join(cmd))
	        print("\nProcessing...")

	    try:
	        subprocess.run(cmd, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        if verbose:
	            print(f"✗ FFmpeg error: {e.stderr}")
	        return None
	    except FileNotFoundError:
	        # Raised by subprocess when the ffmpeg executable cannot be found
	        if verbose:
	            print("✗ FFmpeg not found. Please install FFmpeg first.")
	            print(" - Windows: Download from https://ffmpeg.org/download.html")
	            print(" - macOS: brew install ffmpeg")
	            print(" - Linux: sudo apt install ffmpeg (Ubuntu/Debian)")
	        return None

	    if verbose:
	        print(f"✓ Successfully created video with text strip: {output_path}")
	    return output_path

	def get_video_info(video_path):
	    """
	    Get basic info about a video file using ``ffprobe``.

	    Args:
	        video_path (str): Path to the video file to inspect

	    Returns:
	        dict: ``{'width', 'height', 'duration', 'fps'}`` for the first video
	        stream reported by ffprobe, or None if ffprobe is unavailable or
	        fails, its output cannot be parsed, or no usable video stream is
	        found. ``duration`` falls back to the container duration when the
	        stream has none, and to 0.0 when neither is usable; ``fps`` is 0.0
	        when the reported frame rate cannot be parsed.
	    """
	    import json
	    from fractions import Fraction

	    def _to_float(value):
	        """Return *value* as a float, or None if it is missing/unparsable."""
	        try:
	            return float(value)
	        except (TypeError, ValueError):
	            return None

	    def _frame_rate(rate):
	        """Convert an ffprobe 'num/den' rate string to a float (0.0 if invalid)."""
	        # Parsed arithmetically instead of with eval(): the string comes from
	        # an external program and may also be a degenerate "0/0".
	        try:
	            return float(Fraction(rate))
	        except (TypeError, ValueError, ZeroDivisionError):
	            return 0.0

	    cmd = [
	        "ffprobe", "-v", "quiet", "-print_format", "json",
	        "-show_format", "-show_streams", video_path
	    ]

	    try:
	        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
	        data = json.loads(result.stdout)
	    except (subprocess.SubprocessError, OSError, ValueError, TypeError):
	        # ffprobe missing, ffprobe failed, bad argument, or invalid JSON
	        return None

	    try:
	        container_duration = _to_float(data.get('format', {}).get('duration'))

	        # Find video stream
	        for stream in data['streams']:
	            if stream['codec_type'] != 'video':
	                continue
	            duration = _to_float(stream.get('duration'))
	            if duration is None:
	                duration = container_duration if container_duration is not None else 0.0
	            return {
	                'width': stream['width'],
	                'height': stream['height'],
	                'duration': duration,
	                'fps': _frame_rate(stream.get('r_frame_rate', '0/1')),
	            }
	    except (KeyError, TypeError, AttributeError):
	        # Unexpected ffprobe output layout: treat as "info unavailable"
	        return None
	    return None






	# Example usage
	if __name__ == "__main__":
	    # Demo inputs - replace with your actual video files
	    video_files = [
	        "examples/folding_paper.mp4",
	        "examples/S008C002P032R002A051.mp4",
	    ]
	    output_file = "combined_videos.gif"
	    vertical_output_file = "combined_videos_vertical.gif"
	    bytes_per_mb = 1024 * 1024

	    # Only work with the example files that are actually present
	    existing_files = [candidate for candidate in video_files if os.path.exists(candidate)]

	    if not existing_files:
	        # Nothing to process: print a short usage guide instead
	        usage_lines = (
	            "No video files found. Please update the video_files list with your actual MP4 file paths.",
	            "\nExample usage:",
	            "video_files = [",
	            ' "/path/to/your/video1.mp4",',
	            ' "/path/to/your/video2.mp4",',
	            ' "/path/to/your/video3.mp4"',
	            "]",
	            "\n# Create horizontal GIF",
	            "create_side_by_side_gif(video_files, 'horizontal.gif')",
	            "\n# Create vertical GIF",
	            "create_top_to_bottom_gif(video_files, 'vertical.gif')",
	            "\n# Reverse a video",
	            "reversed_path = reverse_video('/path/to/your/video1.mp4')",
	            "print(f'Reversed video: {reversed_path}')",
	            "\n# Add text overlay to video",
	            "text_video = add_text_overlay('/path/to/your/video1.mp4', 'My Title', font_size=30)",
	            "print(f'Video with text: {text_video}')",
	            "\n# Add text strip to video (increases video height)",
	            "strip_video = add_text_strip('/path/to/your/video1.mp4', 'Title Strip', font_size=16)",
	            "print(f'Video with strip: {strip_video}')",
	        )
	        for usage_line in usage_lines:
	            print(usage_line)
	    else:
	        first_video = existing_files[0]

	        # Summarise what was found
	        print(f"Found {len(existing_files)} video files:")
	        for video in existing_files:
	            info = get_video_info(video)
	            if not info:
	                print(f" {video}: (info unavailable)")
	            else:
	                print(f" {video}: {info['width']}x{info['height']}, {info['duration']:.1f}s")

	        # Horizontal GIF: 30px white gaps, 12 fps, every clip 300px tall
	        success = create_side_by_side_gif(
	            video_paths=existing_files,
	            output_gif=output_file,
	            gap_width=30,
	            fps=12,
	            scale_height=300,
	            gap_color="white",
	        )

	        # Vertical GIF: 20px white gaps, 12 fps, every clip 320px wide
	        success_vertical = create_top_to_bottom_gif(
	            video_paths=existing_files,
	            output_gif=vertical_output_file,
	            gap_height=20,
	            fps=12,
	            scale_width=320,
	            gap_color="white",
	        )

	        # Report the size of whichever GIFs were produced
	        if success:
	            file_size = os.path.getsize(output_file) / bytes_per_mb  # MB
	            print(f"\nHorizontal GIF size: {file_size:.1f} MB")
	        if success_vertical:
	            vertical_file_size = os.path.getsize(vertical_output_file) / bytes_per_mb  # MB
	            print(f"Vertical GIF size: {vertical_file_size:.1f} MB")

	        # Example of reversing a video
	        print(f"\nReversing first video: {first_video}")
	        reversed_path = reverse_video(first_video)
	        if reversed_path:
	            print(f"Reversed video saved to: {reversed_path}")

	        # Example of adding text overlay
	        print(f"\nAdding text overlay to first video: {first_video}")
	        text_video_path = add_text_overlay(
	            input_video_path=first_video,
	            text="Sample Title Text",
	            font_size=30,
	            font_color="white",
	            background_color="black",
	            position="top",
	            margin=15,
	        )
	        if text_video_path:
	            print(f"Video with text overlay saved to: {text_video_path}")

	        # Example of adding text strip
	        print(f"\nAdding text strip to first video: {first_video}")
	        strip_video_path = add_text_strip(
	            input_video_path=first_video,
	            text="Video Title Strip",
	            font_size=16,
	            font_color="white",
	            background_color="darkblue",
	            position="top",
	            text_padding=15,
	        )
	        if strip_video_path:
	            print(f"Video with text strip saved to: {strip_video_path}")