# SKRIPTZ / app.py
# skylinkd's picture
# Create app.py
# 3d9b03b verified
# Raw
# History Blame Contribute Delete
# 270 kB
import os
import sys
import re
import random
import math
# THIS IS THE FIX - PART 1
os.environ['GRADIO_SUPPRESS_PROGRESS'] = 'true'
# THIS IS THE FIX - PART 2: Clean up console logs from Gradio
import logging
logging.getLogger('gradio').setLevel(logging.ERROR)
import cv2
import numpy as np
import gradio as gr
from gradio import Progress
import shutil
import subprocess
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageEnhance
from datetime import datetime
from threading import Lock
import base64
import io
# --- Dependency Check ---
try:
    # Annotator classes referenced by DETECTOR_CONFIG.
    from controlnet_aux import CannyDetector
    from controlnet_aux import MLSDdetector
    from controlnet_aux import HEDdetector
    from controlnet_aux import LineartDetector
    from controlnet_aux import OpenposeDetector
    from controlnet_aux import NormalBaeDetector
    from gradio_client import Client
    from rembg import remove
    import librosa
    # NEW: Added for the audio chopping feature
    from pydub import AudioSegment
    from pydub.silence import split_on_silence
except ImportError as missing:
    # These packages are mandatory: report the first one that failed to
    # import and terminate the process with exit status 1.
    banner = "=" * 80
    for message_line in (
        banner,
        f"ERROR: Missing dependency -> {missing}",
        "Please install all required packages by running:",
        "pip install -r requirements.txt",
        "(Note: The new feature requires 'pydub'. Make sure it's in your requirements file.)",
        banner,
    ):
        print(message_line)
    sys.exit(1)
# --- AI Model Dependency Check ---
try:
    import whisper
except ImportError:
    # Optional dependency: bind the name to None so later code can test
    # `if whisper:` instead of failing with a NameError.
    whisper = None
    print(
        "=" * 80,
        "WARNING: 'openai-whisper' not installed. The Transcription tab will be disabled.",
        "To enable it, run: pip install -U openai-whisper",
        "=" * 80,
        sep="\n",
    )
# --- Slo-Mo & Enhance AI Dependency Check (SIMPLIFIED) ---
try:
    from rife_ncnn_vulkan_python import Rife
except ImportError:
    # Optional dependency: keep the name defined (as None) and tell the user
    # how to enable the feature.
    Rife = None
    print(
        "=" * 80,
        "WARNING: 'rife-ncnn-vulkan-python' not found.",
        "The AI-Enhanced option in 'Slo-Mo & Enhance' will be disabled.",
        "To enable it, run: pip install rife-ncnn-vulkan-python",
        "=" * 80,
        sep="\n",
    )
# True only when the import above succeeded.
ENHANCE_AI_AVAILABLE = Rife is not None
# --- Global Variables & Setup ---
# Working directory (relative to the current working directory) where the
# functions in this file write their intermediate and output files.
# It is created at import time if it does not exist yet.
TEMP_DIR = "temp_gradio"
os.makedirs(TEMP_DIR, exist_ok=True)
# One lock shared by all lazy model loaders (get_detector, load_whisper_model,
# load_enhance_ai_models); each loader holds it while checking and filling
# its cache.
model_load_lock = Lock()
# Cache of instantiated detectors, keyed by detector name (see get_detector).
loaded_detectors = {}
# Currently loaded Whisper model and the model name it was loaded with
# (see load_whisper_model). None / "" until the first load.
whisper_model = None
whisper_model_name = ""
# Lazily created RIFE model (see load_enhance_ai_models). None until loaded.
rife_model = None
# REMOVED realesrgan_model
# --- Default Presets for Transfer Tab (Flat Dictionary) ---
# Flat mapping of display label -> URL for external tools and Hugging Face
# Spaces. Keys are unique human-readable names (the author/owner is appended
# in parentheses where several entries point at the same kind of tool);
# values are absolute https URLs. The "# ..." lines inside the literal are
# category headings only; the dictionary itself is not nested.
DEFAULT_LINK_PRESETS = {
# Text To Image
"FLUX.1-schnell (black-forest-labs)": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-schnell",
"FLUX.1-schnell (Rooc)": "https://huggingface.co/spaces/Rooc/FLUX.1-schnell",
"FLUX.1-schnell (evalstate)": "https://huggingface.co/spaces/evalstate/flux1_schnell",
"FLUX.1-schnell (hysts-mcp)": "https://huggingface.co/spaces/hysts-mcp/FLUX.1-schnell",
"FLUX.1-schnell (cbensimon)": "https://huggingface.co/spaces/cbensimon/FLUX-1-schnell-mcp",
"FLUX.1-dev": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-dev",
"FLUX.1-dev-quantized": "https://huggingface.co/spaces/multimodalart/FLUX.1-dev-quantized",
"FLUX.1-dev_NotASI": "https://huggingface.co/spaces/NotASI/FLUX.1-dev",
"FLUX.1-dev_hysts": "https://huggingface.co/spaces/hysts-mcp/FLUX.1-dev",
"HiDream-I1-Dev": "https://huggingface.co/spaces/HiDream-ai/HiDream-I1-Dev",
"UnfilteredAI-NSFW-gen-v2": "https://huggingface.co/spaces/armen425221356/UnfilteredAI-NSFW-gen-v2_self_parms",
"InfiniteYou-FLUX": "https://huggingface.co/spaces/ByteDance/InfiniteYou-FLUX",
"Stable Diffusion 3.5 Large (arad1367)": "https://huggingface.co/spaces/arad1367/Stable_Diffusion_3_5_Large_Customized",
"Stable Diffusion 3.5 Large Turbo (doevent)": "https://huggingface.co/spaces/doevent/stable-diffusion-3.5-large-turbo",
# Virtual Try-On & Character
"OutfitAnyone": "https://huggingface.co/spaces/HumanAIGC/OutfitAnyone",
"Kolors Virtual Try-On": "https://huggingface.co/spaces/Kwai-Kolors/Kolors-Virtual-Try-On",
"Miragic Virtual Try-On": "https://huggingface.co/spaces/Miragic-AI/Miragic-Virtual-Try-On",
"OutfitAnyway": "https://huggingface.co/spaces/selfit-camera/OutfitAnyway",
"IDM-VTON": "https://huggingface.co/spaces/yisol/IDM-VTON",
"InstantCharacter": "https://huggingface.co/spaces/InstantX/InstantCharacter",
"InstantID": "https://huggingface.co/spaces/InstantX/InstantID",
# AI Lip-Sync & Talking Avatars
"LivePortrait": "https://huggingface.co/spaces/Han-123/LivePortrait",
"LivePortrait (CPU)": "https://huggingface.co/spaces/K00B404/LivePortrait_cpu",
"D-ID Live Portrait AI": "https://www.d-id.com/liveportrait-4/",
"Synthesia Avatars": "https://www.synthesia.io/features/avatars",
"Papercup": "https://www.papercup.com/",
"Hedra": "https://www.hedra.com",
"LemonSlice": "https://lemonslice.com",
"Vozo AI": "https://www.vozo.ai/lip-sync",
"Gooey AI Lipsync": "https://gooey.ai/Lipsync",
"Sync.so": "https://sync.so",
"LipDub AI": "https://www.lipdub.ai",
"Magic Hour": "https://magichour.ai",
"Lifelike AI": "https://www.lifelikeai.io",
"DeepMotion": "https://www.deepmotion.com",
"Elai.io": "https://elai.io",
"Rephrase.ai": "https://www.rephrase.ai",
"Colossyan": "https://www.colossyan.com",
"HeyGen (Movio)": "https://www.heygen.com",
"Murf Studio": "https://murf.ai",
# Image Editing & Upscaling
"FLUX Fill/Outpaint": "https://huggingface.co/spaces/multimodalart/flux-fill-outpaint",
"ReSize Image Outpainting": "https://huggingface.co/spaces/VIDraft/ReSize-Image-Outpainting",
"IC-Light (Relighting)": "https://huggingface.co/spaces/lllyasviel/IC-Light",
"IC-Light v2-vary": "https://huggingface.co/spaces/lllyasviel/iclight-v2-vary",
"Kontext Relight": "https://huggingface.co/spaces/kontext-community/kontext-relight",
"SUPIR Upscaler": "https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR",
# Video Generation & FramePacks
"Framepacks (atunc29)": "https://huggingface.co/spaces/atunc29/Framepacks",
"Framepack i2v (ginigen)": "https://huggingface.co/spaces/ginigen/framepack-i2v",
"Framepack i2v (beowcow)": "https://huggingface.co/spaces/beowcow/framepack-i2v",
"Framepack i2v (lisonallen)": "https://huggingface.co/spaces/lisonallen/framepack-i2v",
"FramePack F1 (Latyrine)": "https://huggingface.co/spaces/Latyrine/FramePack-F1",
"FramePack F1 (linoyts)": "https://huggingface.co/spaces/linoyts/FramePack-F1",
"FramePack Rotate (tori29umai)": "https://huggingface.co/spaces/tori29umai/FramePack_rotate_landscape",
"FramePack Rotate (bep40)": "https://huggingface.co/spaces/bep40/FramePack_rotate_landscape",
"FramePack Rotate (VIDraft)": "https://huggingface.co/spaces/VIDraft/FramePack_rotate_landscape",
"Framepack-H111 (rahul7star)": "https://huggingface.co/spaces/rahul7star/Framepack-H111",
"FLUX.1 Kontext Dev": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-Kontext-Dev",
"Wan2-1-fast": "https://huggingface.co/spaces/multimodalart/wan2-1-fast",
"LTX-video-distilled": "https://huggingface.co/spaces/Lightricks/ltx-video-distilled",
"RunwayML": "https://app.runwayml.com/video-tools/teams/rinaabdine1/ai-tools/generate",
"Pika Labs": "https://pika.art/",
"Kling AI": "https://app.klingai.com/global/image-to-video/frame-mode",
# Video Interpolation & Slow Motion
"RIFE (remzloev)": "https://huggingface.co/spaces/remzloev/Rife",
"VFI Converter (Agung1453)": "https://huggingface.co/spaces/Agung1453/Video-Frame-Interpolation-Converter",
"ZeroGPU Upscaler/Interpolation": "https://huggingface.co/spaces/inoculatemedia/zerogpu-upscaler-interpolation",
"Frame Interpolation (meta-artem)": "https://huggingface.co/spaces/meta-artem/frame-interpolation",
"Video Frame Interpolation (guardiancc)": "https://huggingface.co/spaces/guardiancc/video_frame_interpolation",
"Video Frame Interpolation (freealise)": "https://huggingface.co/spaces/freealise/video_frame_interpolation",
"Framer (wwen1997)": "https://huggingface.co/spaces/wwen1997/Framer",
"Inter4k VideoInterpolator": "https://huggingface.co/spaces/vimleshc57/Inter4k_VideoInterpolator",
# AnimateDiff & Advanced Animation
"AnimateDiff Lightning (ByteDance)": "https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning",
"AnimateDiff Lightning (SahaniJi)": "https://huggingface.co/spaces/SahaniJi/AnimateDiff-Lightning",
"AnimateDiff (fatima14)": "https://huggingface.co/spaces/fatima14/AnimateDiff",
"AnimateDiff Video Gen (faizanR)": "https://huggingface.co/spaces/faizanR/animatediff-video-generator",
"Text-to-Animation Fast (MisterProton)": "https://huggingface.co/spaces/MisterProton/text-to-Animation-Fast-AnimateDiff",
"Text-to-Animation Fast (Rowdy013)": "https://huggingface.co/spaces/Rowdy013/text-to-Animation-Fast",
# StyleGAN & Portrait Motion
"StyleGAN-Human Interpolation (hysts)": "https://huggingface.co/spaces/hysts/StyleGAN-Human-Interpolation",
"StyleGAN-Human (Gradio-Blocks)": "https://huggingface.co/spaces/Gradio-Blocks/StyleGAN-Human",
# Film & Style Models
"MGM-Film-Diffusion (tonyassi)": "https://huggingface.co/spaces/tonyassi/MGM-Film-Diffusion",
"CineDiffusion (takarajordan)": "https://huggingface.co/spaces/takarajordan/CineDiffusion",
"FLUX Film Foto (MartsoBodziu1994)": "https://huggingface.co/spaces/MartsoBodziu1994/alvdansen-flux_film_foto",
"FLUX Style Shaping": "https://huggingface.co/spaces/multimodalart/flux-style-shaping",
"Film (Stijnijzelenberg)": "https://huggingface.co/spaces/Stijnijzelenberg/film",
"Film Eras (abbiewoodbridge)": "https://huggingface.co/spaces/abbiewoodbridge/Film_Eras",
"Film Genre Classifier (Rezuwan)": "https://huggingface.co/spaces/Rezuwan/film_genre_classifier",
"RunwayML (Faizbulbul)": "https://huggingface.co/spaces/Faizbulbul/Runwaymlfaiz",
# Text-to-3D
"Step1X-3D": "https://huggingface.co/spaces/stepfun-ai/Step1X-3D",
"TRELLIS TextTo3D (PUM4CH3N)": "https://huggingface.co/spaces/PUM4CH3N/TRELLIS_TextTo3D",
"TRELLIS TextTo3D (cavargas10)": "https://huggingface.co/spaces/cavargas10/TRELLIS-Texto3D",
"TRELLIS TextTo3D (dkatz2391)": "https://huggingface.co/spaces/dkatz2391/TRELLIS_TextTo3D_Try2",
"Sparc3D": "https://huggingface.co/spaces/ilcve21/Sparc3D",
"Hunyuan3D-2.1": "https://huggingface.co/spaces/tencent/Hunyuan3D-2.1",
# Image Captioning & Interrogation
"BLIP-2 (hysts)": "https://huggingface.co/spaces/hysts/BLIP2",
"BLIP-3o": "https://huggingface.co/spaces/BLIP3o/blip-3o",
"Blip-Dalle3 (DarwinAnim8or)": "https://huggingface.co/spaces/DarwinAnim8or/Blip-Dalle3",
"BLIP API (Jonu1)": "https://huggingface.co/spaces/Jonu1/blip-image-captioning-api",
"BLIP API (muxiddin19)": "https://huggingface.co/spaces/muxiddin19/blip-image-captioning-api",
# Diffusion & Sketching Tools
"DiffSketcher (SVGRender)": "https://huggingface.co/spaces/SVGRender/DiffSketcher",
"Diffusion WikiArt (kaupane)": "https://huggingface.co/spaces/kaupane/diffusion-wikiart",
"Diffusers Image Fill (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-image-fill",
"Diffusers Fast Inpaint (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-fast-inpaint",
# Audio & Voice Tools
"ThinkSound (FunAudioLLM)": "https://huggingface.co/spaces/FunAudioLLM/ThinkSound",
"TTS Unlimited (NihalGazi)": "https://huggingface.co/spaces/NihalGazi/Text-To-Speech-Unlimited",
"Voice Clon (tonyassi)": "https://huggingface.co/spaces/tonyassi/voice-clon",
# Scripting & Writing Tools
"SKRIPTZ (skylinkd)": "https://huggingface.co/spaces/skylinkd/SKRIPTZ",
# AI Frameworks & Platforms
"Hugging Face Hub": "https://huggingface.co",
"Hugging Face Transformers": "https://huggingface.co/docs/transformers/en/index",
"Hugging Face Inference API": "https://huggingface.co/inference-api/",
# Miscellaneous Video Tools
"SpatialTrackerV2 (Yuxihenry)": "https://huggingface.co/spaces/Yuxihenry/SpatialTrackerV2",
"MTVCraft (BAAI)": "https://huggingface.co/spaces/BAAI/MTVCraft",
# Miscellaneous Tools
"EBSynth (NihalGazi)": "https://huggingface.co/spaces/NihalGazi/EBSynth",
"MoodSpace (huzey)": "https://huggingface.co/spaces/huzey/MoodSpace",
"TR0N (Layer6)": "https://huggingface.co/spaces/Layer6/TR0N",
"TUTOR (nathannarrik)": "https://huggingface.co/spaces/nathannarrik/TUTOR",
"Sport Model 1 (CHEN11102)": "https://huggingface.co/spaces/CHEN11102/sportmodel1",
"VBench Leaderboard (Vchitect)": "https://huggingface.co/spaces/Vchitect/VBench_Leaderboard",
}
# --- Model Loading ---
# Registry used by get_detector(): detector name -> {"class": ..., "args": ...}.
# An entry whose "args" contain "pretrained_model_or_path" is built through
# the class's from_pretrained(); any other entry is built by calling the class.
DETECTOR_CONFIG = {
    "Canny": {"class": CannyDetector, "args": {}},
    **{
        label: {
            "class": detector_cls,
            "args": {"pretrained_model_or_path": "lllyasviel/Annotators"},
        }
        for label, detector_cls in (
            ("Lineart", LineartDetector),
            ("MLSD", MLSDdetector),
            ("OpenPose", OpenposeDetector),
            ("NormalBAE", NormalBaeDetector),
            ("SoftEdge (HED)", HEDdetector),
        )
    },
}
def get_detector(name):
    """Return the detector registered under *name*, creating it on first use.

    Instances are cached in ``loaded_detectors``; the lookup and the
    construction both happen while ``model_load_lock`` is held.

    Args:
        name: A key of ``DETECTOR_CONFIG``.

    Returns:
        The cached (or newly created) detector instance.

    Raises:
        KeyError: If *name* is not a key of ``DETECTOR_CONFIG``.
    """
    with model_load_lock:
        if name in loaded_detectors:
            return loaded_detectors[name]

        print(f"Loading {name} model...")
        spec = DETECTOR_CONFIG[name]
        factory = spec["class"]
        factory_kwargs = spec["args"]
        # Entries that name a checkpoint are built via from_pretrained().
        if "pretrained_model_or_path" in factory_kwargs:
            factory = factory.from_pretrained
        instance = factory(**factory_kwargs)
        loaded_detectors[name] = instance
        print(f"{name} model loaded.")
        return instance
def load_whisper_model(model_name="base"):
    """Return a Whisper model for *model_name*, loading it only when needed.

    The model is (re)loaded when nothing is loaded yet or when a different
    model name is requested; otherwise the cached model is returned.

    Args:
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        The loaded model, or ``None`` when the ``whisper`` package is not
        available.
    """
    global whisper_model, whisper_model_name

    if not whisper:
        return None

    with model_load_lock:
        stale = whisper_model is None or whisper_model_name != model_name
        if stale:
            print(f"Loading Whisper model '{model_name}'... (This may download files on first run)")
            whisper_model = whisper.load_model(model_name, device="cpu")
            whisper_model_name = model_name
            print("Whisper model loaded.")
        return whisper_model
def load_enhance_ai_models():
    """Create the global RIFE model once; do nothing if RIFE is unavailable.

    The result is stored in the module-level ``rife_model``; the function
    itself always returns ``None``.
    """
    global rife_model

    if ENHANCE_AI_AVAILABLE:
        with model_load_lock:
            if rife_model is not None:
                return
            print("Loading RIFE model for frame interpolation...")
            rife_model = Rife(gpuid=0, model="rife-v4.6", num_threads=4, tta_mode=False)
            print("RIFE model loaded.")
# Runs at import time: builds the "Canny" detector and stores it in
# loaded_detectors, so later get_detector("Canny") calls return the cached
# instance instead of constructing a new one.
get_detector("Canny") # Pre-load Canny detector
# --- Utility Functions ---
def parse_color(color_str):
    """Convert a colour string into an ``(r, g, b)`` tuple of ints.

    Supported string forms:

    * ``'rgba(r, g, b, a)'`` and ``'rgb(r, g, b)'`` -- components may be
      floats; each is truncated to an int and the alpha value is ignored.
    * ``'#RRGGBB'`` (extra trailing digits such as an alpha pair are ignored).
    * ``'#RGB'`` / ``'#RGBA'`` shorthand, expanded by doubling every digit.

    Args:
        color_str: The colour value. Non-string values are assumed to be
            usable as they are and are returned unchanged.

    Returns:
        A 3-tuple of ints for the recognised forms; otherwise the input
        itself (e.g. a named colour such as ``'red'``).

    Raises:
        ValueError: If a ``'#...'`` string does not contain valid hex digits.
    """
    if not isinstance(color_str, str):
        return color_str  # Should already be a tuple or other valid format

    # Functional notation; covers both 'rgb(' and 'rgba('.
    if color_str.startswith('rgb'):
        parts = re.findall(r"[\d\.]+", color_str)
        if len(parts) >= 3:
            return tuple(int(float(part)) for part in parts[:3])

    if color_str.startswith('#'):
        hex_color = color_str.lstrip('#')
        # Expand CSS shorthand ('f80' -> 'ff8800') so the pair-wise slicing
        # below does not hit an empty slice and raise.
        if len(hex_color) in (3, 4):
            hex_color = "".join(digit * 2 for digit in hex_color)
        return tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))

    return color_str
def rotate_image(image, rotation):
    """Rotate *image* according to a rotation label.

    Args:
        image: Image array handed to ``cv2.rotate``.
        rotation: One of "90 Degrees Clockwise",
            "90 Degrees Counter-Clockwise" or "180 Degrees". Any other value
            leaves the image untouched.

    Returns:
        The rotated image, or *image* itself for an unrecognised label.
    """
    # Label -> name of the cv2 rotation flag; resolved only when a label matches.
    rotation_flags = (
        ("90 Degrees Clockwise", "ROTATE_90_CLOCKWISE"),
        ("90 Degrees Counter-Clockwise", "ROTATE_90_COUNTERCLOCKWISE"),
        ("180 Degrees", "ROTATE_180"),
    )
    for label, flag_name in rotation_flags:
        if rotation == label:
            return cv2.rotate(image, getattr(cv2, flag_name))
    return image
def manipulate_image(image, operation):
    """Apply one named transformation to *image*.

    Args:
        image: Image array; ``None`` is rejected.
        operation: Label of the transformation. Unknown labels are a no-op.

    Returns:
        The transformed image, or *image* itself for an unknown label.

    Raises:
        gr.Error: If *image* is ``None``.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # (label, transformation) pairs, checked in order.
    transforms = (
        ("Invert Colors", lambda img: cv2.bitwise_not(img)),
        ("Flip Horizontal", lambda img: cv2.flip(img, 1)),
        ("Flip Vertical", lambda img: cv2.flip(img, 0)),
        ("Rotate 90Β° Right", lambda img: cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)),
        ("Rotate 90Β° Left", lambda img: cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)),
    )
    for label, apply in transforms:
        if operation == label:
            return apply(image)
    return image
def manipulate_video(video_path, operation):
    """Apply ``manipulate_image`` to every frame of a video.

    Frames are read until the decoder reports end-of-stream instead of
    trusting the container's frame-count metadata, so the output is not cut
    short when that metadata under-reports. The capture and the writer are
    always released, even if processing a frame fails.

    Args:
        video_path: Path of the input video.
        operation: Operation label understood by ``manipulate_image``.

    Returns:
        Path of the written ``.mp4`` file inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no path is given or the video cannot be opened.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"manipulated_video_{timestamp}.mp4")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("Error opening video file.")

    writer = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also catches negative and NaN values, not only 0.
        if not fps > 0:
            fps = 30

        # A quarter-turn swaps the output width and height.
        if operation in ["Rotate 90Β° Right", "Rotate 90Β° Left"]:
            out_size = (height, width)
        else:
            out_size = (width, height)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_video_path, fourcc, fps, out_size)

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            writer.write(manipulate_image(frame, operation))
    finally:
        cap.release()
        if writer is not None:
            writer.release()

    return output_video_path
def get_media_duration(media_path):
    """Return the duration of a media file in seconds (``0.0`` on failure).

    Two strategies are tried in order:

    1. ``ffprobe`` reads the duration from the container metadata.
    2. If that fails for any reason, ``ffmpeg`` decodes the whole file to a
       null sink and the last ``time=`` stamp of its log is used.

    Args:
        media_path: Path of the media file.

    Returns:
        Duration in seconds as a float; ``0.0`` when the path is empty, the
        file does not exist, or both strategies fail.
    """
    if not (media_path and os.path.exists(media_path)):
        return 0.0

    # --- Strategy 1: metadata probe ---
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        media_path,
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, check=True, timeout=10)
        return float(probe.stdout.strip())
    except Exception:
        # Deliberate fall-through to the full scan below.
        pass

    # --- Strategy 2: full decode scan ---
    print(f"Warning: Fast duration check failed for {os.path.basename(media_path)}. Performing robust scan (this may take a moment)...")
    try:
        scan_cmd = ["ffmpeg", "-i", media_path, "-f", "null", "-"]
        # ffmpeg writes its progress lines to stderr.
        scan = subprocess.run(scan_cmd, capture_output=True, text=True, timeout=60)
        stamps = re.findall(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})", scan.stderr)
        if not stamps:
            print(f"Error: Robust duration scan also failed for {os.path.basename(media_path)}.")
            return 0.0
        # The final stamp marks the end of the decoded stream.
        hours, minutes, seconds, hundredths = (int(field) for field in stamps[-1])
        total_seconds = (hours * 3600) + (minutes * 60) + seconds + (hundredths / 100.0)
        print(f"Robust scan successful. Detected duration: {total_seconds:.2f}s")
        return total_seconds
    except Exception as e:
        print(f"An unexpected error occurred during robust scan for {media_path}: {e}")
        return 0.0
def get_video_dimensions(video_path):
    """Return ``(width, height)`` of a video, or ``(0, 0)`` if unavailable.

    ``(0, 0)`` is returned for an empty path, for a file that cannot be
    opened, and when any exception occurs while probing.
    """
    unknown = (0, 0)
    if not video_path:
        return unknown
    try:
        cap = cv2.VideoCapture(video_path)
        if cap.isOpened():
            size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            )
            cap.release()
            return size
        return unknown
    except Exception:
        return unknown
def get_video_fps(video_path):
    """Return the frame rate of a video, falling back to ``24.0``.

    The fallback is used for an empty path, a file that cannot be opened,
    a reported rate that is not greater than zero, and any exception raised
    while probing.
    """
    fallback_fps = 24.0
    if not video_path:
        return fallback_fps
    try:
        cap = cv2.VideoCapture(video_path)
        if cap.isOpened():
            reported = cap.get(cv2.CAP_PROP_FPS)
            cap.release()
            if reported > 0:
                return reported
        return fallback_fps
    except Exception:
        return fallback_fps
def has_audio_stream(video_path):
    """Report whether ``ffprobe`` lists at least one audio stream in a file.

    Returns ``False`` for an empty path, when ``ffprobe`` exits with an
    error, or when the ``ffprobe`` executable cannot be found.
    """
    if not video_path:
        return False

    probe_cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "a",
        "-show_entries", "stream=codec_type",
        "-of", "default=noprint_wrappers=1:nokey=1",
        video_path,
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    # ffprobe prints one line per selected stream; empty output means none.
    return probe.stdout.strip() != ""
def run_ffmpeg_command(cmd, desc="Processing with FFMPEG..."):
    """Run an external command (normally ffmpeg) and surface failures in the UI.

    Output is decoded as UTF-8 with ``errors='replace'``: ffmpeg may echo
    file names or metadata in other encodings, and strict decoding would
    abort the call with ``UnicodeDecodeError`` even though the command
    itself succeeded.

    Args:
        cmd: Command and arguments as a list of strings.
        desc: Human-readable description of the step. It is accepted for
            call-site readability and is not used by this function.

    Returns:
        None.

    Raises:
        gr.Error: If the executable is not found or exits with a non-zero
            status; the message contains the command, stderr and stdout.
    """
    print(f"Running FFMPEG command: {' '.join(cmd)}")
    try:
        process = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
            check=False,
        )
    except FileNotFoundError:
        raise gr.Error("FFMPEG not found. Please ensure ffmpeg is installed and in your system's PATH.")

    if process.returncode != 0:
        full_output = f"--- FFMPEG & GRADIO ERROR LOG ---\n\n" \
                      f"FFMPEG COMMAND:\n{' '.join(cmd)}\n\n" \
                      f"FFMPEG STDERR:\n{process.stderr}\n\n" \
                      f"FFMPEG STDOUT:\n{process.stdout}"
        raise gr.Error(f"FFMPEG failed!\n\nDetails:\n{full_output}")
def batch_image_processor(files, processing_function, job_name, **kwargs):
    """Run *processing_function* over uploaded files and zip the results.

    Each file is processed independently; a file whose processing raises is
    logged to stdout and skipped.

    Args:
        files: Uploaded file objects exposing a ``.name`` path attribute.
        processing_function: Callable invoked as
            ``processing_function(input_path=..., output_path=..., **kwargs)``.
        job_name: Used in the working-directory and archive names, and to
            choose the output file name ("zoom_videos", "bg_removed" and
            "cropped" get dedicated names; anything else keeps the input
            file name).
        **kwargs: Forwarded to *processing_function*.

    Returns:
        ``(output_paths, zip_path, job_temp_dir)``.

    Raises:
        gr.Error: If *files* is empty or no file could be processed.
    """
    if not files:
        raise gr.Error("Please upload at least one image.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)

    output_paths = []
    for file_obj in files:
        try:
            source_name = os.path.basename(file_obj.name)
            base = os.path.splitext(source_name)[0]
            special_names = {
                "zoom_videos": f"{base}.mp4",
                "bg_removed": f"{base}.png",
                "cropped": f"{base}_cropped.png",
            }
            output_filename = special_names.get(job_name, source_name)
            output_path = os.path.join(job_temp_dir, output_filename)
            processing_function(input_path=file_obj.name, output_path=output_path, **kwargs)
        except Exception as e:
            print(f"Skipping file {file_obj.name} due to error: {e}")
        else:
            output_paths.append(output_path)

    if not output_paths:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("No images could be processed from the batch.")

    zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}")
    zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
    return output_paths, zip_path, job_temp_dir
def process_batch_images_with_detector(files, detector_name):
    """Run one detector over a batch of images.

    Args:
        files: Uploaded file objects, as accepted by ``batch_image_processor``.
        detector_name: Name passed to ``get_detector``.

    Returns:
        ``(output_paths, zip_path)`` from ``batch_image_processor``.
    """
    detector = get_detector(detector_name)

    def apply_detector(input_path, output_path, **kwargs):
        # The detector receives an RGB image and its result is saved as-is.
        with Image.open(input_path).convert("RGB") as rgb_image:
            detector(rgb_image, detect_resolution=512, image_resolution=1024).save(output_path)

    batch_result = batch_image_processor(files, apply_detector, f"controlnet_{detector_name}")
    # Drop the third element (the working directory); callers only need two.
    return batch_result[0], batch_result[1]
def process_video_with_detector(video_path, detector_name):
    """Run a detector over every frame of a video and re-encode the result.

    Each decoded frame is converted to RGB, passed through the detector and
    written as a numbered PNG; ffmpeg then assembles the PNGs into an H.264
    ``.mp4`` at the source frame rate. Frames are read until end-of-stream
    (not up to the container's reported frame count), and the per-job
    working directory is removed whether or not the job succeeds.

    Args:
        video_path: Path of the input video.
        detector_name: Name passed to ``get_detector``.

    Returns:
        Path of the encoded video inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no path is given, the video cannot be opened, no frame
            can be read, or ffmpeg fails.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    detector = get_detector(detector_name)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"job_{timestamp}")
    output_frames_dir = os.path.join(job_temp_dir, "output_frames")
    output_video_path = os.path.join(TEMP_DIR, f"{detector_name.lower()}_output_{timestamp}.mp4")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise gr.Error("Failed to open video file.")

    try:
        os.makedirs(output_frames_dir, exist_ok=True)
        frame_rate = get_video_fps(video_path)

        frames_written = 0
        try:
            while True:
                success, frame = cap.read()
                if not success:
                    break
                # OpenCV decodes to BGR; the detector is given an RGB image.
                image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                result_pil = detector(image, detect_resolution=512, image_resolution=1024)
                result_np = cv2.cvtColor(np.array(result_pil), cv2.COLOR_RGB2BGR)
                frame_file = os.path.join(output_frames_dir, f"frame_{frames_written:05d}.png")
                cv2.imwrite(frame_file, result_np)
                frames_written += 1
        finally:
            cap.release()

        if frames_written == 0:
            raise gr.Error("Could not read any frames from the video.")

        cmd = [
            "ffmpeg", "-framerate", str(frame_rate),
            "-i", os.path.join(output_frames_dir, "frame_%05d.png"),
            "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path,
        ]
        run_ffmpeg_command(cmd, "Compiling Video")
    finally:
        # Never leave the (potentially large) frame dump behind.
        shutil.rmtree(job_temp_dir, ignore_errors=True)

    return output_video_path
def extract_first_last_frame(video_path):
    """Save the first and last frame of a video as PNG files.

    The last frame is found by seeking to a few frames before the reported
    end and reading forward until the decoder stops; if nothing can be read
    that way, the first frame is reused as the last one.

    Args:
        video_path: Path of the input video.

    Returns:
        ``[first_frame_path, last_frame_path]`` inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no path is given, the file cannot be opened, it reports
            no frames, or the first frame cannot be read.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise gr.Error("Failed to open video file.")

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames < 1:
        cap.release()
        raise gr.Error("Video has no frames.")

    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
    ok, opening_frame = cap.read()
    if not ok:
        cap.release()
        raise gr.Error("Could not read the first frame.")

    # Seek close to the end, then keep the most recent frame that decodes.
    closing_frame = None
    cap.set(cv2.CAP_PROP_POS_FRAMES, max(1, total_frames - 10))
    ok, candidate = cap.read()
    while ok:
        closing_frame = candidate
        ok, candidate = cap.read()
    cap.release()

    if closing_frame is None:
        closing_frame = opening_frame

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    targets = (
        (os.path.join(TEMP_DIR, f"first_frame_{stamp}.png"), opening_frame),
        (os.path.join(TEMP_DIR, f"last_frame_{stamp}.png"), closing_frame),
    )
    saved_paths = []
    for png_path, bgr_frame in targets:
        # OpenCV frames are BGR; convert to RGB before saving through PIL.
        Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)).save(png_path)
        saved_paths.append(png_path)
    return saved_paths
# ### --- NEW FEATURE FUNCTION --- ###
def batch_extract_first_last_frames(videos, progress=gr.Progress(track_tqdm=True)):
    """Extract the first and last frame of every uploaded video.

    Videos that cannot be opened, report no frames, or fail while being
    processed are skipped with a ``gr.Warning``. Frames are saved as
    ``<name>_first.png`` / ``<name>_last.png`` and the folder is zipped.

    Args:
        videos: Uploaded file objects exposing a ``.name`` path attribute.
        progress: Gradio progress tracker used to iterate over *videos*.

    Returns:
        ``(output_paths, zip_path)``.

    Raises:
        gr.Error: If *videos* is empty or no frame could be extracted.
    """
    if not videos:
        raise gr.Error("Please upload at least one video.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"batch_fl_frames_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)

    output_paths = []
    for video_file in progress.tqdm(videos, desc="Processing videos"):
        cap = None
        try:
            video_path = video_file.name
            base_name = os.path.splitext(os.path.basename(video_path))[0]

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                gr.Warning(f"Skipping '{base_name}': could not open video file.")
                continue

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if total_frames < 1:
                cap.release()
                gr.Warning(f"Skipping '{base_name}': video has no frames.")
                continue

            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            ok, opening_frame = cap.read()
            if not ok:
                cap.release()
                gr.Warning(f"Skipping '{base_name}': could not read the first frame.")
                continue

            # Seek near the end and keep the last frame that still decodes.
            closing_frame = None
            cap.set(cv2.CAP_PROP_POS_FRAMES, max(1, total_frames - 10))
            ok, candidate = cap.read()
            while ok:
                closing_frame = candidate
                ok, candidate = cap.read()
            cap.release()
            if closing_frame is None:
                closing_frame = opening_frame

            first_frame_path = os.path.join(job_temp_dir, f"{base_name}_first.png")
            last_frame_path = os.path.join(job_temp_dir, f"{base_name}_last.png")
            for png_path, bgr_frame in ((first_frame_path, opening_frame), (last_frame_path, closing_frame)):
                Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)).save(png_path)
            output_paths += [first_frame_path, last_frame_path]
        except Exception as e:
            gr.Warning(f"Skipping file {os.path.basename(video_file.name)} due to an error: {e}")
            # Release the capture if the failure happened while it was open.
            if cap is not None and cap.isOpened():
                cap.release()
            continue

    if not output_paths:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("No frames could be extracted from the batch.")

    zip_base_name = os.path.join(TEMP_DIR, f"batch_fl_archive_{timestamp}")
    zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
    return output_paths, zip_path
def video_to_frames_extractor(video_path, skip_rate, rotation, do_resize, out_w, out_h, out_format, jpg_quality):
    """Dump frames of a video to image files and zip them.

    Every ``skip_rate``-th frame (starting with frame 0) is optionally
    rotated and resized, then saved as ``frame_NNNNN.<ext>``.

    Args:
        video_path: Path of the input video.
        skip_rate: Keep one frame out of this many; must be greater than 0.
        rotation: Rotation label understood by ``rotate_image``.
        do_resize: Whether to resize frames to ``out_w`` x ``out_h``.
        out_w: Target width (used only when *do_resize* is true).
        out_h: Target height (used only when *do_resize* is true).
        out_format: Image format name; lower-cased to form the extension.
        jpg_quality: Quality passed to the encoder when *out_format* is "JPG".

    Returns:
        ``(preview_paths, zip_path)`` where ``preview_paths`` holds at most
        the first 100 frame paths and the archive holds all frames.

    Raises:
        gr.Error: On missing input, invalid resize size or skip rate, a
            video that cannot be opened or reports no frames, or when no
            frame could be extracted.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    if do_resize and (out_w <= 0 or out_h <= 0):
        raise gr.Error("If resizing, width and height must be positive.")
    # A zero skip rate would make the modulo below raise ZeroDivisionError.
    if skip_rate is None or skip_rate <= 0:
        raise gr.Error("Frame skip rate must be a positive number.")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise gr.Error("Failed to open video file.")

    frame_paths = []
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count < 1:
            raise gr.Error("Video appears to have no frames.")

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        job_temp_dir = os.path.join(TEMP_DIR, f"v2f_{timestamp}")
        os.makedirs(job_temp_dir, exist_ok=True)

        file_ext = out_format.lower()
        # Number inputs may deliver floats; cv2.resize needs integer sizes.
        target_size = (int(out_w), int(out_h)) if do_resize else None

        for i in range(frame_count):
            success, frame = cap.read()
            if not success:
                break
            if i % skip_rate != 0:
                continue
            frame = rotate_image(frame, rotation)
            if target_size is not None:
                frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_LANCZOS4)
            frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frame_path = os.path.join(job_temp_dir, f"frame_{len(frame_paths):05d}.{file_ext}")
            if out_format == "JPG":
                frame_pil.save(frame_path, quality=int(jpg_quality))
            else:
                frame_pil.save(frame_path)
            frame_paths.append(frame_path)
    finally:
        cap.release()

    if not frame_paths:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("Could not extract any frames.")

    zip_base_name = os.path.join(TEMP_DIR, f"frames_archive_{timestamp}")
    zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
    # Only a preview goes to the gallery; the archive contains everything.
    return frame_paths[:100], zip_path
def create_video_from_frames(files, fps, rotation, do_resize, out_w, out_h):
    """Encode a sequence of uploaded images into an H.264 ``.mp4``.

    Every image is rotated and resized to one common, even-sized frame
    (libx264 with yuv420p requires even dimensions), written as a numbered
    PNG, and the sequence is assembled by ffmpeg. The working directory is
    removed whether or not the job succeeds.

    Args:
        files: Uploaded file objects exposing a ``.name`` path attribute,
            in playback order.
        fps: Frame rate passed to ffmpeg.
        rotation: Rotation label understood by ``rotate_image``.
        do_resize: Use ``out_w`` x ``out_h`` instead of the size of the
            first (rotated) image.
        out_w: Target width (used only when *do_resize* is true).
        out_h: Target height (used only when *do_resize* is true).

    Returns:
        Path of the encoded video inside ``TEMP_DIR``.

    Raises:
        gr.Error: On missing input, invalid resize size, an image that
            cannot be decoded, or an ffmpeg failure.
    """
    if not files:
        raise gr.Error("Please upload frame images first.")
    if do_resize and (out_w <= 0 or out_h <= 0):
        raise gr.Error("If resizing, width and height must be positive.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"f2v_{timestamp}")
    temp_processed_dir = os.path.join(job_temp_dir, "processed")
    output_video_path = os.path.join(TEMP_DIR, f"video_from_frames_{timestamp}.mp4")

    try:
        os.makedirs(temp_processed_dir, exist_ok=True)

        target_size = None
        for i, file in enumerate(files):
            frame = cv2.imread(file.name)
            # cv2.imread signals an unreadable file by returning None.
            if frame is None:
                raise gr.Error(f"Could not read image file: {os.path.basename(file.name)}")
            frame = rotate_image(frame, rotation)

            if target_size is None:
                # The first frame (or the explicit size) fixes the output size.
                h, w = frame.shape[:2]
                if do_resize:
                    w, h = int(out_w), int(out_h)
                target_size = (w - w % 2, h - h % 2)

            frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_LANCZOS4)
            cv2.imwrite(os.path.join(temp_processed_dir, f"pframe_{i:05d}.png"), frame)

        cmd = [
            "ffmpeg", "-framerate", str(fps),
            "-i", os.path.join(temp_processed_dir, "pframe_%05d.png"),
            "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path,
        ]
        run_ffmpeg_command(cmd, "Compiling Video")
    finally:
        shutil.rmtree(job_temp_dir, ignore_errors=True)

    return output_video_path
def image_to_looping_video(image_array, duration, audio_path=None):
    """Turn a still image into a video of a fixed duration.

    The image is written to a temporary PNG, looped by ffmpeg for
    *duration* seconds and scaled to even dimensions. The temporary PNG is
    removed even when ffmpeg fails.

    Args:
        image_array: Image array accepted by ``PIL.Image.fromarray``.
        duration: Length of the video in seconds.
        audio_path: Optional audio file; when given it is encoded as AAC and
            ``-shortest`` is added to the ffmpeg command.

    Returns:
        Path of the encoded video inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no image is given or ffmpeg fails.
    """
    if image_array is None:
        raise gr.Error("Please upload an image first.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    temp_image_path = os.path.join(TEMP_DIR, f"temp_image_{timestamp}.png")
    output_video_path = os.path.join(TEMP_DIR, f"looping_video_{timestamp}.mp4")

    img = Image.fromarray(image_array)
    width, height = img.size
    # yuv420p needs even dimensions; round each side down to an even number.
    width -= width % 2
    height -= height % 2

    try:
        img.save(temp_image_path)
        cmd = ["ffmpeg", "-loop", "1", "-i", temp_image_path]
        if audio_path:
            cmd.extend(["-i", audio_path, "-c:a", "aac", "-shortest"])
        cmd.extend(["-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-vf", f"scale={width}:{height}", "-y", output_video_path])
        run_ffmpeg_command(cmd, "Creating Looping Video...")
    finally:
        if os.path.exists(temp_image_path):
            os.remove(temp_image_path)

    return output_video_path
def create_zoom_videos(files, duration, zoom_ratio, zoom_direction, combine_videos, audio_path=None):
    """Create a zoom video per image and optionally join them.

    Each image is rendered by ffmpeg's ``zoompan`` filter into a 1920x1080,
    30 fps clip that zooms from 1.0 to *zoom_ratio* towards
    *zoom_direction*.

    Args:
        files: Uploaded image file objects exposing a ``.name`` path.
        duration: Length of each clip in seconds; must yield at least one
            frame at 30 fps.
        zoom_ratio: Final zoom factor.
        zoom_direction: One of "Center", "Top", "Bottom", "Left", "Right",
            "Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right".
        combine_videos: When true, the clips are concatenated into one video.
        audio_path: Optional audio file. Without combining it is added to
            every clip; with combining it is added to the joined video.

    Returns:
        ``(video_paths, None, zip_path)`` when not combining, otherwise
        ``(None, final_video_path, zip_path)``.

    Raises:
        gr.Error: On missing input, a too-short duration, an unknown zoom
            direction, or an ffmpeg failure.
    """
    if not files:
        raise gr.Error("Please upload at least one image.")

    fps = 30
    total_frames = int(duration * fps)
    # Guard the division below (and a zero-length zoompan).
    if total_frames < 1:
        raise gr.Error("Duration is too short to produce any video frames.")

    zoom_coords = {
        "Center": "x=iw/2-(iw/zoom)/2:y=ih/2-(ih/zoom)/2", "Top": "x=iw/2-(iw/zoom)/2:y=0", "Bottom": "x=iw/2-(iw/zoom)/2:y=ih-(ih/zoom)",
        "Left": "x=0:y=ih/2-(ih/zoom)/2", "Right": "x=iw-(iw/zoom):y=ih/2-(ih/zoom)/2", "Top-Left": "x=0:y=0",
        "Top-Right": "x=iw-(iw/zoom):y=0", "Bottom-Left": "x=0:y=ih-(ih/zoom)", "Bottom-Right": "x=iw-(iw/zoom):y=ih-(ih/zoom)",
    }
    # Fail once, up front, instead of as a swallowed per-file KeyError.
    if zoom_direction not in zoom_coords:
        raise gr.Error(f"Unknown zoom direction: {zoom_direction}")

    zoom_step = (zoom_ratio - 1.0) / total_frames
    # Identical for every image, so build it once.
    zoom_filter = (f"scale=3840:-1,zoompan=z='min(zoom+{zoom_step},{zoom_ratio})':{zoom_coords[zoom_direction]}:d={total_frames}:s=1920x1080:fps={fps}")

    def process_single_image(input_path, output_path, **kwargs):
        audio_for_clip = kwargs.get('audio_for_clip')
        cmd = ["ffmpeg", "-loop", "1", "-i", input_path]
        if audio_for_clip:
            cmd.extend(["-i", audio_for_clip, "-c:a", "aac", "-shortest"])
        cmd.extend(["-vf", zoom_filter, "-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-b:v", "5M", "-y", output_path])
        run_ffmpeg_command(cmd, f"Creating zoom video for {os.path.basename(input_path)}")

    batch_kwargs = {}
    if not combine_videos and audio_path:
        batch_kwargs['audio_for_clip'] = audio_path

    video_paths, zip_path, job_temp_dir = batch_image_processor(files, process_single_image, "zoom_videos", **batch_kwargs)
    if not combine_videos:
        return video_paths, None, zip_path
    if not video_paths:
        raise gr.Error("No videos were created to be combined.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    silent_combined_path = os.path.join(job_temp_dir, f"combined_silent_{timestamp}.mp4")
    if len(video_paths) > 1:
        file_list_path = os.path.join(job_temp_dir, "files.txt")
        with open(file_list_path, 'w', encoding='utf-8') as f:
            for path in video_paths:
                # Inside a single-quoted concat entry a quote must be written
                # as '\'' (close quote, escaped quote, reopen quote).
                quoted = os.path.abspath(path).replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")
        run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_combined_path], "Combining Videos")
    else:
        shutil.copy(video_paths[0], silent_combined_path)

    if audio_path:
        final_video_path = os.path.join(TEMP_DIR, f"combined_audio_{timestamp}.mp4")
        run_ffmpeg_command(["ffmpeg", "-i", silent_combined_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", final_video_path], "Adding audio...")
    else:
        final_video_path = os.path.join(TEMP_DIR, f"combined_final_{timestamp}.mp4")
        shutil.move(silent_combined_path, final_video_path)
    return None, final_video_path, zip_path
def change_video_speed(video_path, speed_multiplier):
    """Re-time a video with ffmpeg's ``setpts`` filter; the audio is dropped.

    Args:
        video_path: Source video file.
        speed_multiplier: Positive factor; the PTS scale is its reciprocal.

    Returns:
        Path of the re-timed MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If the video is missing or the multiplier is not positive.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    if speed_multiplier <= 0:
        raise gr.Error("Speed multiplier must be positive.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = os.path.join(TEMP_DIR, f"speed_change_{stamp}.mp4")
    pts_factor = 1 / speed_multiplier
    ffmpeg_args = [
        "ffmpeg", "-i", video_path,
        "-filter:v", f"setpts={pts_factor}*PTS",
        "-an",  # no audio stream in the output
        "-y", destination,
    ]
    run_ffmpeg_command(ffmpeg_args, "Changing Video Speed")
    return destination
def _get_atempo_filter_string(speed):
"""Helper function to create a chained atempo filter string for FFMPEG."""
filters = []
# 'atempo' is limited to [0.5, 100.0]
if speed > 100.0:
while speed > 100.0:
filters.append("atempo=100.0")
speed /= 100.0
elif speed < 0.5:
while speed < 0.5:
filters.append("atempo=0.5")
speed /= 0.5
# Add the final filter for the remaining speed adjustment
if speed != 1.0: # Avoid adding atempo=1.0 which does nothing
filters.append(f"atempo={speed}")
return ",".join(filters) if filters else None
def process_slowmo_enhance_video(video_path, output_path, slowdown_factor, method, progress):
    """
    Processes a single video for slow-motion and enhancement.

    Args:
        video_path: Source video file.
        output_path: Where the finished video is written.
        slowdown_factor: Integer slow-down multiple (e.g. 2 for 2x slower).
        method: ``"AI-Enhanced (High Quality)"`` (frame interpolation through
            the module-level ``rife_model``) or ``"Standard (Fast)"`` (ffmpeg
            ``minterpolate``).
        progress: Gradio progress object; called directly and via ``.tqdm``.

    Raises:
        gr.Error: If the path is missing, no frames could be extracted, or
            ``method`` is not one of the two supported values.

    The per-job scratch directory is always removed, including on failure.
    """
    if not video_path:
        raise gr.Error("Missing video path for processing.")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"slowmo_{os.path.basename(video_path)}_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)
    try:
        # Probing happens inside the try so a probe failure still cleans up the scratch dir.
        original_fps = get_video_fps(video_path)
        final_fps = original_fps * slowdown_factor
        has_audio = has_audio_stream(video_path)
        # None when the factor is 1 (nothing to retime).
        atempo_filter_str = _get_atempo_filter_string(1.0 / slowdown_factor) if has_audio else None

        if method == "AI-Enhanced (High Quality)":
            input_frames_dir = os.path.join(job_temp_dir, "input_frames")
            processed_frames_dir = os.path.join(job_temp_dir, "processed_frames")
            os.makedirs(input_frames_dir, exist_ok=True)
            os.makedirs(processed_frames_dir, exist_ok=True)
            load_enhance_ai_models()

            progress(0.1, desc="Extracting frames...")
            run_ffmpeg_command(["ffmpeg", "-i", video_path, os.path.join(input_frames_dir, "frame_%06d.png")])
            input_frames = sorted(os.path.join(input_frames_dir, f) for f in os.listdir(input_frames_dir))
            if not input_frames:
                raise gr.Error("Could not extract any frames from the video.")

            progress(0.3, desc="AI Interpolating frames (This can be slow)...")
            for i in progress.tqdm(range(len(input_frames) - 1), unit="frame pairs"):
                frame0 = cv2.imread(input_frames[i])
                frame1 = cv2.imread(input_frames[i + 1])
                # Sub-indices are zero-padded so the "*.png" glob below keeps
                # frames in order even when more than nine are inserted per pair.
                shutil.copy(input_frames[i], os.path.join(processed_frames_dir, f"proc_{i:06d}_000.png"))
                interpolated_frames = rife_model.process(frame0, frame1, count=slowdown_factor - 1)
                for j, int_frame in enumerate(interpolated_frames, start=1):
                    cv2.imwrite(os.path.join(processed_frames_dir, f"proc_{i:06d}_{j:03d}.png"), int_frame)
            shutil.copy(input_frames[-1], os.path.join(processed_frames_dir, f"proc_{len(input_frames) - 1:06d}_000.png"))

            progress(0.8, desc="Compiling final video...")
            silent_video_path = os.path.join(job_temp_dir, "silent_video.mp4")
            cmd = ["ffmpeg", "-framerate", str(original_fps), "-pattern_type", "glob", "-i", os.path.join(processed_frames_dir, "*.png"), "-c:v", "libx264", "-crf", "18", "-pix_fmt", "yuv420p", "-y", silent_video_path]
            run_ffmpeg_command(cmd)

            if has_audio:
                progress(0.9, desc="Attaching slowed audio...")
                cmd_audio = ["ffmpeg", "-i", silent_video_path, "-i", video_path]
                if atempo_filter_str:
                    # Only add the filter when there is one; a None argument would crash the subprocess call.
                    cmd_audio.extend(["-filter:a", atempo_filter_str])
                cmd_audio.extend(["-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0", "-y", output_path])
                run_ffmpeg_command(cmd_audio)
            else:
                shutil.move(silent_video_path, output_path)

        elif method == "Standard (Fast)":
            progress(0.5, desc="Processing with FFMPEG filter...")
            cmd = ["ffmpeg", "-y", "-i", video_path]
            vf_filter_chain = f"minterpolate=fps={final_fps}:mi_mode=mci,setpts={float(slowdown_factor)}*PTS"
            cmd.extend(["-vf", vf_filter_chain])
            if has_audio:
                if atempo_filter_str:
                    cmd.extend(["-af", atempo_filter_str])
            else:
                cmd.append("-an")
            cmd.extend(["-r", str(original_fps)])
            cmd.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-crf", "18", output_path])
            run_ffmpeg_command(cmd)

        else:
            # Previously an unknown method fell through and produced no output file.
            raise gr.Error(f"Unknown slow-motion method: {method}")
    finally:
        if os.path.exists(job_temp_dir):
            shutil.rmtree(job_temp_dir)
def batch_slowmo_enhance_videos(videos, slowdown_factor_str, method, progress=gr.Progress(track_tqdm=True)):
    """Apply slow-motion processing to every uploaded video and zip the results.

    Args:
        videos: Uploaded file objects; each is read through its ``.name`` path.
        slowdown_factor_str: Factor label such as ``"4x"``; the ``x`` is stripped
            and the rest parsed as an integer.
        method: Processing method name, forwarded to
            ``process_slowmo_enhance_video``.
        progress: Gradio progress tracker.

    Returns:
        ``(output_paths, zip_path)``.

    Raises:
        gr.Error: If nothing was uploaded, the AI method is requested but
            unavailable, or no output was produced.
    """
    if not videos:
        raise gr.Error("Please upload at least one video.")
    slowdown_factor = int(slowdown_factor_str.replace('x', ''))
    wants_ai = "AI-Enhanced" in method
    if wants_ai and not ENHANCE_AI_AVAILABLE:
        raise gr.Error("AI-Enhanced method is not available. Please install 'rife-ncnn-vulkan-python' and restart the app.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    batch_dir = os.path.join(TEMP_DIR, f"slowmo_batch_{stamp}")
    os.makedirs(batch_dir, exist_ok=True)

    finished = []
    total = len(videos)
    for index, upload in enumerate(videos):
        progress(index / total, desc=f"Processing video {index+1}/{total}: {os.path.basename(upload.name)}")
        stem = os.path.splitext(os.path.basename(upload.name))[0]
        target = os.path.join(batch_dir, f"{stem}_slowmo_{slowdown_factor}x.mp4")
        process_slowmo_enhance_video(upload.name, target, slowdown_factor, method, progress)
        finished.append(target)

    if not finished:
        shutil.rmtree(batch_dir)
        raise gr.Error("No videos could be processed from the batch.")

    archive_stem = os.path.join(TEMP_DIR, f"slowmo_archive_{stamp}")
    archive_path = shutil.make_archive(archive_stem, 'zip', batch_dir)
    return finished, archive_path
def change_audio_speed(audio_path, speed_multiplier):
    """Change the tempo of an audio file with ffmpeg's ``atempo`` filter.

    Args:
        audio_path: Source audio file.
        speed_multiplier: Positive tempo factor.

    Returns:
        Path of the new audio file in ``TEMP_DIR``; it keeps the source
        extension (``.mp3`` when the source has none). When no filter is
        needed the source is copied unchanged.

    Raises:
        gr.Error: If the file is missing or the multiplier is not positive.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file.")
    if speed_multiplier <= 0:
        raise gr.Error("Speed multiplier must be a positive number.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Reuse the source extension, falling back to MP3 when there is none.
    suffix = os.path.splitext(os.path.basename(audio_path))[1] or ".mp3"
    destination = os.path.join(TEMP_DIR, f"audio_speed_{speed_multiplier}x_{stamp}{suffix}")

    tempo_chain = _get_atempo_filter_string(speed_multiplier)
    if tempo_chain:
        run_ffmpeg_command(
            ["ffmpeg", "-i", audio_path, "-filter:a", tempo_chain, "-y", destination],
            "Changing audio speed...",
        )
        return destination

    # Nothing to filter: copy the file instead of re-encoding it.
    gr.Info("No speed change applied (multiplier is 1.0).")
    shutil.copy(audio_path, destination)
    return destination
# ### --- NEW FEATURE FUNCTION --- ###
def chop_audio_on_silence(audio_path, silence_thresh, min_silence_len, progress=gr.Progress(track_tqdm=True)):
    """Split an audio file at silent stretches and export each piece as MP3.

    Args:
        audio_path: Source audio file.
        silence_thresh: Silence threshold, cast to ``int`` and passed to
            ``split_on_silence``.
        min_silence_len: Minimum silence length, cast to ``int`` and passed to
            ``split_on_silence``.
        progress: Gradio progress tracker.

    Returns:
        ``(chunk_paths, zip_path)``.

    Raises:
        gr.Error: If the file is missing or unreadable, no chunks are found,
            or nothing was exported.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file to chop.")

    progress(0, desc="Loading audio file...")
    try:
        sound = AudioSegment.from_file(audio_path)
    except Exception as e:
        raise gr.Error(f"Could not read audio file. It may be corrupt or in an unsupported format. Details: {e}")

    progress(0.2, desc="Detecting non-silent chunks...")
    split_options = {
        "min_silence_len": int(min_silence_len),
        "silence_thresh": int(silence_thresh),
        "keep_silence": 200,  # Keep a bit of silence at the start/end of each chunk
    }
    pieces = split_on_silence(sound, **split_options)
    if not pieces:
        raise gr.Error("No audio chunks were found above the silence threshold. Try using a lower (more negative) threshold value.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    chunk_dir = os.path.join(TEMP_DIR, f"audio_chop_{stamp}")
    os.makedirs(chunk_dir, exist_ok=True)

    exported = []
    for index, piece in enumerate(progress.tqdm(pieces, desc="Exporting chunks...")):
        chunk_file = os.path.join(chunk_dir, f"chunk_{index:04d}.mp3")
        piece.export(chunk_file, format="mp3")
        exported.append(chunk_file)

    if not exported:
        shutil.rmtree(chunk_dir)
        raise gr.Error("Failed to export any audio chunks.")

    archive_stem = os.path.join(TEMP_DIR, f"audio_chop_archive_{stamp}")
    # Return the exported chunk paths together with the zip archive.
    return exported, shutil.make_archive(archive_stem, 'zip', chunk_dir)
def reverse_video(video_path, audio_option):
    """Reverse a video with ffmpeg, optionally reversing or removing its audio.

    Args:
        video_path: Source video file.
        audio_option: ``"Reverse Audio"`` adds the ``areverse`` audio filter,
            ``"Remove Audio"`` adds ``-an``; any other value adds neither.

    Returns:
        Path of the reversed MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no video was supplied.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = os.path.join(TEMP_DIR, f"reversed_video_{stamp}.mp4")

    ffmpeg_args = ["ffmpeg", "-i", video_path, "-vf", "reverse"]
    if audio_option == "Reverse Audio":
        ffmpeg_args += ["-af", "areverse"]
    if audio_option == "Remove Audio":
        ffmpeg_args.append("-an")
    ffmpeg_args += ["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", destination]

    run_ffmpeg_command(ffmpeg_args, "Reversing video...")
    return destination
def add_audio_to_video(video_path, audio_path):
    """Mux an audio file onto a video, replacing any audio the video had.

    The video stream is copied without re-encoding; the audio is encoded as
    AAC and the output stops at the shorter of the two inputs.

    Args:
        video_path: Source video file.
        audio_path: Audio file to attach.

    Returns:
        Path of the muxed MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If either input is missing.
    """
    if not video_path:
        raise gr.Error("Please upload a video.")
    if not audio_path:
        raise gr.Error("Please upload an audio file.")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"video_with_audio_{timestamp}.mp4")
    cmd = [
        "ffmpeg", "-i", video_path, "-i", audio_path,
        # Explicit mapping: without it ffmpeg's automatic stream selection may
        # keep the video's own audio track instead of the supplied one.
        "-map", "0:v:0", "-map", "1:a:0",
        "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", output_video_path,
    ]
    run_ffmpeg_command(cmd, "Adding Audio to Video")
    return output_video_path
def extract_audio(video_path, audio_format="mp3"):
    """Extract the audio track of a video into a standalone file.

    Args:
        video_path: Source video file.
        audio_format: Output extension. ``"mp3"``, ``"aac"`` and ``"wav"`` get
            explicit encoder options; any other value adds none.

    Returns:
        Path of the extracted audio file inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no video was supplied.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = os.path.join(TEMP_DIR, f"extracted_audio_{stamp}.{audio_format}")

    encoder_args = {
        "mp3": ["-c:a", "libmp3lame", "-q:a", "2"],  # VBR quality
        "aac": ["-c:a", "aac", "-b:a", "192k"],
        "wav": ["-c:a", "pcm_s16le"],
    }
    ffmpeg_args = ["ffmpeg", "-i", video_path, "-vn"]  # -vn strips video
    ffmpeg_args += encoder_args.get(audio_format, [])
    ffmpeg_args += ["-y", destination]

    run_ffmpeg_command(ffmpeg_args, "Extracting audio...")
    return destination
def create_gif_from_video(video_path, start_time, end_time):
    """Convert a video, or a time range of it, into a palette-optimised GIF.

    Two ffmpeg passes are run: ``palettegen`` builds a colour palette, then
    ``paletteuse`` renders the GIF with it (15 fps, 480 px wide).

    Args:
        video_path: Source video file.
        start_time: Start of the range in seconds; 0 means "from the start".
        end_time: End of the range in seconds; 0 means "to the end".

    Returns:
        Path of the GIF inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no video was supplied, or the end is not after the start.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_gif_path = os.path.join(TEMP_DIR, f"video_to_gif_{timestamp}.gif")
    palette_path = os.path.join(TEMP_DIR, f"palette_{timestamp}.png")

    trim_args = []
    if start_time > 0 or end_time > 0:
        if end_time > 0 and end_time <= start_time:
            raise gr.Error("End time must be after start time.")
        if start_time > 0:
            trim_args.extend(["-ss", str(start_time)])
        if end_time > 0:
            trim_args.extend(["-to", str(end_time)])

    base_filters = "fps=15,scale=480:-1:flags=lanczos"
    try:
        run_ffmpeg_command(
            ["ffmpeg", "-i", video_path] + trim_args
            + ["-vf", f"{base_filters},palettegen", "-y", palette_path]
        )
        # The trim options must follow BOTH inputs so they act as output
        # options. Placed between the two "-i" arguments they would apply to
        # the palette image instead of the video.
        run_ffmpeg_command(
            ["ffmpeg", "-i", video_path, "-i", palette_path] + trim_args
            + ["-filter_complex", f"{base_filters}[x];[x][1:v]paletteuse", "-y", output_gif_path]
        )
    finally:
        # The palette is only an intermediate; remove it even if a pass fails.
        if os.path.exists(palette_path):
            os.remove(palette_path)
    return output_gif_path
def get_frame_at_time(video_path, time_in_seconds=0):
    """Grab a single frame of a video as an RGB ``PIL.Image``.

    ffmpeg is tried first (one PNG frame piped to stdout). If that fails for
    any reason the error is printed and OpenCV is used instead.

    Args:
        video_path: Source video file.
        time_in_seconds: Position of the wanted frame.

    Returns:
        The frame as an RGB image, or ``None`` when there is no video path or
        the OpenCV fallback could not read a frame.
    """
    if not video_path:
        return None

    try:
        grab_cmd = [
            'ffmpeg', '-ss', str(time_in_seconds), '-i', video_path,
            '-vframes', '1', '-f', 'image2pipe', '-c:v', 'png', '-',
        ]
        completed = subprocess.run(grab_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
        png_stream = io.BytesIO(completed.stdout)
        return Image.open(png_stream).convert("RGB")
    except Exception as e:
        print(f"Error extracting frame for crop preview: {e}")
        capture = cv2.VideoCapture(video_path)
        capture.set(cv2.CAP_PROP_POS_MSEC, time_in_seconds * 1000)
        ok, bgr_frame = capture.read()
        capture.release()
        if not ok:
            return None
        # OpenCV yields BGR; convert before building the PIL image.
        return Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
def crop_video(video_path, x, y, w, h, do_resize, out_w, out_h):
    """Crop a video to a rectangle and optionally scale the result.

    Args:
        video_path: Source video file.
        x, y: Top-left corner of the crop rectangle.
        w, h: Crop size; rounded down to even numbers.
        do_resize: When true, the cropped video is scaled to ``out_w``x``out_h``.
        out_w, out_h: Target size; rounded down to even numbers.

    Returns:
        Path of the cropped MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no video was supplied, or a crop/resize dimension is not
            positive after rounding.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    w, h, x, y = int(w), int(h), int(x), int(y)
    w -= w % 2
    h -= h % 2
    if w <= 0 or h <= 0:
        raise gr.Error("Crop dimensions must be positive.")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"cropped_video_{timestamp}.mp4")
    vf_filters = [f"crop={w}:{h}:{x}:{y}"]
    if do_resize:
        out_w, out_h = int(out_w), int(out_h)
        out_w -= out_w % 2
        out_h -= out_h % 2
        # Validate AFTER normalisation, like the crop size above: a request
        # such as 1 px rounds down to 0, which the scale filter does not
        # treat as an actual size.
        if out_w <= 0 or out_h <= 0:
            raise gr.Error("Resize dimensions must be positive.")
        vf_filters.append(f"scale={out_w}:{out_h}")
    cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(vf_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
    run_ffmpeg_command(cmd, "Cropping video...")
    return output_video_path
def trim_video(video_path, start_time, end_time):
    """Cut a time range out of a video, re-encoding video and copying audio.

    Args:
        video_path: Source video file.
        start_time: Start in seconds; negative values are treated as 0.
        end_time: End in seconds. A value not greater than the start is reset
            to 0, in which case no ``-to`` option is passed.

    Returns:
        Path of the trimmed MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no video was supplied.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    if start_time < 0:
        start_time = 0
    if end_time <= start_time:
        end_time = 0

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = os.path.join(TEMP_DIR, f"trimmed_video_{stamp}.mp4")

    ffmpeg_args = ["ffmpeg", "-i", video_path, "-ss", str(start_time)]
    if end_time > 0:
        ffmpeg_args += ["-to", str(end_time)]
    ffmpeg_args += ["-c:v", "libx264", "-c:a", "copy", "-pix_fmt", "yuv420p", "-y", destination]

    run_ffmpeg_command(ffmpeg_args, "Trimming Video")
    return destination
def apply_video_watermark(video_path, text, position, opacity, size_scale, color):
    """Burn a text watermark into a video with ffmpeg's ``drawtext`` filter.

    Args:
        video_path: Source video file.
        text: Watermark text.
        position: One of ``"Top-Left"``, ``"Top-Right"``, ``"Bottom-Left"``,
            ``"Bottom-Right"``, ``"Center"``.
        opacity: Opacity in percent; divided by 100 for ffmpeg.
        size_scale: Size control; larger values give a larger font.
        color: Font colour, passed through to ``fontcolor``.

    Returns:
        Path of the watermarked MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If the video or the text is missing.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    if not text:
        raise gr.Error("Watermark text cannot be empty.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = os.path.join(TEMP_DIR, f"watermarked_video_{stamp}.mp4")

    video_h = get_video_dimensions(video_path)[1]
    if video_h == 0:
        video_h = 720  # Fallback

    # Escape quote, colon and comma, in that order, for the filter string.
    escaped_text = text
    for raw, escaped in (("'", r"'\''"), (":", r"\:"), (",", r"\,")):
        escaped_text = escaped_text.replace(raw, escaped)

    anchors = {
        "Top-Left": "x=20:y=20",
        "Top-Right": "x=w-tw-20:y=20",
        "Bottom-Left": "x=20:y=h-th-20",
        "Bottom-Right": "x=w-tw-20:y=h-th-20",
        "Center": "x=(w-tw)/2:y=(h-th)/2",
    }
    alpha = opacity / 100.0
    font_px = int(video_h / (50 - (size_scale * 3.5)))

    drawtext_options = [
        f"text='{escaped_text}'",
        anchors[position],
        f"fontsize={font_px}",
        f"fontcolor={color}@{alpha}",
    ]
    drawtext_filter = "drawtext=" + ":".join(drawtext_options)

    ffmpeg_args = ["ffmpeg", "-i", video_path, "-vf", drawtext_filter]
    ffmpeg_args += ["-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p"]
    ffmpeg_args += ["-y", destination]
    run_ffmpeg_command(ffmpeg_args, "Applying text watermark...")
    return destination
def remove_video_background(video_path):
    """Remove the background from every frame and build a transparent WebM.

    Frames are dumped to PNG with OpenCV, passed one by one through rembg's
    ``remove``, and re-assembled with ffmpeg (VP9, ``yuva420p``).

    Args:
        video_path: Source video file.

    Returns:
        Path of the WebM inside ``TEMP_DIR``.

    Raises:
        gr.Error: If no video was supplied or no frame could be read.

    The per-job scratch directory is always removed, including on failure.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"bg_rem_job_{timestamp}")
    input_frames_dir = os.path.join(job_temp_dir, "input_frames")
    output_frames_dir = os.path.join(job_temp_dir, "output_frames")
    os.makedirs(input_frames_dir, exist_ok=True)
    os.makedirs(output_frames_dir, exist_ok=True)
    try:
        fps = get_video_fps(video_path)
        cap = cv2.VideoCapture(video_path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            for i in range(frame_count):
                success, frame = cap.read()
                if not success:
                    break
                cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame)
        finally:
            # Release the capture handle even if writing a frame fails.
            cap.release()

        frame_names = sorted(os.listdir(input_frames_dir))
        if not frame_names:
            raise gr.Error("Could not extract any frames from the video.")
        for filename in frame_names:
            with Image.open(os.path.join(input_frames_dir, filename)) as img:
                remove(img).save(os.path.join(output_frames_dir, filename))

        output_video_path = os.path.join(TEMP_DIR, f"bg_removed_{timestamp}.webm")
        cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0", "-b:v", "1M", "-y", output_video_path]
        run_ffmpeg_command(cmd, "Compiling transparent video...")
        return output_video_path
    finally:
        # Frame dumps can be large; never leave them behind.
        shutil.rmtree(job_temp_dir, ignore_errors=True)
def generate_ass_from_whisper(result):
    """Generates an ASS subtitle file content from a Whisper result object with word timestamps.

    Args:
        result: Mapping with a ``'segments'`` list. Each segment has
            ``'start'`` and ``'end'`` in seconds and, normally, a ``'words'``
            list of ``{'word', 'start', 'end'}`` entries.

    Returns:
        The full ASS document as one string. Every segment becomes one
        ``Dialogue`` line whose words carry ``{\\k<centiseconds>}`` karaoke
        tags. A segment without word timings falls back to its plain
        ``'text'`` instead of raising.
    """
    ass_content = [
        "[Script Info]",
        "Title: Generated by Skriptz",
        "ScriptType: v4.00+",
        "WrapStyle: 0",
        "PlayResX: 1920",
        "PlayResY: 1080",
        "\n[V4+ Styles]",
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
        "Style: Default,Arial,55,&H00FFFFFF,&H0000FFFF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,1,2,10,10,25,1",
        "\n[Events]",
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
    ]

    def format_time(s):
        """Format seconds as ``H:MM:SS.cc``."""
        # Round to whole centiseconds first; truncating the float fraction
        # turned values such as 2.3 into "...02.29".
        total_cs = int(round(s * 100))
        total_seconds, cs = divmod(total_cs, 100)
        h, r = divmod(total_seconds, 3600)
        m, sec = divmod(r, 60)
        return f"{h}:{m:02}:{sec:02}.{cs:02}"

    for segment in result['segments']:
        start_time = format_time(segment['start'])
        end_time = format_time(segment['end'])
        words = segment.get('words') or []
        if words:
            tagged_words = []
            for word_info in words:
                word = word_info['word'].strip()
                # Rounded, not truncated, for the same reason as format_time.
                duration_cs = int(round((word_info['end'] - word_info['start']) * 100))
                tagged_words.append(f"{{\\k{duration_cs}}}{word}")
            text = " ".join(tagged_words).strip()
        else:
            # No word-level timing available: emit the segment text untagged.
            text = segment.get('text', '').strip()
        ass_content.append(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{text}")
    return "\n".join(ass_content)
def transcribe_media(media_path, model_name):
    """Transcribe an audio or video file with Whisper and write subtitle files.

    For video inputs (by extension) the audio track is first extracted to a
    temporary MP3, which is deleted again once transcription has finished.

    Args:
        media_path: Uploaded file object; read through its ``.name`` path.
        model_name: Whisper model name, passed to ``load_whisper_model``.

    Returns:
        ``(full_text, [srt_path, vtt_path, ass_path])``; the subtitle files
        are written to ``TEMP_DIR`` under the media file's base name.

    Raises:
        gr.Error: If no file was given, the model is unavailable, or the
            video has no audio track.
    """
    if media_path is None:
        raise gr.Error("Please upload a video or audio file first.")
    model = load_whisper_model(model_name)
    if model is None:
        raise gr.Error("Whisper model is not available.")
    audio_path = media_path.name
    base_name = os.path.splitext(os.path.basename(media_path.name))[0]

    extracted_audio_path = None
    try:
        if audio_path.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm')):
            extracted_audio_path = os.path.join(TEMP_DIR, f"{base_name}.mp3")
            try:
                run_ffmpeg_command(["ffmpeg", "-y", "-i", audio_path, "-q:a", "0", "-map", "a", extracted_audio_path])
            except gr.Error as e:
                if "does not contain any stream" in str(e):
                    raise gr.Error("The uploaded video has no audio track.") from e
                raise
            audio_path = extracted_audio_path
        result = model.transcribe(audio_path, word_timestamps=True, verbose=False)
    finally:
        # The extracted MP3 is only an intermediate; do not leave it behind.
        if extracted_audio_path and os.path.exists(extracted_audio_path):
            os.remove(extracted_audio_path)

    def format_ts(s, separator=','):
        """Format seconds as ``HH:MM:SS<sep>mmm``."""
        # Round to whole milliseconds first; truncating the float fraction
        # turned values such as 2.3 into "...02,299".
        total_ms = int(round(s * 1000))
        total_seconds, ms = divmod(total_ms, 1000)
        h, r = divmod(total_seconds, 3600)
        m, sec = divmod(r, 60)
        return f"{h:02}:{m:02}:{sec:02}{separator}{ms:03}"

    srt_path = os.path.join(TEMP_DIR, f"{base_name}.srt")
    vtt_path = os.path.join(TEMP_DIR, f"{base_name}.vtt")
    ass_path = os.path.join(TEMP_DIR, f"{base_name}.ass")
    with open(srt_path, "w", encoding="utf-8") as srt_f, open(vtt_path, "w", encoding="utf-8") as vtt_f:
        vtt_f.write("WEBVTT\n\n")
        for i, seg in enumerate(result["segments"]):
            start, end, text = seg['start'], seg['end'], seg['text'].strip()
            srt_f.write(f"{i + 1}\n{format_ts(start)} --> {format_ts(end)}\n{text}\n\n")
            vtt_f.write(f"{format_ts(start, '.')} --> {format_ts(end, '.')}\n{text}\n\n")
    ass_content = generate_ass_from_whisper(result)
    with open(ass_path, "w", encoding="utf-8") as ass_f:
        ass_f.write(ass_content)
    return result["text"], [srt_path, vtt_path, ass_path]
def transcribe_and_prep_burn(media_file, model_name):
    """Transcribe a file and prepare the UI state for burning subtitles.

    Args:
        media_file: Uploaded file object; read through its ``.name`` path.
        model_name: Whisper model name, forwarded to ``transcribe_media``.

    Returns:
        ``(text, subtitle_files, video_path_or_None, visibility_update)``.
        The video path and a visible update are returned only when the file
        extension marks the upload as a video.

    Raises:
        gr.Error: If no file was supplied.
    """
    if not media_file:
        raise gr.Error("Please upload a file first.")

    video_suffixes = ('.mp4', '.mov', '.mkv', '.avi', '.webm')
    is_video = media_file.name.lower().endswith(video_suffixes)
    text, files = transcribe_media(media_file, model_name)

    if not is_video:
        return text, files, None, gr.update(visible=False)
    return text, files, media_file.name, gr.update(visible=True)
def reformat_srt_for_word_wrap(original_srt_path, words_per_line):
    """Rewrite an SRT file so each cue's text wraps after a fixed word count.

    Args:
        original_srt_path: Path of the SRT file to reformat.
        words_per_line: A text line is closed once it holds at least this
            many words.

    Returns:
        Path of the reformatted SRT inside ``TEMP_DIR``, or ``None`` when the
        source path is empty or does not exist.
    """
    if not original_srt_path or not os.path.exists(original_srt_path):
        return None

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    reformatted_path = os.path.join(TEMP_DIR, f"reformatted_{stamp}.srt")

    with open(original_srt_path, 'r', encoding='utf-8') as src, \
            open(reformatted_path, 'w', encoding='utf-8') as dst:
        # Cues are separated by a blank line.
        for cue in src.read().strip().split('\n\n'):
            cue_lines = cue.split('\n')
            if len(cue_lines) < 3:
                # Not index + timing + text: copy through unchanged.
                dst.write(cue + '\n\n')
                continue

            index_line, timing_line = cue_lines[0], cue_lines[1]
            wrapped = []
            pending = []
            for token in ' '.join(cue_lines[2:]).split():
                pending.append(token)
                if len(pending) >= words_per_line:
                    wrapped.append(' '.join(pending))
                    pending = []
            if pending:
                wrapped.append(' '.join(pending))

            body = '\n'.join(wrapped)
            dst.write(f"{index_line}\n{timing_line}\n{body}\n\n")
    return reformatted_path
def burn_block_subtitles(video_path, srt_file_obj, font_size_scale, font_color, words_per_line):
    """Burn word-wrapped SRT subtitles into a video.

    Args:
        video_path: Source video file.
        srt_file_obj: Sequence of file objects; element 0 is used as the SRT.
        font_size_scale: Size control; larger values give a larger font.
        font_color: Colour string sliced as ``#RRGGBB``.
        words_per_line: Word count per subtitle line, forwarded to
            ``reformat_srt_for_word_wrap``.

    Returns:
        Path of the subtitled MP4 inside ``TEMP_DIR``.

    Raises:
        gr.Error: If the subtitle file could not be reformatted.

    The temporary reformatted SRT is removed in all cases.
    """
    source_srt = srt_file_obj[0].name
    wrapped_srt = None
    try:
        wrapped_srt = reformat_srt_for_word_wrap(source_srt, words_per_line)
        if not wrapped_srt:
            raise gr.Error("Failed to reformat subtitle file.")

        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        destination = os.path.join(TEMP_DIR, f"subtitled_video_{stamp}.mp4")

        video_h = get_video_dimensions(video_path)[1]
        if video_h == 0:
            video_h = 720
        font_px = int(video_h / (32 - (font_size_scale * 2)))

        # The style colour is written with the channels in blue-green-red order.
        red, green, blue = font_color[1:3], font_color[3:5], font_color[5:7]
        ffmpeg_color = f"&H00{(blue + green + red).upper()}"

        srt_for_filter = wrapped_srt.replace('\\', '/').replace(':', r'\\:')
        style = f"Fontsize={font_px},PrimaryColour={ffmpeg_color},BorderStyle=1,Outline=1,Shadow=0.5,MarginV=25"
        vf_filter = f"subtitles='{srt_for_filter}':force_style='{style}'"

        ffmpeg_args = [
            "ffmpeg", "-y", "-i", video_path, "-vf", vf_filter,
            "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", destination,
        ]
        run_ffmpeg_command(ffmpeg_args, "Burning block subtitles into video...")
        return destination
    finally:
        if wrapped_srt and os.path.exists(wrapped_srt):
            os.remove(wrapped_srt)
def burn_karaoke_subtitles(video_path, subtitle_files, font_size_scale, base_color, highlight_color):
    """Burn karaoke-style ASS subtitles into a video.

    A copy of the ASS file is written with its ``Style:`` line patched (font
    size, two colours, outline and shadow set to 0) and then rendered with
    ffmpeg's ``subtitles`` filter.

    Args:
        video_path: Source video file.
        subtitle_files: Sequence of file objects; element 2 is used as the ASS.
        font_size_scale: Size control; larger values give a larger font.
        base_color: ``#RRGGBB`` colour written into the PrimaryColour field.
        highlight_color: ``#RRGGBB`` colour written into the SecondaryColour
            field.

    Returns:
        Path of the karaoke MP4 inside ``TEMP_DIR``.

    The temporary styled ASS file is removed in all cases.
    """
    source_ass = subtitle_files[2].name
    styled_ass = None
    try:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        styled_ass = os.path.join(TEMP_DIR, f"style_applied_{stamp}.ass")
        destination = os.path.join(TEMP_DIR, f"karaoke_video_{stamp}.mp4")

        video_h = get_video_dimensions(video_path)[1]
        if video_h == 0:
            video_h = 720
        font_px = int((video_h / 20) * (font_size_scale / 5))

        def format_ass_color(hex_color):
            """Convert ``#RRGGBB`` (leading ``#`` optional) to ``&H00BBGGRR``."""
            digits = hex_color[1:] if hex_color.startswith('#') else hex_color
            return "&H00" + (digits[4:6] + digits[2:4] + digits[0:2]).upper()

        highlight_ass = format_ass_color(highlight_color)
        base_ass = format_ass_color(base_color)

        with open(source_ass, 'r', encoding='utf-8') as src, open(styled_ass, 'w', encoding='utf-8') as dst:
            for raw_line in src:
                if not raw_line.startswith("Style:"):
                    dst.write(raw_line)
                    continue
                fields = raw_line.split(',')
                fields[2] = str(font_px)  # Fontsize
                # NOTE(review): base goes to PrimaryColour and highlight to
                # SecondaryColour -- confirm this matches the renderer's
                # karaoke colour roles.
                fields[3] = base_ass
                fields[4] = highlight_ass
                if len(fields) > 17:
                    fields[16] = '0'  # Outline width
                    fields[17] = '0'  # Shadow width
                dst.write(','.join(fields))

        ass_for_filter = styled_ass.replace('\\', '/').replace(':', r'\\:')
        vf_filter = f"subtitles='{ass_for_filter}'"
        ffmpeg_args = [
            "ffmpeg", "-y", "-i", video_path, "-vf", vf_filter,
            "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", destination,
        ]
        run_ffmpeg_command(ffmpeg_args, "Burning karaoke subtitles into video...")
        return destination
    finally:
        if styled_ass and os.path.exists(styled_ass):
            os.remove(styled_ass)
def burn_subtitles_wrapper(video_path, subtitle_files, style, font_size_scale, block_font_color, block_words_per_line, kara_base_color, kara_highlight_color):
    """Dispatch subtitle burning to the block or karaoke implementation.

    Args:
        video_path: Source video file.
        subtitle_files: Subtitle file objects produced by transcription.
        style: ``"Block"`` or ``"Karaoke"``.
        font_size_scale: Size control shared by both styles.
        block_font_color, block_words_per_line: Options for the block style.
        kara_base_color, kara_highlight_color: Options for the karaoke style.

    Returns:
        Path of the subtitled video.

    Raises:
        gr.Error: If inputs are missing or the style is not recognised.
    """
    if not video_path or not subtitle_files:
        raise gr.Error("Missing video or subtitle files. Please transcribe first.")

    if style == "Karaoke":
        return burn_karaoke_subtitles(video_path, subtitle_files, font_size_scale, kara_base_color, kara_highlight_color)
    if style == "Block":
        return burn_block_subtitles(video_path, subtitle_files, font_size_scale, block_font_color, block_words_per_line)
    raise gr.Error("Invalid subtitle style selected.")
def remove_background_single(input_path, output_path, **kwargs):
    """Remove the background of one image with rembg and save the result.

    Args:
        input_path: Image to process.
        output_path: Where the processed image is saved.
        **kwargs: Accepted but not used.
    """
    with Image.open(input_path) as source_image:
        cutout = remove(source_image)
        cutout.save(output_path)
def remove_background_batch(files):
    """Remove the background from every uploaded image.

    Args:
        files: Uploaded image files, forwarded to ``batch_image_processor``.

    Returns:
        ``(output_paths, zip_path)``; the third value returned by the batch
        processor is discarded.
    """
    processed_paths, archive_path, _unused = batch_image_processor(
        files, remove_background_single, "bg_removed"
    )
    return processed_paths, archive_path
def resize_convert_single_image(input_path, output_path, **kwargs):
    """Convert one image to another format, optionally resizing it.

    Args:
        input_path: Image to read.
        output_path: Where the converted image is saved.
        **kwargs: Optional settings with these defaults:
            ``output_format='JPG'``, ``quality=95``, ``enable_resize=False``,
            ``max_w=1024``, ``max_h=1024``,
            ``resize_mode="Fit (preserve aspect ratio)"``. Any other resize
            mode stretches the image to exactly ``max_w`` x ``max_h``.
    """
    settings = {
        'output_format': 'JPG',
        'quality': 95,
        'enable_resize': False,
        'max_w': 1024,
        'max_h': 1024,
        'resize_mode': "Fit (preserve aspect ratio)",
    }
    settings.update(kwargs)
    output_format = settings['output_format']
    target_size = (settings['max_w'], settings['max_h'])

    # Pillow names the format "JPEG", not "JPG".
    pil_format = 'JPEG' if output_format == 'JPG' else output_format
    save_kwargs = {'quality': settings['quality']} if pil_format in ('JPEG', 'WEBP') else {}

    with Image.open(input_path) as img:
        # Images in RGBA, P or LA mode are converted to RGB for JPG/WEBP output.
        if output_format in ('JPG', 'WEBP') and img.mode in ('RGBA', 'P', 'LA'):
            img = img.convert("RGB")
        if settings['enable_resize']:
            if settings['resize_mode'] == "Fit (preserve aspect ratio)":
                img.thumbnail(target_size, Image.Resampling.LANCZOS)
            else:  # Stretch
                img = img.resize(target_size, Image.Resampling.LANCZOS)
        img.save(output_path, pil_format, **save_kwargs)
def batch_resize_convert_images(files, output_format, quality, enable_resize, max_w, max_h, resize_mode):
    """Resize/convert a batch of images and zip the results.

    Args:
        files: Uploaded file objects; each is read through its ``.name`` path.
        output_format: Target format name, also used (lower-cased) as the
            output extension.
        quality: Encoder quality, forwarded to the single-image converter.
        enable_resize: Resize flag; forced on when both dimensions are positive.
        max_w, max_h: Target dimensions.
        resize_mode: Resize mode name, forwarded to the single-image converter.

    Returns:
        ``(output_paths[:100], zip_path)``; the archive contains every
        converted image.

    Raises:
        gr.Error: If nothing was uploaded or no image could be processed.
    """
    if not files:
        raise gr.Error("Please upload at least one image.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_name = "resized_converted"
    work_dir = os.path.join(TEMP_DIR, f"{job_name}_{stamp}")
    os.makedirs(work_dir, exist_ok=True)

    # Enable resizing if dimensions are provided, even if checkbox is somehow out of sync
    if max_w > 0 and max_h > 0:
        enable_resize = True
    options = dict(
        output_format=output_format, quality=quality, enable_resize=enable_resize,
        max_w=max_w, max_h=max_h, resize_mode=resize_mode,
    )

    converted = []
    extension = output_format.lower()
    for upload in files:
        try:
            stem = os.path.splitext(os.path.basename(upload.name))[0]
            target = os.path.join(work_dir, f"{stem}.{extension}")
            resize_convert_single_image(upload.name, target, **options)
        except Exception as e:
            # Best effort: a bad file is reported and skipped.
            print(f"Skipping file {upload.name} due to error: {e}")
            continue
        converted.append(target)

    if not converted:
        shutil.rmtree(work_dir)
        raise gr.Error("No images could be processed.")

    archive_stem = os.path.join(TEMP_DIR, f"{job_name}_archive_{stamp}")
    archive_path = shutil.make_archive(archive_stem, 'zip', work_dir)
    return converted[:100], archive_path
def apply_watermark_single(input_path, output_path, watermark_text, position, opacity):
    """Draw a white text watermark onto one image and save it as RGB.

    Args:
        input_path: Image to read.
        output_path: Where the watermarked image is saved.
        watermark_text: Text to draw.
        position: One of ``"Top-Left"``, ``"Top-Right"``, ``"Bottom-Left"``,
            ``"Bottom-Right"``, ``"Center"``.
        opacity: Text opacity in percent.

    Raises:
        ValueError: If ``watermark_text`` is empty.
    """
    if not watermark_text:
        # Checked before any file is opened.
        raise ValueError("Watermark text cannot be empty.")

    # Open inside a context manager so the source file handle is closed;
    # convert() returns an independent in-memory copy.
    with Image.open(input_path) as source:
        image = source.convert("RGBA")

    txt = Image.new("RGBA", image.size, (255, 255, 255, 0))
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", int(image.width / 20))
    except IOError:
        font = ImageFont.load_default()
    d = ImageDraw.Draw(txt)
    bbox = d.textbbox((0, 0), watermark_text, font=font)
    w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
    pos_map = {
        "Top-Left": (10, 10),
        "Top-Right": (image.width - w - 10, 10),
        "Bottom-Left": (10, image.height - h - 10),
        "Bottom-Right": (image.width - w - 10, image.height - h - 10),
        "Center": ((image.width - w) / 2, (image.height - h) / 2),
    }
    d.text(pos_map[position], watermark_text, font=font, fill=(255, 255, 255, int(255 * (opacity / 100))))
    Image.alpha_composite(image, txt).convert("RGB").save(output_path)
def apply_watermark_batch(files, watermark_text, position, opacity):
    """Apply the same text watermark to every uploaded image.

    Args:
        files: Uploaded image files, forwarded to ``batch_image_processor``.
        watermark_text: Text to draw.
        position: Anchor name understood by ``apply_watermark_single``.
        opacity: Text opacity in percent.

    Returns:
        ``(output_paths, zip_path)``.

    Raises:
        gr.Error: If no watermark text was provided.
    """
    if not watermark_text:
        raise gr.Error("Please provide watermark text.")

    def watermark_one(input_path, output_path):
        """Watermark a single file with the settings captured above."""
        return apply_watermark_single(
            input_path, output_path,
            watermark_text=watermark_text, position=position, opacity=opacity,
        )

    processed_paths, archive_path, _unused = batch_image_processor(files, watermark_one, "watermarked")
    return processed_paths, archive_path
# --- BATCH CONVERTER REPLACEMENT FUNCTIONS ---
# Convert/compress one video with ffmpeg and return the output path.
# The output goes to `output_dir` (default TEMP_DIR) as "<base_name>.<out_format>";
# the base name defaults to "converted_<timestamp>".
# Raises gr.Error when no video path is given.
def convert_compress_video(video_path, out_format, v_codec, crf_value, scale_option, a_codec, a_bitrate, output_dir=None, base_name=None):
if not video_path:
raise gr.Error("Please upload a video to convert.")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
_output_dir = output_dir if output_dir else TEMP_DIR
_base_name = base_name if base_name else f"converted_{timestamp}"
output_filename = f"{_base_name}.{out_format.lower()}"
output_path = os.path.join(_output_dir, output_filename)
cmd = ["ffmpeg", "-i", video_path]
vf_filters = []
# scale_option is either "Original" or a height label such as "720p".
if scale_option != "Original":
w, h = get_video_dimensions(video_path)
if w > 0 and h > 0:
target_h = int(scale_option.replace('p', ''))
# Keep the aspect ratio; the width is rounded to a multiple of 2.
target_w = round(w * target_h / h / 2) * 2
vf_filters.append(f"scale={target_w}:{target_h}")
# NOTE(review): indentation is lost in this copy of the file, so it cannot be
# determined here whether the pad/setsar filters below are added for every
# video or only inside one of the scaling branches above -- confirm against
# the original source before restructuring this function.
vf_filters.append("pad=ceil(iw/2)*2:ceil(ih/2)*2")
vf_filters.append("setsar=1")
if vf_filters:
cmd.extend(["-vf", ",".join(vf_filters)])
cmd.extend(["-c:v", v_codec])
# -crf is only passed for the two x26x encoders.
if v_codec in ["libx264", "libx265"]:
cmd.extend(["-crf", str(crf_value)])
cmd.extend(["-pix_fmt", "yuv420p"])
# Audio: stream-copy, or re-encode at a_bitrate kbit/s; -an when the source has no audio.
if has_audio_stream(video_path):
if a_codec == "copy":
cmd.extend(["-c:a", "copy"])
else:
cmd.extend(["-c:a", a_codec, "-b:a", f"{a_bitrate}k"])
else:
cmd.append("-an")
# +faststart is only added for MP4/MOV outputs.
if out_format.lower() in ["mp4", "mov"]:
cmd.extend(["-movflags", "+faststart"])
cmd.extend(["-y", output_path])
run_ffmpeg_command(cmd, f"Converting {os.path.basename(video_path)}.")
return output_path
def batch_convert_compress_videos(files, out_format, v_codec, crf_value, scale_option, a_codec, a_bitrate, progress=gr.Progress(track_tqdm=True)):
    """Convert/compress a batch of videos and zip the results.

    Files that fail to convert are reported with ``gr.Warning`` and skipped.

    Args:
        files: Uploaded file objects; each is read through its ``.name`` path.
        out_format, v_codec, crf_value, scale_option, a_codec, a_bitrate:
            Conversion settings, forwarded unchanged to
            ``convert_compress_video``.
        progress: Gradio progress tracker.

    Returns:
        ``(output_paths, zip_path)``.

    Raises:
        gr.Error: If nothing was uploaded or no video could be processed.
    """
    if not files:
        raise gr.Error("Please upload at least one video to convert.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    batch_dir = os.path.join(TEMP_DIR, f"batch_convert_{stamp}")
    os.makedirs(batch_dir, exist_ok=True)

    shared_settings = dict(
        out_format=out_format,
        v_codec=v_codec,
        crf_value=crf_value,
        scale_option=scale_option,
        a_codec=a_codec,
        a_bitrate=a_bitrate,
        output_dir=batch_dir,
    )
    converted = []
    for upload in progress.tqdm(files, desc="Converting videos"):
        try:
            stem = os.path.splitext(os.path.basename(upload.name))[0]
            converted.append(
                convert_compress_video(video_path=upload.name, base_name=stem, **shared_settings)
            )
        except Exception as e:
            gr.Warning(f"Skipping file {os.path.basename(upload.name)} due to an error: {e}")
            continue

    if not converted:
        shutil.rmtree(batch_dir)
        raise gr.Error("No videos could be processed from the batch.")

    archive_stem = os.path.join(TEMP_DIR, f"video_convert_archive_{stamp}")
    return converted, shutil.make_archive(archive_stem, 'zip', batch_dir)
def convert_audio(media_path, out_format, a_bitrate, output_dir=None, base_name=None):
    """Extract/convert the audio of a media file with FFmpeg.

    Args:
        media_path: Path of the source audio or video file.
        out_format: Target container/extension ("mp3", "aac", "ogg", "wav",
            "flac"); matched case-insensitively. Any other value leaves the
            codec choice to FFmpeg.
        a_bitrate: Audio bitrate in kbit/s; only used for mp3, aac and ogg.
        output_dir: Destination directory; defaults to ``TEMP_DIR``.
        base_name: Output file stem; defaults to a timestamped name.

    Returns:
        Path of the converted file.

    Raises:
        gr.Error: If ``media_path`` is empty.
    """
    if not media_path:
        raise gr.Error("Please provide a media file to convert.")

    # Normalise once: the extension was already lower-cased, but the codec
    # lookup used to be case-sensitive, so "MP3" lost its codec/bitrate flags.
    fmt = out_format.lower()

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target_dir = output_dir if output_dir else TEMP_DIR
    stem = base_name if base_name else f"audio_converted_{timestamp}"
    output_path = os.path.join(target_dir, f"{stem}.{fmt}")

    bitrate_args = ["-b:a", f"{a_bitrate}k"]
    codec_args = {
        "mp3": ["-c:a", "libmp3lame", *bitrate_args],
        "aac": ["-c:a", "aac", *bitrate_args],
        "ogg": ["-c:a", "libopus", *bitrate_args],
        "wav": ["-c:a", "pcm_s16le"],
        "flac": ["-c:a", "flac"],
    }

    # "-vn" drops any video stream so only audio is written.
    cmd = ["ffmpeg", "-i", media_path, "-vn"]
    cmd.extend(codec_args.get(fmt, []))
    cmd.extend(["-y", output_path])
    run_ffmpeg_command(cmd, f"Converting audio from {os.path.basename(media_path)}...")
    return output_path
def batch_convert_audio(files, out_format, a_bitrate, progress=gr.Progress(track_tqdm=True)):
    """Convert the audio of several uploaded media files and zip the results.

    Videos without an audio stream are skipped with a warning. A file that
    raises during conversion is also reported and skipped.

    Args:
        files: Uploaded file objects; the source path is read from ``.name``.
        out_format: Target audio format, forwarded to ``convert_audio``.
        a_bitrate: Audio bitrate in kbit/s, forwarded to ``convert_audio``.
        progress: Gradio progress tracker used to iterate over ``files``.

    Returns:
        ``(first_output_path, zip_path)``: the first converted file and a zip
        archive of the whole job directory.

    Raises:
        gr.Error: If ``files`` is empty or nothing could be converted.
    """
    if not files:
        raise gr.Error("Please upload at least one file to convert.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"batch_audio_convert_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)

    output_paths = []
    # "song.mp4" and "song.wav" would both become "song.<fmt>"; keep the
    # output stems unique so later files do not overwrite earlier ones.
    used_names = set()
    for media_file in progress.tqdm(files, desc="Converting audio"):
        try:
            is_video = get_file_type(media_file.name) == 'video'
            if is_video and not has_audio_stream(media_file.name):
                gr.Warning(f"Skipping video '{os.path.basename(media_file.name)}' as it has no audio track.")
                continue

            stem = os.path.splitext(os.path.basename(media_file.name))[0]
            base_name, suffix = stem, 1
            while base_name.lower() in used_names:
                base_name = f"{stem}_{suffix}"
                suffix += 1
            used_names.add(base_name.lower())

            output_path = convert_audio(
                media_path=media_file.name,
                out_format=out_format,
                a_bitrate=a_bitrate,
                output_dir=job_temp_dir,
                base_name=base_name,
            )
            output_paths.append(output_path)
        except Exception as e:
            # Deliberate best-effort: report and move on to the next file.
            gr.Warning(f"Skipping file {os.path.basename(media_file.name)} due to an error: {e}")
            continue

    if not output_paths:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("No files could be processed from the batch.")

    zip_base_name = os.path.join(TEMP_DIR, f"audio_convert_archive_{timestamp}")
    zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
    return output_paths[0], zip_path
# --- END BATCH CONVERTER REPLACEMENT FUNCTIONS ---
def apply_video_fade(video_path, fade_in_duration, fade_out_duration):
    """Add a fade-in and/or fade-out to a video.

    Args:
        video_path: Source video path.
        fade_in_duration: Fade-in length in seconds (0 disables it).
        fade_out_duration: Fade-out length in seconds (0 disables it).

    Returns:
        Path of the re-encoded video, or ``video_path`` when both fades are 0.

    Raises:
        gr.Error: If no video is given or the fades exceed the video length.
    """
    if not video_path:
        raise gr.Error("Please upload a video.")

    total_length = get_media_duration(video_path)
    if fade_in_duration + fade_out_duration > total_length:
        raise gr.Error("The sum of fade durations cannot be greater than the video duration.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"faded_video_{stamp}.mp4")

    filters = []
    if fade_in_duration > 0:
        filters.append(f"fade=t=in:st=0:d={fade_in_duration}")
    if fade_out_duration > 0:
        # The fade-out is anchored to the end of the clip.
        fade_out_start = total_length - fade_out_duration
        filters.append(f"fade=t=out:st={fade_out_start}:d={fade_out_duration}")

    if not filters:
        gr.Info("No fade applied.")
        return video_path

    cmd = [
        "ffmpeg", "-i", video_path,
        "-vf", ",".join(filters),
        "-c:a", "copy",
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-y", output_video_path,
    ]
    run_ffmpeg_command(cmd, "Applying video fade...")
    return output_video_path
# --- ACCURATE Color Grading Functions ---
def preview_color_grading_ffmpeg(image_np, brightness, contrast, saturation, sharpness):
    """Apply color grading to a single frame using FFMPEG for an accurate preview.

    The frame goes through the same ``eq`` / ``unsharp`` filter chain that
    ``apply_color_grading`` builds, so the preview matches the final render.

    Args:
        image_np: Frame as an array accepted by ``Image.fromarray``, or None.
        brightness: ``eq`` brightness; 0.0 means unchanged.
        contrast: ``eq`` contrast; 1.0 means unchanged.
        saturation: ``eq`` saturation; 1.0 means unchanged.
        sharpness: ``unsharp`` amount; values <= 0 disable sharpening.

    Returns:
        A PIL image with the grading applied. The unmodified frame is returned
        when no adjustment is active or FFMPEG fails. None if ``image_np`` is
        None.
    """
    if image_np is None:
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    input_path = os.path.join(TEMP_DIR, f"cg_preview_in_{timestamp}.png")
    output_path = os.path.join(TEMP_DIR, f"cg_preview_out_{timestamp}.png")
    try:
        eq_filters = []
        if brightness != 0.0:
            eq_filters.append(f"brightness={brightness}")
        if contrast != 1.0:
            eq_filters.append(f"contrast={contrast}")
        if saturation != 1.0:
            eq_filters.append(f"saturation={saturation}")

        vf_parts = []
        if eq_filters:
            vf_parts.append("eq=" + ":".join(eq_filters))
        if sharpness > 0.0:
            vf_parts.append(f"unsharp=5:5:{sharpness}")

        if not vf_parts:
            # Nothing to apply: skip the disk round-trip and the ffmpeg call.
            return Image.fromarray(image_np)

        Image.fromarray(image_np).save(input_path)
        cmd = ["ffmpeg", "-i", input_path, "-vf", ",".join(vf_parts), "-y", output_path]
        result = subprocess.run(cmd, capture_output=True, text=True, check=False)
        if result.returncode == 0 and os.path.exists(output_path):
            # Copy so the image stays usable after the temp file is deleted.
            with Image.open(output_path) as img:
                return img.copy()

        # Log the failure instead of silently falling back.
        print(f"FFMPEG preview failed (exit code {result.returncode}): {result.stderr}")
        return Image.fromarray(image_np)
    except Exception as e:
        # Preview is best-effort: never break the UI, show the original frame.
        print(f"Error in FFMPEG preview: {e}")
        return Image.fromarray(image_np)
    finally:
        for temp_path in (input_path, output_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)
def apply_color_grading(video_path, brightness, contrast, saturation, sharpness):
    """Render a color-graded copy of a whole video with FFMPEG.

    Args:
        video_path: Source video path.
        brightness: ``eq`` brightness; 0.0 means unchanged.
        contrast: ``eq`` contrast; 1.0 means unchanged.
        saturation: ``eq`` saturation; 1.0 means unchanged.
        sharpness: ``unsharp`` amount; values <= 0 disable sharpening.

    Returns:
        Path of the graded video, or ``video_path`` when nothing is adjusted.

    Raises:
        gr.Error: If no video is given.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"graded_video_{stamp}.mp4")

    # All color tweaks share one "eq" filter; sharpening is a separate filter.
    eq_options = []
    if brightness != 0.0:
        eq_options.append(f"brightness={brightness}")
    if contrast != 1.0:
        eq_options.append(f"contrast={contrast}")
    if saturation != 1.0:
        eq_options.append(f"saturation={saturation}")

    chain = []
    if eq_options:
        chain.append("eq=" + ":".join(eq_options))
    if sharpness > 0.0:
        chain.append(f"unsharp=5:5:{sharpness}")

    if not chain:
        gr.Info("No adjustments made. Returning original video path.")
        return video_path

    cmd = [
        "ffmpeg", "-i", video_path,
        "-vf", ",".join(chain),
        "-c:a", "copy",
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-y", output_video_path,
    ]
    run_ffmpeg_command(cmd, "Applying Color Grading...")
    return output_video_path
# --- END ACCURATE Color Grading Functions ---
def trim_and_fade_audio(audio_path, start_time, end_time, fade_in_duration, fade_out_duration):
    """Cut an audio file to ``[start_time, end_time]`` and optionally fade it.

    Args:
        audio_path: Source audio path.
        start_time: Trim start in seconds; negative values are treated as 0.
        end_time: Trim end in seconds; values <= 0 or beyond the file length
            mean "until the end".
        fade_in_duration: Fade-in length in seconds (0 disables it).
        fade_out_duration: Fade-out length in seconds (0 disables it).

    Returns:
        Path of the resulting MP3 file.

    Raises:
        gr.Error: If no file is given, the range is empty, or the fades are
            longer than the trimmed range.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file.")

    full_length = get_media_duration(audio_path)
    if start_time < 0:
        start_time = 0
    if end_time <= 0 or end_time > full_length:
        end_time = full_length
    if start_time >= end_time:
        raise gr.Error("Start time must be less than end time.")

    trimmed_duration = end_time - start_time
    if fade_in_duration + fade_out_duration > trimmed_duration:
        raise gr.Error("Sum of fade durations cannot be greater than the trimmed audio duration.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_audio_path = os.path.join(TEMP_DIR, f"edited_audio_{stamp}.mp3")

    # Fade offsets are expressed relative to the trimmed clip.
    fades = []
    if fade_in_duration > 0:
        fades.append(f"afade=t=in:st=0:d={fade_in_duration}")
    if fade_out_duration > 0:
        fade_out_start = trimmed_duration - fade_out_duration
        fades.append(f"afade=t=out:st={fade_out_start}:d={fade_out_duration}")

    cmd = ["ffmpeg", "-ss", str(start_time), "-to", str(end_time), "-i", audio_path]
    if fades:
        cmd.extend(["-af", ",".join(fades)])
    cmd.extend(["-y", output_audio_path])
    run_ffmpeg_command(cmd, "Trimming and fading audio...")
    return output_audio_path
# --- Gradual Speed Ramp (1x -> 0.5x -> 1x) ---
def create_gradual_ramp_video(video_path, progress=gr.Progress(track_tqdm=True)):
    """
    Render a copy of the video whose playback speed eases 1x -> 0.5x -> 1x.

    The ramp spans the whole clip and is built in one of two ways:

    * Clips shorter than 2.0s get a single constant slow-down (2/3 speed)
      with frame interpolation, avoiding many tiny, unstable segments.
    * Longer clips are split into 10-60 equal segments. Each segment gets its
      own speed taken from a parabola (slowest in the middle) and is frame
      interpolated, then all segments are concatenated.

    Audio, when present, is tempo-adjusted to match using the filter string
    returned by ``_get_atempo_filter_string``.

    Returns:
        Path of the rendered MP4.

    Raises:
        gr.Error: If no video is given or its duration cannot be determined.
    """
    if not video_path:
        raise gr.Error("Please upload a video to process.")

    progress(0, desc="Analyzing video properties...")
    duration = get_media_duration(video_path)
    if duration == 0:
        raise gr.Error("Could not determine video duration. The file may be corrupt.")
    fps = get_video_fps(video_path)
    has_audio = has_audio_stream(video_path)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"gradual_ramp_{stamp}.mp4")
    # Encoder settings shared by both code paths.
    encode_args = ["-c:v", "libx264", "-pix_fmt", "yuv420p", "-crf", "18", output_video_path]

    # --- Short clips: one constant slow-down instead of a segmented ramp ---
    if duration < 2.0:
        gr.Info("Video is short (< 2s). Applying a simplified, robust ramp effect.")
        progress(0.2, desc="Applying simplified ramp for short video...")

        # The ramp is treated as stretching the clip by 1.5x, i.e. an average
        # speed of 1 / 1.5 = 2/3.
        avg_speed = 2.0 / 3.0
        # Interpolate up to this rate so the slowed clip has enough frames.
        interpolated_fps = fps / avg_speed

        graph = [
            f"[0:v]minterpolate=fps={interpolated_fps}:mi_mode=mci,setpts=PTS/{avg_speed}[vout]"
        ]
        if has_audio:
            tempo = _get_atempo_filter_string(avg_speed)
            audio_chain = "[0:a]asetpts=PTS"
            if tempo:
                audio_chain += f",{tempo}"
            graph.append(audio_chain + "[aout]")

        cmd = ["ffmpeg", "-y", "-i", video_path, "-filter_complex", ";".join(graph), "-map", "[vout]"]
        if has_audio:
            cmd.extend(["-map", "[aout]"])
        cmd.extend(encode_args)

        progress(0.6, desc="Executing simplified FFMPEG command...")
        run_ffmpeg_command(cmd, "Applying simplified speed ramp...")
        return output_video_path

    # --- Clips >= 2.0 seconds: piecewise approximation of the speed curve ---
    progress(0.1, desc="Planning detailed speed ramp...")

    # Aim for ~0.25s segments, keep the count even so the ramp is symmetric,
    # then clamp to 10..60 to bound the size of the filter graph.
    target_segment_duration = 0.25
    num_segments = int(duration / target_segment_duration)
    if num_segments % 2 != 0:
        num_segments += 1
    num_segments = max(10, min(num_segments, 60))

    min_speed = 0.5
    half_segments = num_segments / 2.0

    graph = []
    video_labels = []
    audio_labels = []
    for i in progress.tqdm(range(num_segments), desc="Building FFMPEG filter command..."):
        start_time = i * duration / num_segments
        end_time = (i + 1) * duration / num_segments

        # x runs from about -1 to +1 across the clip; the parabola gives
        # speed ~1 at both ends and min_speed in the middle.
        x = (i - half_segments + 0.5) / half_segments
        speed = (1.0 - min_speed) * (x ** 2) + min_speed
        speed = max(0.01, speed)  # never let the speed reach zero

        interpolated_fps_seg = fps / speed
        setpts_val_seg = 1.0 / speed
        graph.append(
            f"[0:v]trim=start={start_time}:end={end_time},setpts=PTS-STARTPTS,"  # cut the segment
            f"minterpolate=fps={interpolated_fps_seg}:mi_mode=mci,"  # synthesize in-between frames
            f"setpts={setpts_val_seg}*PTS[v{i}]"  # stretch timestamps to slow it down
        )
        video_labels.append(f"[v{i}]")

        if has_audio:
            tempo = _get_atempo_filter_string(speed)
            audio_chain = f"[0:a]atrim=start={start_time}:end={end_time},asetpts=PTS-STARTPTS"
            if tempo:
                audio_chain += f",{tempo}"
            graph.append(audio_chain + f"[a{i}]")
            audio_labels.append(f"[a{i}]")

    progress(0.5, desc="Finalizing filter command...")

    # Stitch the processed segments back together, video and audio separately.
    include_audio = bool(has_audio and audio_labels)
    graph.append("".join(video_labels) + f"concat=n={num_segments}:v=1:a=0[vout]")
    if include_audio:
        graph.append("".join(audio_labels) + f"concat=n={num_segments}:v=0:a=1[aout]")

    cmd = ["ffmpeg", "-y", "-i", video_path, "-filter_complex", ";".join(graph), "-map", "[vout]"]
    if include_audio:
        cmd.extend(["-map", "[aout]"])
    cmd.extend(encode_args)

    progress(0.6, desc="Executing FFMPEG... This may take a while.")
    run_ffmpeg_command(cmd, "Applying gradual speed ramp...")
    return output_video_path
# --- FLUX API ---
# Maps the model label shown in the UI to the identifier handed to
# gradio_client.Client in call_flux_api.
FLUX_MODELS = {
    "FLUX.1-schnell (Fast)": "black-forest-labs/FLUX.1-schnell",
    "FLUX.1-dev (High Quality)": "black-forest-labs/FLUX.1-dev",
}
def call_flux_api(prompt, model_choice, width, height, hf_token):
    """Generate an image through the FLUX endpoint selected by ``model_choice``.

    Args:
        prompt: Text prompt for the image.
        model_choice: A key of ``FLUX_MODELS``.
        width: Requested image width.
        height: Requested image height.
        hf_token: Hugging Face user access token passed to the client.

    Returns:
        The first element of the ``/infer`` prediction result.

    Raises:
        gr.Error: If the token is missing, the model is unknown, or the remote
            call fails (the original exception is chained as the cause).
    """
    if not hf_token:
        raise gr.Error("Hugging Face User Access Token is required.")
    if model_choice not in FLUX_MODELS:
        # Explicit message instead of a bare KeyError text from the lookup.
        raise gr.Error(f"API call failed: unknown model '{model_choice}'.")

    # The "dev" model is run with more inference steps than the fast one.
    steps = 8 if "dev" in model_choice else 4
    try:
        client = Client(FLUX_MODELS[model_choice], hf_token=hf_token)
        result = client.predict(
            prompt=prompt,
            seed=0,
            randomize_seed=True,
            width=width,
            height=height,
            num_inference_steps=steps,
            api_name="/infer",
        )
        return result[0]
    except Exception as e:
        # Surface the failure in the UI while keeping the traceback chain.
        raise gr.Error(f"API call failed: {e}") from e
def get_image_as_base64(path):
    """Return the file at ``path`` as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension and falls back to
    ``image/png`` when the extension is unknown or not an image type.

    Args:
        path: Path to the image file; may be None or empty.

    Returns:
        The data URI string, or None when ``path`` is empty or the file does
        not exist.
    """
    import mimetypes  # local import: the module does not import it at file level

    if not path:
        return None

    mime_type, _ = mimetypes.guess_type(str(path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    try:
        with open(path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
    except FileNotFoundError:
        return None
    return f"data:{mime_type};base64,{encoded}"
# --- Transfer Tab Functions (Simplified) ---
def filter_presets(query, all_presets):
    """Build a choices update holding the preset names that match ``query``.

    Matching is a case-insensitive substring test. An empty query lists every
    preset. The resulting choices are sorted.
    """
    if query:
        needle = query.lower()
        names = [name for name in all_presets if needle in name.lower()]
    else:
        names = list(all_presets.keys())
    return gr.update(choices=sorted(names))
def save_preset(presets, name, url):
    """Store ``url`` under ``name`` in the presets mapping.

    Surrounding whitespace is removed from both values before storing, so
    " demo " and "demo" refer to the same preset.

    Args:
        presets: Mapping of preset name to target URL; updated in place.
        name: Preset name; must contain non-whitespace characters.
        url: Target URL; must contain non-whitespace characters.

    Returns:
        ``(presets, choices_update)``. When validation fails the presets are
        returned unchanged together with a no-op ``gr.update()``.
    """
    if not name or not name.strip():
        gr.Warning("Preset name cannot be empty.")
        return presets, gr.update()
    if not url or not url.strip():
        gr.Warning("Target URL cannot be empty.")
        return presets, gr.update()

    # Validation already ignores surrounding whitespace; store the same
    # normalised values so stray spaces do not create near-duplicate presets.
    name = name.strip()
    url = url.strip()

    presets[name] = url
    gr.Info(f"Preset '{name}' saved!")
    return presets, gr.update(choices=sorted(presets))
def delete_preset(presets, name):
    """Remove the preset called ``name`` from the mapping (in place).

    Returns:
        On success ``(presets, choices_update, "")``: the update refreshes the
        sorted choices and clears the selection, and the empty string is the
        third output. If the preset does not exist, a warning is shown and the
        presets come back with two no-op updates.
    """
    if name not in presets:
        gr.Warning(f"Preset '{name}' not found.")
        return presets, gr.update(), gr.update()

    del presets[name]
    gr.Info(f"Preset '{name}' deleted!")
    remaining = sorted(presets)
    return presets, gr.update(choices=remaining, value=None), ""
def load_preset(presets, name):
    """Look up the URL saved under ``name``; unknown names give an empty string."""
    return presets[name] if name in presets else ""
# --- Join/Beat-Sync/Etc Video Feature Functions ---
def ping_pong_video(video_path, audio_option):
    """Create a "ping-pong" clip: the video followed by itself in reverse.

    Args:
        video_path: Source video path.
        audio_option: "Reverse Audio" also reverses the audio of the second
            half; "Original Audio Only" joins the two video streams and keeps
            only the original audio track; any other value strips the audio
            from the reversed half.

    Returns:
        Path of the joined MP4.

    Raises:
        gr.Error: If no video is given.
    """
    if not video_path:
        raise gr.Error("Please upload a video.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"pingpong_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)
    output_video_path = os.path.join(TEMP_DIR, f"pingpong_video_{timestamp}.mp4")

    # try/finally so the scratch directory is removed even if ffmpeg fails.
    try:
        reversed_video_path = os.path.join(job_temp_dir, "reversed_temp.mp4")
        cmd_reverse = ["ffmpeg", "-i", video_path, "-vf", "reverse"]
        if audio_option == "Reverse Audio":
            cmd_reverse.extend(["-af", "areverse"])
        else:
            cmd_reverse.append("-an")
        cmd_reverse.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", reversed_video_path])
        run_ffmpeg_command(cmd_reverse)

        if audio_option == "Original Audio Only":
            # Join the video streams with the concat filter and map only the
            # first input's audio ("0:a?" tolerates a missing audio stream).
            cmd_join = [
                "ffmpeg", "-i", video_path, "-i", reversed_video_path,
                "-filter_complex", "[0:v][1:v]concat=n=2:v=1[v]",
                "-map", "[v]", "-map", "0:a?", "-c:a", "copy",
                "-y", output_video_path,
            ]
        else:
            file_list_path = os.path.join(job_temp_dir, "files.txt")
            with open(file_list_path, 'w', encoding='utf-8') as f:
                for clip_path in (video_path, reversed_video_path):
                    # In the concat list a literal single quote inside a
                    # quoted path must be written as '\'' or the entry breaks.
                    escaped = os.path.abspath(clip_path).replace("'", "'\\''")
                    f.write(f"file '{escaped}'\n")
            cmd_join = [
                "ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path,
                "-c", "copy", "-y", output_video_path,
            ]
        run_ffmpeg_command(cmd_join)
    finally:
        shutil.rmtree(job_temp_dir, ignore_errors=True)
    return output_video_path
# ### --- NEW FEATURE: VIDEO STABILIZATION --- ###
def stabilize_video(video_path, shakiness, smoothing):
    """
    Stabilize a video using a two-pass FFMPEG (vid.stab) process.

    Pass 1 (``vidstabdetect``) writes motion data to a temporary transforms
    file. Pass 2 (``vidstabtransform``) reads that file and renders the
    smoothed video.

    Args:
        video_path: Source video path.
        shakiness: Value for the ``shakiness`` option of ``vidstabdetect``.
        smoothing: Value for the ``smoothing`` option of ``vidstabtransform``.

    Returns:
        Path of the stabilized MP4.

    Raises:
        gr.Error: If no video is given.
    """
    if not video_path:
        raise gr.Error("Please upload a video to stabilize.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    transforms_path = os.path.join(TEMP_DIR, f"transforms_{timestamp}.trf")
    output_video_path = os.path.join(TEMP_DIR, f"stabilized_{timestamp}.mp4")

    # Inside a filter string a backslash is an escape character, so a path
    # built with Windows separators would be mangled. Forward slashes work on
    # every platform.
    transforms_arg = transforms_path.replace("\\", "/")

    try:
        # Pass 1: Detect shakiness
        detect_cmd = [
            "ffmpeg", "-i", video_path,
            "-vf", f"vidstabdetect=shakiness={shakiness}:result={transforms_arg}",
            "-f", "null", "-",
        ]
        run_ffmpeg_command(detect_cmd, "Analyzing video for stabilization (Pass 1/2)...")

        # Pass 2: Apply stabilization
        transform_cmd = [
            "ffmpeg", "-i", video_path,
            "-vf", f"vidstabtransform=input={transforms_arg}:smoothing={smoothing}:optalgo=gauss",
            "-c:a", "copy",
            "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y",
            output_video_path,
        ]
        run_ffmpeg_command(transform_cmd, "Applying stabilization (Pass 2/2)...")
    finally:
        # Clean up the temporary transforms file
        if os.path.exists(transforms_path):
            os.remove(transforms_path)
    return output_video_path
# ### --- NEW FEATURE: AUTO JUMP-CUT & WAVEFORM PREVIEW --- ###
def generate_waveform_preview(video_path):
    """Generate a PNG image of the audio waveform of ``video_path``.

    Returns:
        Path of the generated PNG, or None when no path is given, the file has
        no audio stream, or FFMPEG fails. Failures are printed rather than
        raised so the UI keeps working.
    """
    if not video_path or not has_audio_stream(video_path):
        return None  # nothing to draw

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_image_path = os.path.join(TEMP_DIR, f"waveform_{timestamp}.png")

    # Mono mix -> compand -> one waveform picture.
    cmd = [
        "ffmpeg", "-i", video_path,
        "-filter_complex", "aformat=channel_layouts=mono,compand,showwavespic=s=1280x240:colors=#38bdf8",
        "-frames:v", "1",
        "-y", output_image_path,
    ]
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Include ffmpeg's own output; the exception text alone only carries
        # the command line and exit code.
        print(f"--- WAVEFORM GENERATION ERROR ---\n{e}\n{e.stderr}")
        return None
    except FileNotFoundError as e:
        # The ffmpeg executable is not available.
        print(f"--- WAVEFORM GENERATION ERROR ---\n{e}")
        return None
    return output_image_path
def auto_jump_cut(video_path, silence_threshold, min_silence_duration, resolution_choice, custom_w, custom_h, progress=gr.Progress(track_tqdm=True)):
    """
    Remove the silent parts of a video and stitch the remaining parts together.

    Silence is located with FFMPEG's ``silencedetect`` filter. The gaps
    between silences are trimmed out of the source and concatenated into one
    output, optionally scaled and padded to a target resolution.

    Args:
        video_path: Source video; must contain an audio stream.
        silence_threshold: Noise threshold in dB for ``silencedetect``.
        min_silence_duration: Minimum silence length in seconds.
        resolution_choice: "Keep Original", "1080p (1920x1080)",
            "Portrait (1080x1920)" or "Custom". Any other value scales to the
            source dimensions rounded down to even numbers.
        custom_w, custom_h: Target size used when the choice is "Custom".
        progress: Gradio progress tracker.

    Returns:
        Path of the jump-cut MP4, or ``video_path`` when no silence is found.

    Raises:
        gr.Error: If no video is given, it has no audio, or nothing is left to
            keep after removing the silences.
    """
    if not video_path:
        raise gr.Error("Please upload a video to process.")
    if not has_audio_stream(video_path):
        raise gr.Error("The uploaded video has no audio track. Cannot detect silence.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"jumpcut_{timestamp}.mp4")

    # --- 1. Detect Silence ---
    progress(0.1, desc="Analyzing for silent sections...")
    silence_cmd = [
        "ffmpeg", "-i", video_path,
        "-af", f"silencedetect=noise={silence_threshold}dB:d={min_silence_duration}",
        "-f", "null", "-",
    ]
    print(f"Running silence detection: {' '.join(silence_cmd)}")
    result = subprocess.run(silence_cmd, capture_output=True, text=True, encoding='utf-8')

    # silencedetect can report a slightly negative start for silence at the
    # very beginning. The pattern must accept a sign, otherwise that start is
    # dropped and every later start/end pair is shifted by one.
    number = r"(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)"
    silence_starts = [max(0.0, float(x)) for x in re.findall(rf"silence_start: {number}", result.stderr)]
    silence_ends = [float(x) for x in re.findall(rf"silence_end: {number}", result.stderr)]
    if not silence_starts:
        gr.Info("No silence was detected with the current settings. Returning original video.")
        return video_path

    # --- 2. Calculate Segments to Keep ---
    progress(0.3, desc="Calculating video cuts...")
    video_duration = get_media_duration(video_path)

    # A silence still running at end-of-file may have no "silence_end" line;
    # close it at the end of the video so zip() does not discard it.
    missing_ends = len(silence_starts) - len(silence_ends)
    if missing_ends > 0:
        silence_ends.extend([video_duration] * missing_ends)
    silences = list(zip(silence_starts, silence_ends))

    keep_segments = []
    last_silence_end = 0.0
    for start, end in silences:
        if start > last_silence_end:
            keep_segments.append((last_silence_end, start))
        last_silence_end = end
    if last_silence_end < video_duration:
        keep_segments.append((last_silence_end, video_duration))
    if not keep_segments:
        raise gr.Error("Failed to calculate any segments to keep. Try adjusting silence parameters.")

    # --- 3. Build the FFMPEG Filter Complex Command ---
    progress(0.5, desc="Building FFMPEG command...")
    scale_pad_filter = ""
    target_w, target_h = get_video_dimensions(video_path)
    if resolution_choice != "Keep Original":
        if resolution_choice == "1080p (1920x1080)":
            target_w, target_h = 1920, 1080
        elif resolution_choice == "Portrait (1080x1920)":
            target_w, target_h = 1080, 1920
        elif resolution_choice == "Custom":
            target_w, target_h = int(custom_w), int(custom_h)
        # yuv420p needs even dimensions.
        target_w -= target_w % 2
        target_h -= target_h % 2
        scale_pad_filter = f"scale={target_w}:{target_h}:force_original_aspect_ratio=decrease,pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2,setsar=1"

    filter_complex_parts = []
    video_outputs = []
    audio_outputs = []
    for i, (start, end) in enumerate(keep_segments):
        filter_complex_parts.append(f"[0:v]trim=start={start}:end={end},setpts=PTS-STARTPTS[v{i}]")
        filter_complex_parts.append(f"[0:a]atrim=start={start}:end={end},asetpts=PTS-STARTPTS[a{i}]")
        if scale_pad_filter:
            filter_complex_parts.append(f"[v{i}]{scale_pad_filter}[scaled_v{i}]")
            video_outputs.append(f"[scaled_v{i}]")
        else:
            video_outputs.append(f"[v{i}]")
        audio_outputs.append(f"[a{i}]")
    filter_complex_parts.append(f"{''.join(video_outputs)}concat=n={len(keep_segments)}:v=1:a=0[vout]")
    filter_complex_parts.append(f"{''.join(audio_outputs)}concat=n={len(keep_segments)}:v=0:a=1[aout]")
    filter_complex_str = ";".join(filter_complex_parts)

    # --- 4. Execute the Final Command ---
    final_cmd = [
        "ffmpeg", "-i", video_path,
        "-filter_complex", filter_complex_str,
        "-map", "[vout]", "-map", "[aout]",
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-y", output_video_path,
    ]
    progress(0.7, desc="Generating final jump-cut video...")
    run_ffmpeg_command(final_cmd, desc="Stitching video segments...")
    return output_video_path
# ### --- NEW FEATURE: VIDEO SILENCE CHOPPER --- ###
def chop_video_on_silence(video_path, silence_threshold, min_silence_duration, resolution_choice, custom_w, custom_h, progress=gr.Progress(track_tqdm=True)):
    """
    Split a video into multiple clips, removing the silent parts.

    Silence is located with FFMPEG's ``silencedetect`` filter. Each stretch
    between two silences is exported as its own clip into a job directory,
    which is then zipped.

    Args:
        video_path: Source video; must contain an audio stream.
        silence_threshold: Noise threshold in dB for ``silencedetect``.
        min_silence_duration: Minimum silence length in seconds.
        resolution_choice: "Keep Original" (clips are stream-copied),
            "1080p (1920x1080)", "Portrait (1080x1920)" or "Custom" (clips are
            re-encoded). Any other value re-encodes at the source dimensions
            rounded down to even numbers.
        custom_w, custom_h: Target size used when the choice is "Custom".
        progress: Gradio progress tracker.

    Returns:
        ``(output_paths, zip_path)``: the exported clips and a zip archive of
        the job directory.

    Raises:
        gr.Error: If no video is given, it has no audio, no silence is
            detected, or no clip could be exported.
    """
    if not video_path:
        raise gr.Error("Please upload a video to process.")
    if not has_audio_stream(video_path):
        raise gr.Error("The uploaded video has no audio track. Cannot detect silence.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"video_chop_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)
    output_paths = []

    progress(0.1, desc="Analyzing for silent sections...")
    silence_cmd = [
        "ffmpeg", "-i", video_path,
        "-af", f"silencedetect=noise={silence_threshold}dB:d={min_silence_duration}",
        "-f", "null", "-",
    ]
    result = subprocess.run(silence_cmd, capture_output=True, text=True, encoding='utf-8')

    # silencedetect can report a slightly negative start for silence at the
    # very beginning. The pattern must accept a sign, otherwise that start is
    # dropped and every later start/end pair is shifted by one.
    number = r"(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)"
    silence_starts = [max(0.0, float(x)) for x in re.findall(rf"silence_start: {number}", result.stderr)]
    silence_ends = [float(x) for x in re.findall(rf"silence_end: {number}", result.stderr)]
    if not silence_starts:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("No silence was detected with the current settings. Try adjusting the parameters.")

    progress(0.3, desc="Calculating video cuts...")
    video_duration = get_media_duration(video_path)

    # A silence still running at end-of-file may have no "silence_end" line;
    # close it at the end of the video so zip() does not discard it.
    missing_ends = len(silence_starts) - len(silence_ends)
    if missing_ends > 0:
        silence_ends.extend([video_duration] * missing_ends)
    silences = list(zip(silence_starts, silence_ends))

    keep_segments = []
    last_silence_end = 0.0
    for start, end in silences:
        if start > last_silence_end:
            keep_segments.append((last_silence_end, start))
        last_silence_end = end
    if last_silence_end < video_duration:
        keep_segments.append((last_silence_end, video_duration))
    if not keep_segments:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("Failed to calculate any segments to keep.")

    vf_filter = None
    if resolution_choice != "Keep Original":
        if resolution_choice == "1080p (1920x1080)":
            target_w, target_h = 1920, 1080
        elif resolution_choice == "Portrait (1080x1920)":
            target_w, target_h = 1080, 1920
        elif resolution_choice == "Custom":
            target_w, target_h = int(custom_w), int(custom_h)
        else:
            # Unrecognised choice: fall back to the source size (as
            # auto_jump_cut does) instead of failing on unbound variables.
            target_w, target_h = get_video_dimensions(video_path)
        # yuv420p needs even dimensions.
        target_w -= target_w % 2
        target_h -= target_h % 2
        vf_filter = f"scale={target_w}:{target_h}:force_original_aspect_ratio=decrease,pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2,setsar=1"

    for i, (start, end) in enumerate(progress.tqdm(keep_segments, desc="Exporting video clips...")):
        output_clip_path = os.path.join(job_temp_dir, f"clip_{i:04d}.mp4")
        cmd = ["ffmpeg", "-y", "-ss", str(start), "-to", str(end), "-i", video_path]
        if vf_filter:
            # Re-encoding is necessary
            cmd.extend(["-vf", vf_filter, "-c:v", "libx264", "-pix_fmt", "yuv420p", "-c:a", "aac"])
        else:
            # Can use fast stream copy
            cmd.extend(["-c", "copy"])
        cmd.append(output_clip_path)
        try:
            run_ffmpeg_command(cmd)
            output_paths.append(output_clip_path)
        except Exception as e:
            # Deliberate best-effort: one failed clip should not abort the job.
            gr.Warning(f"Skipping a clip due to an error: {e}")
            continue

    if not output_paths:
        shutil.rmtree(job_temp_dir)
        raise gr.Error("No video clips could be exported.")

    zip_base_name = os.path.join(TEMP_DIR, f"video_chop_archive_{timestamp}")
    zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir)
    return output_paths, zip_path
# --- STORYBOARD / ANIMATIC CREATOR FUNCTIONS ---
def get_file_type(file_path):
    """Classify a path as "image", "video" or "unknown" by its extension.

    The comparison is case-insensitive. Empty or None paths are "unknown".
    """
    if not file_path:
        return "unknown"

    extension = os.path.splitext(file_path.lower())[1]
    known_extensions = {
        "image": ('.png', '.jpg', '.jpeg', '.webp', '.bmp', '.gif'),
        "video": ('.mp4', '.mov', '.mkv', '.avi', '.webm'),
    }
    for kind, extensions in known_extensions.items():
        if extension in extensions:
            return kind
    return "unknown"
def add_assets_to_bin(files, current_assets):
    """Copy uploaded images/videos into a session folder and add them to the bin.

    Args:
        files: Uploaded file objects; the source path is read from ``.name``.
        current_assets: Existing asset dicts (``path``, ``name``, ``type``);
            may be None or empty.

    Returns:
        ``(assets, gallery_update)``: the asset list and an update carrying
        every asset path. When ``files`` is empty, ``current_assets`` is
        returned unchanged.
    """
    # Treat a missing bin like an empty one instead of failing in list(None).
    existing_assets = list(current_assets or [])
    if not files:
        return current_assets, gr.update(value=[a['path'] for a in existing_assets] if existing_assets else None)

    session_id = f"storyboard_session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    asset_session_dir = os.path.join(TEMP_DIR, session_id)
    os.makedirs(asset_session_dir, exist_ok=True)

    updated_asset_list = existing_assets
    for file_obj in files:
        try:
            file_type = get_file_type(file_obj.name)
            if file_type == "unknown":
                gr.Warning(f"Skipping unknown file type: {os.path.basename(file_obj.name)}")
                continue

            # Two uploads with the same file name would otherwise overwrite
            # each other, leaving both assets pointing at the last copy.
            file_name = os.path.basename(file_obj.name)
            stem, ext = os.path.splitext(file_name)
            new_path = os.path.join(asset_session_dir, file_name)
            counter = 1
            while os.path.exists(new_path):
                new_path = os.path.join(asset_session_dir, f"{stem}_{counter}{ext}")
                counter += 1

            shutil.copy(file_obj.name, new_path)
            updated_asset_list.append({"path": new_path, "name": os.path.basename(new_path), "type": file_type})
        except Exception as e:
            # Deliberate best-effort: report the file and keep going.
            gr.Warning(f"Error adding asset {os.path.basename(file_obj.name)}: {e}")
    return updated_asset_list, gr.update(value=[a['path'] for a in updated_asset_list])
def handle_asset_selection(evt: gr.SelectData, assets_state, timeline_state):
    """Append the asset picked in the bin to the end of the timeline.

    Images get a default duration of 3.0 seconds. Videos use their probed
    duration, falling back to 3.0 seconds (with a warning) when it cannot be
    read.

    Returns:
        ``(new_timeline, preview_frames)``. ``preview_frames`` is the result
        of ``extract_first_last_frame`` for videos, and None for images or
        when preview generation fails. A deselect event returns the timeline
        unchanged together with None.
    """
    if not evt.selected:
        return timeline_state, None

    asset = assets_state[evt.index]
    new_timeline = list(timeline_state)
    is_image = asset['type'] == 'image'

    entry = {
        "path": asset['path'],
        "name": asset['name'],
        "type": asset['type'],
    }
    if is_image:
        entry["duration"] = 3.0
        entry["start_time"] = 0
        entry["original_duration"] = 0
    else:  # video
        clip_length = get_media_duration(asset['path'])
        if clip_length <= 0:
            gr.Warning(f"Could not read duration for '{asset['name']}'. Defaulting to 3.0 seconds. The file may be corrupt or in an unsupported format.")
            clip_length = 3.0
        entry["duration"] = round(clip_length, 2)
        entry["start_time"] = 0.0
        entry["original_duration"] = round(clip_length, 2)

    new_timeline.append(entry)
    gr.Info(f"Added '{asset['name']}' to timeline.")

    preview_frames = None
    if asset['type'] == 'video':
        try:
            preview_frames = extract_first_last_frame(asset['path'])
        except Exception as e:
            # The preview is optional; log and carry on without it.
            print(f"Could not generate preview for {asset['name']}: {e}")
    return new_timeline, preview_frames
def add_all_assets_to_timeline(assets_state, timeline_state):
    """Append every asset in the bin to the timeline, in bin order.

    Images get a default duration of 3.0 seconds. Videos use their probed
    duration, falling back to 3.0 seconds (with a warning) when it cannot be
    read.

    Returns:
        The extended timeline as a new list, or ``timeline_state`` unchanged
        when the bin is empty.
    """
    if not assets_state:
        gr.Warning("Asset bin is empty.")
        return timeline_state

    new_timeline = list(timeline_state)
    for asset in assets_state:
        entry = {
            "path": asset['path'],
            "name": asset['name'],
            "type": asset['type'],
        }
        if asset['type'] == 'image':
            entry["duration"] = 3.0
            entry["start_time"] = 0
            entry["original_duration"] = 0
        else:  # video
            clip_length = get_media_duration(asset['path'])
            if clip_length <= 0:
                gr.Warning(f"Could not read duration for '{asset['name']}'. Defaulting to 3.0 seconds.")
                clip_length = 3.0
            entry["duration"] = round(clip_length, 2)
            entry["start_time"] = 0.0
            entry["original_duration"] = round(clip_length, 2)
        new_timeline.append(entry)

    gr.Info(f"Added {len(assets_state)} assets to the timeline.")
    return new_timeline
def update_timeline_df(timeline_state):
    """Turn the timeline into table rows: [position, name, type, duration].

    Positions are 1-based. An empty timeline clears the table.
    """
    if not timeline_state:
        return gr.update(value=None)

    rows = []
    for position, clip in enumerate(timeline_state, start=1):
        rows.append([position, clip['name'], clip['type'], clip['duration']])
    return gr.update(value=rows)
def handle_timeline_selection(timeline_state, evt: gr.SelectData):
    """Compute the nine UI outputs for a click on a timeline row.

    Returns a tuple of:
        selected index (-1 when nothing valid is selected), preview path,
        duration, three ``interactive`` updates (first reflects whether the
        clip can move up, second whether it can move down, third is always
        enabled for a valid selection), a ``visible`` update that is True
        only for videos, and the trim start and end in seconds (0, 0 for
        images).
    """
    def _nothing_selected():
        # Output set that disables the buttons and hides the trim controls.
        return (
            -1, None, None,
            gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False),
            gr.update(visible=False), 0, 0,
        )

    if not evt.selected:
        return _nothing_selected()

    index = evt.index[0]
    if not (0 <= index < len(timeline_state)):
        return _nothing_selected()

    clip = timeline_state[index]
    is_video = clip['type'] == 'video'
    if is_video:
        start_time = clip.get('start_time', 0.0)
        end_time = start_time + clip['duration']
        trim_start, trim_end = round(start_time, 2), round(end_time, 2)
    else:  # Image
        trim_start, trim_end = 0, 0

    return (
        index, clip['path'], clip['duration'],
        gr.update(interactive=index > 0),
        gr.update(interactive=index < len(timeline_state) - 1),
        gr.update(interactive=True),
        gr.update(visible=is_video), trim_start, trim_end,
    )
def apply_trim_and_update(timeline_state, selected_index, new_start, new_end):
    """Set the in/out points of the selected video clip.

    The trim is accepted only if ``0 <= new_start < new_end`` and ``new_end``
    does not exceed the clip's original duration. The clip's ``start_time``
    and ``duration`` are updated on the existing clip dict.

    Returns:
        ``(timeline, duration_update)``. On any validation failure a warning
        is shown and the timeline is returned with a no-op update.
    """
    has_selection = selected_index != -1 and 0 <= selected_index < len(timeline_state)
    if not has_selection:
        gr.Warning("No clip selected in timeline.")
        return timeline_state, gr.update()

    clip = timeline_state[selected_index]
    if clip['type'] != 'video':
        gr.Warning("Trimming is only available for video clips.")
        return timeline_state, gr.update()

    original_duration = clip.get('original_duration', 0)
    trim_is_valid = 0 <= new_start < new_end <= original_duration
    if not trim_is_valid:
        gr.Warning(f"Invalid trim times. Must be between 0 and {original_duration:.2f}s, and start must be before end.")
        return timeline_state, gr.update()

    new_duration = new_end - new_start
    # Shallow copy: the list is new, the clip dicts are shared with the input.
    new_timeline = list(timeline_state)
    trimmed = new_timeline[selected_index]
    trimmed['start_time'] = round(new_start, 2)
    trimmed['duration'] = round(new_duration, 2)
    gr.Info(f"Clip '{clip['name']}' trimmed. New duration is {new_duration:.2f}s.")
    return new_timeline, gr.update(value=round(new_duration, 2))
def update_clip_properties(timeline_state, selected_index, new_duration):
    """Change the duration of the selected timeline clip.

    For videos the duration is clamped (with a warning) to what remains of
    the source after the clip's start time. Images accept any positive value.

    Returns:
        The timeline as a new list (clip dicts are shared with the input), or
        ``timeline_state`` unchanged when the selection or duration is invalid.
    """
    has_selection = selected_index != -1 and 0 <= selected_index < len(timeline_state)
    if not has_selection:
        gr.Warning("No clip selected in timeline.")
        return timeline_state
    if new_duration <= 0:
        gr.Warning("Duration must be a positive number.")
        return timeline_state

    new_timeline = list(timeline_state)
    clip = new_timeline[selected_index]
    if clip['type'] == 'video':
        # A clip cannot play past the end of its source material.
        available = clip.get('original_duration', 0.0) - clip.get('start_time', 0.0)
        if new_duration > available:
            gr.Warning(f"Duration cannot exceed available video length from start time ({available:.2f}s). Clamping value.")
            new_duration = available

    clip['duration'] = round(new_duration, 2)
    gr.Info(f"Updated duration for '{clip['name']}'.")
    return new_timeline
def handle_timeline_action(timeline_state, selected_index, action):
    """Move the selected clip up/down in the timeline, or remove it.

    Args:
        timeline_state: List of timeline items.
        selected_index: Index of the selected clip; -1 means nothing selected.
        action: One of "up", "down" or "remove". Any other value, or a move
            that would leave the list bounds, keeps the order as it is.

    Returns:
        ``(timeline, update)`` where ``timeline`` is a reordered copy of the
        list and ``update`` is a ``gr.update(selected_index=...)`` pointing at
        the clip's new position (``None`` after a removal). When no valid clip
        is selected the original list and a no-op ``gr.update()`` are returned.
    """
    has_selection = selected_index != -1 and 0 <= selected_index < len(timeline_state)
    if not has_selection:
        gr.Warning("Please select a clip from the timeline first.")
        return timeline_state, gr.update()

    reordered = list(timeline_state)
    target_index = selected_index

    if action == "up":
        if selected_index > 0:
            # Moving one slot up is a swap with the previous neighbour.
            above = selected_index - 1
            reordered[above], reordered[selected_index] = reordered[selected_index], reordered[above]
            target_index = above
    elif action == "down":
        if selected_index < len(reordered) - 1:
            # Moving one slot down is a swap with the next neighbour.
            below = selected_index + 1
            reordered[selected_index], reordered[below] = reordered[below], reordered[selected_index]
            target_index = below
    elif action == "remove":
        del reordered[selected_index]
        target_index = None  # Deselect after removing

    # Hand back the new order and tell the UI which entry to select.
    return reordered, gr.update(selected_index=target_index)
def set_resolution_from_first_asset(timeline_state):
    """Derive the output resolution from the first item on the timeline.

    Video items are measured with ``get_video_dimensions``; image items are
    opened with Pillow. Any other item type yields no dimensions.

    Returns:
        ``(width, height)`` when both are positive; otherwise a pair of no-op
        ``gr.update()`` objects (after showing a warning).
    """
    if not timeline_state:
        gr.Warning("Timeline is empty. Cannot determine resolution.")
        return gr.update(), gr.update()

    first_item = timeline_state[0]
    source_path = first_item['path']
    kind = first_item['type']

    width = height = 0
    if kind == 'video':
        width, height = get_video_dimensions(source_path)
    elif kind == 'image':
        try:
            with Image.open(source_path) as picture:
                width, height = picture.size
        except Exception as e:
            # Best effort: log and fall through to the warning below.
            print(f"Could not get image dimensions for {source_path}: {e}")

    if not (width > 0 and height > 0):
        gr.Warning(f"Could not get dimensions for the first asset: '{first_item['name']}'.")
        return gr.update(), gr.update()

    gr.Info(f"Set resolution to {width}x{height} based on '{first_item['name']}'.")
    return width, height
def _build_animatic_clip_cmd(item, out_w, out_h, keep_original_audio, output_clip_path):
    """Build the ffmpeg command that renders one timeline item to a normalized clip.

    ffmpeg applies each option to the NEXT file named on the command line, so
    every ``-i`` input (including the generated silence) is emitted before any
    output option such as ``-vf``; an output option placed in front of a later
    ``-i`` would be parsed as an input option for that file.
    """
    item_path, item_type, item_duration = item['path'], item['type'], item['duration']
    start_time = item.get('start_time', 0)
    duration_arg = str(item_duration)
    scale_pad = (
        f"scale={out_w}:{out_h}:force_original_aspect_ratio=decrease,"
        f"pad={out_w}:{out_h}:(ow-iw)/2:(oh-ih)/2,setsar=1"
    )
    # Silent stereo track, used so that every clip has an audio stream when
    # the clips are later concatenated with audio.
    silence_input = ["-f", "lavfi", "-t", duration_arg, "-i", "anullsrc=channel_layout=stereo:sample_rate=44100"]

    cmd = ["ffmpeg", "-y"]
    if item_type == 'video':
        if start_time > 0:
            cmd.extend(["-ss", str(start_time)])
        cmd.extend(["-t", duration_arg, "-i", item_path])
        needs_silence = keep_original_audio and not has_audio_stream(item_path)
        if needs_silence:
            cmd.extend(silence_input)
        # --- output options start here ---
        cmd.extend(["-vf", f"setpts=PTS-STARTPTS,{scale_pad}"])
        if not keep_original_audio:
            cmd.append("-an")
        elif needs_silence:
            cmd.extend(["-map", "0:v:0", "-map", "1:a:0", "-c:a", "aac", "-ar", "44100"])
        else:
            cmd.extend(["-af", "asetpts=PTS-STARTPTS", "-c:a", "aac", "-ar", "44100"])
        cmd.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p"])
    else:  # item_type == 'image'
        cmd.extend(["-loop", "1", "-i", item_path])
        if keep_original_audio:
            cmd.extend(silence_input)
        # --- output options start here ---
        cmd.extend(["-t", duration_arg, "-vf", f"{scale_pad},format=yuv420p"])
        if keep_original_audio:
            cmd.extend(["-shortest", "-c:a", "aac", "-ar", "44100"])
        else:
            cmd.append("-an")
    cmd.append(output_clip_path)
    return cmd


def create_animatic(timeline_data, audio_path, out_w, out_h, keep_original_audio):
    """Render the timeline into a single MP4 animatic.

    Each timeline item (dict with 'path', 'type', 'duration', 'name' and an
    optional 'start_time') is first rendered to an intermediate clip scaled
    and padded to the output size; the clips are then joined with ffmpeg's
    concat filter. When ``keep_original_audio`` is false and ``audio_path``
    is given, that track is muxed over the joined video.

    Args:
        timeline_data: Ordered list of timeline item dicts.
        audio_path: Optional external audio track (ignored when
            ``keep_original_audio`` is true).
        out_w: Output width; rounded down to an even number.
        out_h: Output height; rounded down to an even number.
        keep_original_audio: Keep each clip's own audio (silence is
            substituted for images and for videos without an audio stream).

    Returns:
        Path of the final video inside ``TEMP_DIR``.

    Raises:
        gr.Error: On an empty timeline, non-positive output size, or when no
            clip could be produced.
    """
    if not timeline_data:
        raise gr.Error("Timeline is empty. Please add assets to the timeline.")
    out_w, out_h = int(out_w), int(out_h)
    if out_w <= 0 or out_h <= 0:
        raise gr.Error("Output width and height must be positive numbers.")
    # Round down to even dimensions, as the rest of the pipeline does before
    # encoding to yuv420p.
    out_w -= out_w % 2
    out_h -= out_h % 2

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"animatic_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)

    try:
        clip_paths = []
        total_items = len(timeline_data)
        for i, item in enumerate(timeline_data):
            if item['duration'] <= 0:
                gr.Warning(f"Skipping clip '{item['name']}' because its duration is zero.")
                continue
            output_clip_path = os.path.join(job_temp_dir, f"clip_{i:04d}.mp4")
            cmd = _build_animatic_clip_cmd(item, out_w, out_h, keep_original_audio, output_clip_path)
            run_ffmpeg_command(cmd, f"Processing clip {i+1}/{total_items}: {item['name']}")
            clip_paths.append(output_clip_path)

        if not clip_paths:
            raise gr.Error("No valid clips were generated. Check clip durations and file integrity.")

        combined_video_path = os.path.join(job_temp_dir, "combined_video.mp4")
        if len(clip_paths) > 1:
            clip_count = len(clip_paths)
            cmd_concat = ["ffmpeg", "-y"]
            for path in clip_paths:
                cmd_concat.extend(["-i", path])

            video_labels = "".join(f"[{i}:v]" for i in range(clip_count))
            filter_graph = f"{video_labels}concat=n={clip_count}:v=1:a=0[v_out]"
            stream_maps = ["-map", "[v_out]"]
            if keep_original_audio:
                audio_labels = "".join(f"[{i}:a]" for i in range(clip_count))
                filter_graph += f";{audio_labels}concat=n={clip_count}:v=0:a=1[a_out]"
                stream_maps.extend(["-map", "[a_out]"])

            cmd_concat.extend(["-filter_complex", filter_graph, *stream_maps, combined_video_path])
            run_ffmpeg_command(cmd_concat, "Joining and Finalizing Video (Robust Mode)...")
        else:
            if not os.path.exists(clip_paths[0]):
                raise gr.Error("The only clip in the timeline failed to process.")
            shutil.copy(clip_paths[0], combined_video_path)

        final_output_path = os.path.join(TEMP_DIR, f"animatic_final_{timestamp}.mp4")
        if not keep_original_audio and audio_path:
            run_ffmpeg_command(["ffmpeg", "-y", "-i", combined_video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", final_output_path], "Muxing audio")
        else:
            shutil.move(combined_video_path, final_output_path)
        return final_output_path
    finally:
        # Remove intermediates on success AND on failure so that a failing
        # ffmpeg run does not leave the job directory behind.
        shutil.rmtree(job_temp_dir, ignore_errors=True)
def detect_bpm(audio_path):
    """Estimate the tempo of an audio file and return it as a status string.

    Returns one of: a prompt to upload audio (falsy ``audio_path``),
    ``"Detected BPM: <value>"``, ``"Could not detect BPM."`` (non-positive
    tempo), or a generic error message when loading/analysis raises.
    """
    if not audio_path:
        return "Please upload an audio track first."
    try:
        samples, sample_rate = librosa.load(audio_path)
        tempo_val, _ = librosa.beat.beat_track(y=samples, sr=sample_rate)
        # beat_track may hand back the tempo as an ndarray or as a scalar.
        tempo = tempo_val.item() if isinstance(tempo_val, np.ndarray) else float(tempo_val)
        if tempo > 0:
            return f"Detected BPM: {tempo:.2f}"
        return "Could not detect BPM."
    except Exception as e:
        print(f"--- BPM DETECTION ERROR ---\n{e}")
        return "Error: Could not analyze audio file."
def update_new_bpm_display(original_bpm_text, speed_multiplier):
    """Return the BPM label that results from applying a speed multiplier.

    Args:
        original_bpm_text: Status text containing the word "Detected" and a
            decimal number such as "120.00".
        speed_multiplier: Factor applied to the detected BPM.

    Returns:
        ``"Estimated New BPM: <value>"``, or ``"---"`` when the text holds no
        detected BPM or the multiplication is not possible.
    """
    placeholder = "---"
    if not original_bpm_text or "Detected" not in original_bpm_text:
        return placeholder
    try:
        found = re.search(r"(\d+\.\d+)", original_bpm_text)
        if found is None:
            return placeholder
        scaled_bpm = float(found.group(1)) * speed_multiplier
        return f"Estimated New BPM: {scaled_bpm:.2f}"
    except (ValueError, TypeError):
        return placeholder
def create_rhythmic_animatic(timeline_data, audio_path, measure_choice, out_w, out_h):
    """Re-time every timeline clip to a musical length and render the animatic.

    The tempo of ``audio_path`` is estimated with librosa; one measure is
    taken to be four beats. Every clip gets the duration selected by
    ``measure_choice``; video clips are additionally limited to what remains
    of the source after their 'start_time' when 'original_duration' is known.

    Args:
        timeline_data: List of timeline item dicts (copied, not mutated).
        audio_path: Audio track used for tempo detection and as soundtrack.
        measure_choice: One of "2 Measures", "1 Measure", "1/2 Measure",
            "1/4 Measure (Beat)".
        out_w: Output width passed on to ``create_animatic``.
        out_h: Output height passed on to ``create_animatic``.

    Returns:
        Whatever ``create_animatic`` returns for the re-timed timeline (called
        with ``keep_original_audio=False``).

    Raises:
        gr.Error: Empty timeline, missing audio, failed audio analysis,
            undetectable tempo, or unknown ``measure_choice``.
    """
    if not timeline_data:
        raise gr.Error("Timeline is empty.")
    if not audio_path:
        raise gr.Error("An audio track is required for rhythmic editing.")

    measure_multipliers = {"2 Measures": 2.0, "1 Measure": 1.0, "1/2 Measure": 0.5, "1/4 Measure (Beat)": 0.25}
    if measure_choice not in measure_multipliers:
        raise gr.Error(f"Unknown clip length option: {measure_choice!r}.")

    try:
        y, sr = librosa.load(audio_path)
        tempo_val, _ = librosa.beat.beat_track(y=y, sr=sr)
        tempo = tempo_val.item() if isinstance(tempo_val, np.ndarray) else float(tempo_val)
    except Exception as e:
        raise gr.Error(f"Audio analysis failed: {e}") from e
    # Checked outside the try block so this message is not re-wrapped as an
    # "Audio analysis failed" error by the handler above.
    if not tempo or tempo <= 0:
        raise gr.Error("Could not determine BPM from audio file.")

    seconds_per_beat = 60.0 / tempo
    seconds_per_measure = seconds_per_beat * 4.0
    clip_duration = seconds_per_measure * measure_multipliers[measure_choice]

    rhythmic_timeline = []
    for item in timeline_data:
        new_item = item.copy()
        new_item['duration'] = clip_duration
        if new_item['type'] == 'video':
            original_duration = new_item.get('original_duration', 0)
            # Only limit the clip when the source length is actually known;
            # clamping against a missing (0) length would zero out the clip.
            if original_duration > 0:
                available_duration = original_duration - new_item.get('start_time', 0)
                new_item['duration'] = min(clip_duration, available_duration)
        rhythmic_timeline.append(new_item)

    gr.Info(f"Re-timed {len(rhythmic_timeline)} clips to ~{clip_duration:.2f}s each based on {tempo:.2f} BPM.")
    return create_animatic(rhythmic_timeline, audio_path, out_w, out_h, keep_original_audio=False)
# --- NEW CREATIVE FUNCTIONS ---
def _create_auto_trailer_impl(video_path, trailer_duration, clip_duration, analysis_method, transition_style, music_path, out_w, out_h, progress: Progress):
    """Internal implementation of the auto-trailer creator.

    The source video is scanned at roughly five frames per second; the amount
    of thresholded frame-to-frame difference is averaged per chunk of
    ``clip_duration`` seconds. The highest-scoring chunks are extracted in
    chronological order, joined (optionally with an xfade transition) and
    optionally combined with a music track.

    Args:
        video_path: Source video file.
        trailer_duration: Target trailer length in seconds (capped at the
            source length).
        clip_duration: Length of each extracted clip in seconds.
        analysis_method: Accepted for interface compatibility; not read here.
        transition_style: "None" for hard cuts, otherwise an xfade transition
            name (lower-cased before use).
        music_path: Optional audio file muxed under the trailer.
        out_w: Output width; rounded down to an even number.
        out_h: Output height; rounded down to an even number.
        progress: Gradio progress callback.

    Returns:
        Path of the finished trailer inside ``TEMP_DIR``.

    Raises:
        gr.Error: Missing video, unusable durations, no analyzable motion, or
            no extracted clips.
    """
    if not video_path:
        raise gr.Error("Please upload a source video.")

    source_duration = get_media_duration(video_path)
    if not source_duration or source_duration <= 0:
        # Without a usable length every later division would be by zero.
        raise gr.Error("Could not determine the duration of the source video.")
    if source_duration < trailer_duration:
        gr.Warning(f"Source video is only {source_duration:.1f}s long. The trailer duration will be capped at the source video length.")
        trailer_duration = source_duration
    if clip_duration > trailer_duration:
        new_clip_duration = trailer_duration / 2 if trailer_duration > 2 else trailer_duration
        gr.Warning(f"Clip duration ({clip_duration}s) is longer than the trailer duration ({trailer_duration:.1f}s). Adjusting clip duration to {new_clip_duration:.1f}s.")
        clip_duration = new_clip_duration
    if clip_duration <= 0 or trailer_duration <= 0:
        raise gr.Error("Trailer and clip durations must be positive numbers.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"trailer_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)
    try:
        progress(0, desc="Analyzing video for high-motion scenes...")
        chunk_scores = []
        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = 30  # fallback when the container reports no frame rate
            # At least one frame per chunk so the chunk index never divides by 0.
            chunk_duration_frames = max(1, int(clip_duration * fps))
            video_total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_skip = max(1, int(fps / 5))  # Analyze ~5 frames per second
            prev_frame = None
            frame_num = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_num % frame_skip == 0:
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    gray = cv2.GaussianBlur(gray, (21, 21), 0)
                    if prev_frame is not None:
                        frame_delta = cv2.absdiff(prev_frame, gray)
                        thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1]
                        motion_score = np.sum(thresh)
                        chunk_index = frame_num // chunk_duration_frames
                        while len(chunk_scores) <= chunk_index:
                            chunk_scores.append({'start_time': (len(chunk_scores) * clip_duration), 'scores': []})
                        chunk_scores[chunk_index]['scores'].append(motion_score)
                    prev_frame = gray
                frame_num += 1
                if frame_num % 100 == 0:
                    # The reported frame count may be 0 or too small; keep the
                    # analysis phase within its 0-20% share of the progress bar.
                    fraction = min(1.0, frame_num / video_total_frames) if video_total_frames > 0 else 0.0
                    progress(0.2 * fraction, desc=f"Analyzing frame {frame_num}/{video_total_frames}...")
        finally:
            # Release the capture handle even if analysis raises.
            cap.release()

        final_chunks = [
            {'start_time': chunk['start_time'], 'score': sum(chunk['scores']) / len(chunk['scores'])}
            for chunk in chunk_scores if chunk['scores']
        ]
        if not final_chunks:
            raise gr.Error("Could not analyze video for motion. Is the video very short or static?")

        progress(0.2, desc="Selecting the best clips...")
        num_clips_to_select = max(1, int(trailer_duration / clip_duration))
        # Take the highest-motion chunks, then play them in source order.
        best_chunks = sorted(final_chunks, key=lambda x: x['score'], reverse=True)[:num_clips_to_select]
        selected_clips_info = sorted(best_chunks, key=lambda x: x['start_time'])

        out_w = int(out_w) - (int(out_w) % 2)
        out_h = int(out_h) - (int(out_h) % 2)
        vf_filter = f"scale={out_w}:{out_h}:force_original_aspect_ratio=decrease,pad={out_w}:{out_h}:(ow-iw)/2:(oh-ih)/2,setsar=1"
        extracted_clips = []
        for i, clip_info in enumerate(selected_clips_info):
            progress(0.2 + (0.5 * (i / len(selected_clips_info))), desc=f"Extracting clip {i+1}/{len(selected_clips_info)}...")
            output_clip_path = os.path.join(job_temp_dir, f"clip_{i:03d}.mp4")
            cmd = ["ffmpeg", "-y", "-ss", str(clip_info['start_time']), "-i", video_path, "-t", str(clip_duration), "-vf", vf_filter, "-an", "-c:v", "libx264", "-pix_fmt", "yuv420p", output_clip_path]
            run_ffmpeg_command(cmd)
            extracted_clips.append(output_clip_path)
        if not extracted_clips:
            raise gr.Error("Failed to extract any clips.")

        progress(0.7, desc="Stitching clips together...")
        final_silent_path = os.path.join(job_temp_dir, "final_silent.mp4")
        if len(extracted_clips) == 1:
            shutil.copy(extracted_clips[0], final_silent_path)
        elif transition_style == "None":
            # Hard cuts: join with the concat demuxer, no re-encode.
            file_list_path = os.path.join(job_temp_dir, "files.txt")
            with open(file_list_path, 'w', encoding='utf-8') as f:
                for path in extracted_clips:
                    f.write(f"file '{os.path.abspath(path)}'\n")
            run_ffmpeg_command(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", final_silent_path], "Concatenating clips...")
        else:
            transition_duration = 0.5
            cmd = ["ffmpeg", "-y"]
            for clip_path in extracted_clips:
                cmd.extend(["-i", clip_path])
            # Chain xfade filters: each step blends the running result with
            # the next input and labels the outcome [v<i>].
            filter_complex = []
            running_duration = 0
            for i in range(len(extracted_clips) - 1):
                input1 = f"[{i}:v]" if i == 0 else f"[v{i-1}]"
                input2 = f"[{i+1}:v]"
                output = f"[v{i}]"
                offset = max(0, running_duration + clip_duration - transition_duration)
                filter_complex.append(f"{input1}{input2}xfade=transition={transition_style.lower()}:duration={transition_duration}:offset={offset}{output}")
                running_duration += clip_duration - transition_duration
            cmd.extend([
                "-filter_complex", ";".join(filter_complex),
                "-map", f"[v{len(extracted_clips)-2}]",
                "-c:v", "libx264", "-pix_fmt", "yuv420p",
                final_silent_path
            ])
            run_ffmpeg_command(cmd, "Applying transitions...")

        progress(0.95, desc="Adding background music...")
        final_output_path = os.path.join(TEMP_DIR, f"trailer_final_{timestamp}.mp4")
        if music_path:
            run_ffmpeg_command(["ffmpeg", "-y", "-i", final_silent_path, "-i", music_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", final_output_path], "Muxing audio")
        else:
            shutil.move(final_silent_path, final_output_path)
        return final_output_path
    finally:
        if os.path.exists(job_temp_dir):
            shutil.rmtree(job_temp_dir)
def auto_trailer_wrapper(video_path, trailer_duration, clip_duration, analysis_method, transition_style, music_path, out_w, out_h, progress=gr.Progress(track_tqdm=True)):
    """Gradio entry point for the auto-trailer tool.

    Supplies a ``gr.Progress`` tracker as the default ``progress`` argument
    and forwards every argument unchanged to ``_create_auto_trailer_impl``.
    """
    return _create_auto_trailer_impl(
        video_path=video_path,
        trailer_duration=trailer_duration,
        clip_duration=clip_duration,
        analysis_method=analysis_method,
        transition_style=transition_style,
        music_path=music_path,
        out_w=out_w,
        out_h=out_h,
        progress=progress,
    )
def generate_waveform_video(video_path, style, size, position, color):
    """Overlay a waveform of the video's own audio onto the video.

    Args:
        video_path: Input video; must contain an audio stream.
        style: Value passed to the ``showwaves`` filter's ``mode`` option.
        size: Value passed to ``showwaves`` as ``s`` (waveform size).
        position: "Bottom", "Center" or "Top" placement of the overlay.
        color: Waveform color; a leading '#' is stripped before use.

    Returns:
        Path of the rendered video inside ``TEMP_DIR``.

    Raises:
        gr.Error: When no video is given or it has no audio stream.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")
    if not has_audio_stream(video_path):
        raise gr.Error("The uploaded video has no audio track. A waveform cannot be generated.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"waveform_video_{stamp}.mp4")

    # The overlay is always centered horizontally; only the vertical
    # expression depends on the chosen position.
    vertical_exprs = {"Bottom": "H-h-50", "Center": "(H-h)/2", "Top": "50"}
    wave_color = color.lstrip('#')
    overlay = f"overlay=x=(W-w)/2:y={vertical_exprs[position]}"
    wave_source = f"[0:a]showwaves=s={size}:mode={style}:colors={wave_color}:rate=25[wave];"
    filter_complex = wave_source + f"[0:v][wave]{overlay}"

    cmd = ["ffmpeg", "-i", video_path, "-filter_complex", filter_complex]
    cmd += ["-c:a", "copy"]
    cmd += ["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]
    run_ffmpeg_command(cmd, "Generating Audio Waveform...")
    return output_video_path
def create_pip_video(main_video, overlay_media, position, scale):
    """Render a picture-in-picture video.

    Args:
        main_video: Path of the background video.
        overlay_media: Uploaded overlay (video or image); its ``.name``
            attribute is used as the file path.
        position: One of the nine anchor names ("Top-Left" ... "Bottom-Right",
            plus "Center").
        scale: Factor applied to the overlay's width; height follows the
            aspect ratio.

    Returns:
        Path of the rendered video inside ``TEMP_DIR``.

    Raises:
        gr.Error: When either input is missing.
    """
    if not main_video:
        raise gr.Error("Please upload a main video.")
    if not overlay_media:
        raise gr.Error("Please upload an overlay video or image.")

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_video_path = os.path.join(TEMP_DIR, f"pip_video_{stamp}.mp4")

    # Overlay coordinates per anchor, with a 10px margin from the frame edges.
    anchors = {
        "Top-Left": "x=10:y=10",
        "Top-Center": "x=(W-w)/2:y=10",
        "Top-Right": "x=W-w-10:y=10",
        "Center-Left": "x=10:y=(H-h)/2",
        "Center": "x=(W-w)/2:y=(H-h)/2",
        "Center-Right": "x=W-w-10:y=(H-h)/2",
        "Bottom-Left": "x=10:y=H-h-10",
        "Bottom-Center": "x=(W-w)/2:y=H-h-10",
        "Bottom-Right": "x=W-w-10:y=H-h-10",
    }
    filter_graph = ";".join([
        f"[1:v]scale=iw*{scale}:-1[scaled_overlay]",
        f"[0:v][scaled_overlay]overlay={anchors[position]}",
    ])

    cmd = [
        "ffmpeg", "-i", main_video, "-i", overlay_media.name,
        "-filter_complex", filter_graph,
        "-map", "0:a?", "-c:a", "copy",  # keep the main video's audio if it has any
        "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y",
        output_video_path,
    ]
    run_ffmpeg_command(cmd, "Creating Picture-in-Picture video...")
    return output_video_path
def create_meme(image, text_input, position, font_choice, font_size_scale, text_color, outline_color):
    """Draw upper-cased, outlined caption text onto an image.

    Args:
        image: Image array accepted by ``Image.fromarray``.
        text_input: Caption; nothing is drawn when empty.
        position: "Top", "Bottom", or anything else for vertically centered.
        font_choice: Display name looked up in the font table; unknown names
            fall back to Impact.
        font_size_scale: Scale factor; the font size is
            ``image width / 10 * (font_size_scale / 5)``.
        text_color: Caption color, parsed with ``parse_color``.
        outline_color: Outline color, parsed with ``parse_color``.

    Returns:
        The resulting RGB ``PIL.Image``.

    Raises:
        gr.Error: When no image is supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image.")

    fill_rgb = parse_color(text_color)
    outline_rgb = parse_color(outline_color)
    img = Image.fromarray(image).convert("RGB")
    draw = ImageDraw.Draw(img)

    font_files = {
        "Impact": "impact.ttf",
        "Arial": "arial.ttf",
        "Arial Black": "ariblk.ttf",
        "Comic Sans MS": "comic.ttf",
        "Courier New": "cour.ttf",
        "Georgia": "georgia.ttf",
        "Tahoma": "tahoma.ttf",
        "Times New Roman": "times.ttf",
        "Trebuchet MS": "trebuc.ttf",
        "Verdana": "verdana.ttf",
    }
    font_path = font_files.get(font_choice, "impact.ttf")
    font_size = int(img.width / 10 * (font_size_scale / 5))

    # Fallback chain: requested font -> Arial -> Pillow's built-in default.
    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        gr.Warning(f"{font_choice} font ('{font_path}') not found. Trying Arial.")
        try:
            font_path = font_files["Arial"]
            font = ImageFont.truetype(font_path, font_size)
        except IOError:
            gr.Warning("Arial font not found. Using default font.")
            font = ImageFont.load_default()

    if not text_input:
        return img

    caption = text_input.upper()
    left, top, right, bottom = draw.textbbox((0, 0), caption, font=font)
    text_width = right - left
    text_height = bottom - top

    x = (img.width - text_width) / 2
    if position == "Top":
        y = 10
    elif position == "Bottom":
        y = img.height - text_height - 10
    else:  # Center
        y = (img.height - text_height) / 2

    # Outline: the caption drawn four times, shifted 2px towards each corner.
    for dx, dy in ((-2, -2), (2, -2), (-2, 2), (2, 2)):
        draw.text((x + dx, y + dy), caption, font=font, fill=outline_rgb)
    # Main text on top of the outline.
    draw.text((x, y), caption, font=font, fill=fill_rgb)
    return img
def stitch_images_smartly(img1_np, img2_np, output_size, bg_color_hex):
    """
    Combine two images on one square canvas.

    The orientation of the FIRST image decides the layout:
    - taller than wide: the images go side by side (left / right halves);
    - otherwise: the images are stacked (top / bottom halves).

    Each image is shrunk to fit its half while keeping its aspect ratio and
    is centered inside that half; the rest of the canvas shows the
    background color.
    """
    if img1_np is None or img2_np is None:
        raise gr.Error("Please upload two images.")

    first = Image.fromarray(img1_np).convert("RGBA")
    second = Image.fromarray(img2_np).convert("RGBA")
    canvas = Image.new("RGB", (output_size, output_size), parse_color(bg_color_hex))

    half = output_size // 2
    first_w, first_h = first.size
    if first_h > first_w:
        # Vertical inputs: two tall boxes next to each other.
        box_w, box_h = half, output_size
        second_origin = (half, 0)
    else:
        # Horizontal inputs: two wide boxes on top of each other.
        box_w, box_h = output_size, half
        second_origin = (0, half)

    for tile, (origin_x, origin_y) in ((first, (0, 0)), (second, second_origin)):
        tile.thumbnail((box_w, box_h), Image.Resampling.LANCZOS)
        paste_x = origin_x + (box_w - tile.width) // 2
        paste_y = origin_y + (box_h - tile.height) // 2
        # The RGBA tile doubles as its own paste mask.
        canvas.paste(tile, (paste_x, paste_y), tile)

    return canvas
def merge_videos(videos):
    """Concatenate several uploaded videos into one MP4.

    Every input is first re-encoded to the (even-rounded) frame size and the
    frame rate of the FIRST video, then the normalized clips are joined with
    ffmpeg's concat demuxer using stream copy.

    Args:
        videos: Uploaded file objects; each one's ``.name`` is its path. At
            least two are required.

    Returns:
        Path of the merged video inside ``TEMP_DIR``.

    Raises:
        gr.Error: Fewer than two videos, or the first video's dimensions
            cannot be read.
    """
    if not videos or len(videos) < 2:
        raise gr.Error("Please upload at least two videos to merge.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"merge_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)
    try:
        first_video_path = videos[0].name
        w, h = get_video_dimensions(first_video_path)
        fps = get_video_fps(first_video_path)
        w -= w % 2
        h -= h % 2
        if w <= 0 or h <= 0:
            # A failed probe must not be turned into a "scale=0:0" filter.
            raise gr.Error("Could not read the dimensions of the first video.")

        processed_clips = []
        for i, video_file in enumerate(videos):
            clip_path = os.path.join(job_temp_dir, f"clip_{i}.mp4")
            cmd = [
                "ffmpeg", "-i", video_file.name,
                "-vf", f"scale={w}:{h},setsar=1", "-r", str(fps),
                "-c:v", "libx264", "-pix_fmt", "yuv420p",
                "-c:a", "aac", "-ar", "44100",
                "-y", clip_path
            ]
            run_ffmpeg_command(cmd)
            processed_clips.append(clip_path)

        file_list_path = os.path.join(job_temp_dir, "files.txt")
        with open(file_list_path, 'w', encoding='utf-8') as f:
            f.writelines(f"file '{os.path.abspath(path)}'\n" for path in processed_clips)

        output_video_path = os.path.join(TEMP_DIR, f"merged_video_{timestamp}.mp4")
        cmd_merge = [
            "ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path,
            "-c", "copy", "-y", output_video_path
        ]
        run_ffmpeg_command(cmd_merge, "Merging videos...")
        return output_video_path
    finally:
        # Clean up intermediates whether or not ffmpeg succeeded.
        shutil.rmtree(job_temp_dir, ignore_errors=True)
# ### --- NEW: BATCH IMAGE CROPPER --- ###
def update_crop_preview(original_image, x, y, w, h):
    """Return a copy of the image with the crop rectangle outlined.

    The rectangle spans ``(x, y)`` to ``(x + w, y + h)`` and is drawn with a
    3px "#38bdf8" outline. ``None`` is returned when there is no image; the
    input image itself is never drawn on.
    """
    if original_image is None:
        return None

    annotated = original_image.copy()
    crop_box = (x, y, x + w, y + h)
    ImageDraw.Draw(annotated).rectangle(crop_box, outline="#38bdf8", width=3)
    return annotated
def crop_single_image(input_path, output_path, **kwargs):
    """Crop one image file and save the result.

    Keyword Args:
        x: Left edge of the crop box (default 0).
        y: Top edge of the crop box (default 0).
        w: Crop width (default 512).
        h: Crop height (default 512).

    All four values are converted with ``int()`` before use.
    """
    left = int(kwargs.get('x', 0))
    top = int(kwargs.get('y', 0))
    crop_w = int(kwargs.get('w', 512))
    crop_h = int(kwargs.get('h', 512))

    with Image.open(input_path) as source:
        region = (left, top, left + crop_w, top + crop_h)
        source.crop(region).save(output_path)
def batch_crop_images(files, x, y, w, h):
    """Crop every uploaded image to the same box.

    Delegates to ``batch_image_processor`` with ``crop_single_image`` as the
    per-file operation and "cropped" as the job label.

    Returns:
        ``(output_paths, zip_path)`` — the first two values returned by
        ``batch_image_processor``; its third value is discarded.

    Raises:
        gr.Error: No files uploaded, or non-positive width/height.
    """
    if not files:
        raise gr.Error("Please upload at least one image.")
    if w <= 0 or h <= 0:
        raise gr.Error("Width and Height must be positive.")

    output_paths, zip_path, _ = batch_image_processor(
        files, crop_single_image, "cropped", x=x, y=y, w=w, h=h
    )
    return output_paths, zip_path
# ### --- NEW: COLLAGE MAKER --- ###
def create_collage(files, layout, width, height, bg_color_hex):
    """Arrange the uploaded images on a single canvas.

    Args:
        files: Uploaded file objects; each one's ``.name`` is its path.
        layout: "Grid" (near-square grid), "Horizontal" (one row), anything
            else is treated as vertical (one column).
        width: Canvas width in pixels (converted with ``int()``).
        height: Canvas height in pixels (converted with ``int()``).
        bg_color_hex: Background color, parsed with ``parse_color``.

    Returns:
        The collage as an RGB ``PIL.Image``. Each image is shrunk to fit its
        cell (aspect ratio kept) and centered inside it.

    Raises:
        gr.Error: No files uploaded, or non-positive canvas size.
    """
    if not files:
        raise gr.Error("Please upload images to create a collage.")
    # Numeric UI inputs may arrive as floats; Pillow sizes must be ints.
    width, height = int(width), int(height)
    if width <= 0 or height <= 0:
        raise gr.Error("Collage width and height must be positive numbers.")

    bg_color = parse_color(bg_color_hex)

    images = []
    for file in files:
        # convert() returns an independent in-memory image, so the file
        # handle opened by Image.open can be closed right away.
        with Image.open(file.name) as source:
            images.append(source.convert("RGBA"))

    n = len(images)
    if layout == "Grid":
        cols = int(math.ceil(math.sqrt(n)))
        rows = int(math.ceil(n / cols))
    elif layout == "Horizontal":
        cols, rows = n, 1
    else:  # Vertical
        cols, rows = 1, n

    # Never let a cell collapse to 0px when there are more images than pixels.
    cell_w = max(1, width // cols)
    cell_h = max(1, height // rows)

    canvas = Image.new("RGB", (width, height), bg_color)
    for i, img in enumerate(images):
        row, col = divmod(i, cols)
        img.thumbnail((cell_w, cell_h), Image.Resampling.LANCZOS)
        paste_x = (col * cell_w) + (cell_w - img.width) // 2
        paste_y = (row * cell_h) + (cell_h - img.height) // 2
        # The RGBA image doubles as its own paste mask.
        canvas.paste(img, (paste_x, paste_y), img)
    return canvas
# ### --- NEW: VIDEO GRID COMPILER --- ###
def update_audio_source_choices_for_grid(files):
    """Build the audio-source choices for the video grid tool.

    Returns a ``gr.update`` whose choices are "From Video 1" .. "From Video N"
    (one per uploaded file) followed by "None", with the first entry
    selected. Without files the choices are "From Video 1" and "None".
    """
    if not files:
        return gr.update(choices=["From Video 1", "None"], value="From Video 1")

    choices = [f"From Video {number}" for number in range(1, len(files) + 1)] + ["None"]
    return gr.update(choices=choices, value=choices[0])
def compile_video_grid(videos, layout, width, height, bg_color, audio_choice, music_path):
    """Tile several videos into one grid video with ffmpeg.

    Args:
        videos: Uploaded file objects (``.name`` is the path). Their count
            must match the layout exactly.
        layout: One of "2x1 (Side-by-Side)", "1x2 (Stacked)",
            "2x2 (Quad-View)", "4x4 (16-View)", "8x4 (32-View)".
        width: Output width; rounded down to an even number.
        height: Output height; rounded down to an even number.
        bg_color: Color used by the ``pad`` filter around each tile.
        audio_choice: "None" or a label containing the 1-based number of the
            video whose audio should be used. Ignored when ``music_path`` is
            set.
        music_path: Optional external audio file; takes precedence over
            ``audio_choice``.

    Returns:
        Path of the rendered grid video inside ``TEMP_DIR``.

    Raises:
        gr.Error: No videos, or the video count does not match the layout.
    """
    if not videos:
        raise gr.Error("Please upload videos to compile.")

    # layout name -> (columns, rows)
    grid_shapes = {
        "2x1 (Side-by-Side)": (2, 1),
        "1x2 (Stacked)": (1, 2),
        "2x2 (Quad-View)": (2, 2),
        "4x4 (16-View)": (4, 4),
        "8x4 (32-View)": (8, 4),
    }
    num_videos = len(videos)
    shape = grid_shapes.get(layout)
    required_videos = shape[0] * shape[1] if shape else None
    if num_videos != required_videos:
        raise gr.Error(f"The '{layout}' layout requires exactly {required_videos} videos, but you uploaded {num_videos}.")
    cols, rows = shape

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(TEMP_DIR, f"grid_video_{timestamp}.mp4")
    width = int(width) - (int(width) % 2)
    height = int(height) - (int(height) % 2)

    cmd = ["ffmpeg", "-y"]
    for video_file in videos:
        cmd += ["-i", video_file.name]
    if music_path:
        # The music track becomes the input right after the last video.
        cmd += ["-i", music_path]

    # 1. Scale/pad every input into a tile labelled [v<i>].
    tile_w, tile_h = width // cols, height // rows
    filter_parts = [
        f"[{i}:v]scale={tile_w}:{tile_h}:force_original_aspect_ratio=decrease,pad={tile_w}:{tile_h}:-1:-1:color={bg_color}[v{i}]"
        for i in range(required_videos)
    ]

    # 2. Combine the tiles into [vout].
    if layout == "2x1 (Side-by-Side)":
        filter_parts.append("[v0][v1]hstack=inputs=2[vout]")
    elif layout == "1x2 (Stacked)":
        filter_parts.append("[v0][v1]vstack=inputs=2[vout]")
    elif layout == "2x2 (Quad-View)":
        filter_parts.append("[v0][v1][v2][v3]xstack=inputs=4:layout=0_0|w0_0|0_h0|w0_h0[vout]")
    else:
        # Large grids: hstack each row, then vstack the rows.
        row_labels = []
        for r in range(rows):
            tiles_in_row = "".join(f"[v{i}]" for i in range(r * cols, (r + 1) * cols))
            label = f"[row{r}]"
            filter_parts.append(f"{tiles_in_row}hstack=inputs={cols}{label}")
            row_labels.append(label)
        filter_parts.append(f"{''.join(row_labels)}vstack=inputs={rows}[vout]")

    cmd += ["-filter_complex", ";".join(filter_parts), "-map", "[vout]"]

    # Audio mapping logic
    if music_path:
        cmd += ["-map", f"{num_videos}:a?"]  # Map the external audio track
        cmd += ["-c:a", "aac", "-shortest"]
    elif audio_choice != "None":
        try:
            number_match = re.search(r'\d+', audio_choice)
            if number_match:
                audio_idx = int(number_match.group()) - 1
                if 0 <= audio_idx < num_videos:
                    cmd += ["-map", f"{audio_idx}:a?"]
                    cmd += ["-c:a", "aac", "-shortest"]
        except (AttributeError, IndexError):
            raise gr.Error("Invalid audio source selected.")

    cmd += ["-c:v", "libx264", "-pix_fmt", "yuv420p", output_path]
    run_ffmpeg_command(cmd, "Compiling video grid...")
    return output_path
def _create_automated_slideshow_impl(images, audio_path, kb_effect_style, transition_style, rhythm_choice, out_w, out_h, progress: Progress):
    """Internal implementation of the slideshow creator with progress tracking.

    Beats are detected in the audio track; every N beats (per
    ``rhythm_choice``) a new image clip starts. Each clip is an animated
    zoom/pan of one image (images repeat cyclically), the clips are chained
    with xfade transitions, and the audio track is muxed underneath.

    Args:
        images: Uploaded image file objects (``.name`` is the path).
        audio_path: Audio track driving the rhythm and used as soundtrack.
        kb_effect_style: "Subtle", "Standard" or "Dynamic" zoom range.
        transition_style: "Random" or an xfade transition name (lower-cased).
        rhythm_choice: "1 Image per Beat", "1 Image every 2 Beats" or
            "1 Image per Measure (4 Beats)".
        out_w: Output width; rounded down to an even number.
        out_h: Output height; rounded down to an even number.
        progress: Gradio progress callback.

    Returns:
        Path of the finished slideshow inside ``TEMP_DIR``.

    Raises:
        gr.Error: Missing inputs, failed audio analysis, or no clip produced.
    """
    if not images:
        raise gr.Error("Please upload at least one image.")
    if not audio_path:
        raise gr.Error("Please upload an audio track for rhythmic editing.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"slideshow_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)

    def pan_origin(pos_name, zoom_val, img_w, img_h):
        # Top-left corner of the visible window for a named anchor at the
        # given zoom level; unknown names map to the image origin.
        origins = {
            'center': (img_w/2 - (img_w/zoom_val)/2, img_h/2 - (img_h/zoom_val)/2),
            'top_left': (0, 0),
            'top_right': (img_w - img_w/zoom_val, 0),
            'bottom_left': (0, img_h - img_h/zoom_val),
            'bottom_right': (img_w - img_w/zoom_val, img_h - img_h/zoom_val),
        }
        return origins.get(pos_name, (0, 0))

    try:
        out_w = int(out_w) - (int(out_w) % 2)
        out_h = int(out_h) - (int(out_h) % 2)
        output_res_str = f"{out_w}x{out_h}"
        fps = 30
        transition_duration = 0.5

        progress(0, desc="Analyzing audio track...")
        try:
            y, sr = librosa.load(audio_path)
            audio_duration = librosa.get_duration(y=y, sr=sr)
            _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='frames')
            beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        except Exception as e:
            raise gr.Error(f"Audio analysis failed: {e}")

        beats_per_clip = {"1 Image per Beat": 1, "1 Image every 2 Beats": 2, "1 Image per Measure (4 Beats)": 4}[rhythm_choice]
        # The first clip starts at 0; later clips start on every Nth beat.
        clip_start_times = [0.0]
        for beat_index in range(beats_per_clip, len(beat_times), beats_per_clip):
            clip_start_times.append(beat_times[beat_index])

        MAX_CLIPS = 200
        if len(clip_start_times) > MAX_CLIPS:
            gr.Warning(f"Audio beat detection resulted in {len(clip_start_times)} clips. Capping at {MAX_CLIPS} to ensure performance.")
            clip_start_times = clip_start_times[:MAX_CLIPS]

        num_clips = len(clip_start_times)
        image_paths = [img.name for img in images]
        zoom_levels = {"Subtle": (1.1, 1.15), "Standard": (1.1, 1.25), "Dynamic": (1.2, 1.5)}
        directions = ['top_left', 'top_right', 'bottom_left', 'bottom_right', 'center']

        kb_clips = []
        total_steps = num_clips + 1
        for i in range(num_clips):
            # Progress advances with the number of clips actually produced.
            progress(len(kb_clips) / total_steps, desc=f"Creating clip {i+1}/{num_clips}")
            start_time = clip_start_times[i]
            end_time = clip_start_times[i + 1] if i + 1 < len(clip_start_times) else audio_duration
            clip_duration = end_time - start_time
            # Clips no longer than the transition cannot be cross-faded.
            if clip_duration <= transition_duration:
                continue
            total_frames = int(clip_duration * fps)
            if total_frames <= 0:
                continue

            source_image = image_paths[i % len(image_paths)]
            output_clip_path = os.path.join(job_temp_dir, f"kb_clip_{i:04d}.mp4")
            with Image.open(source_image) as img:
                iw, ih = img.size

            start_zoom = 1.0
            end_zoom = random.uniform(*zoom_levels[kb_effect_style])
            start_pos_name, end_pos_name = random.sample(directions, 2)
            start_x, start_y = pan_origin(start_pos_name, start_zoom, iw, ih)
            end_x, end_y = pan_origin(end_pos_name, end_zoom, iw, ih)

            # Linear interpolation over the output frame number ("on").
            x_expr = f"{start_x}+({end_x}-({start_x}))*on/({total_frames}-1)"
            y_expr = f"{start_y}+({end_y}-({start_y}))*on/({total_frames}-1)"
            z_expr = f"if(lte(on,0),{start_zoom},{start_zoom}+({end_zoom}-{start_zoom})*on/({total_frames}-1))"
            zoompan_filter = f"zoompan=z='{z_expr}':x='{x_expr}':y='{y_expr}':d={total_frames}:s={output_res_str}:fps={fps}"

            cmd = ["ffmpeg", "-y", "-loop", "1", "-i", source_image, "-vf", zoompan_filter, "-t", str(clip_duration), "-c:v", "libx264", "-pix_fmt", "yuv420p", output_clip_path]
            run_ffmpeg_command(cmd)
            kb_clips.append({"path": output_clip_path, "duration": clip_duration})

        if not kb_clips:
            raise gr.Error("No clips were generated. The audio may be too short or the rhythm settings too fast.")

        progress(len(kb_clips) / total_steps, desc=f"Applying transitions...")
        final_silent_path = os.path.join(job_temp_dir, "final_silent.mp4")
        if len(kb_clips) == 1:
            shutil.copy(kb_clips[0]['path'], final_silent_path)
        else:
            all_transitions = ["fade", "wipeleft", "wiperight", "wipeup", "wipedown", "slideleft", "slideright", "slideup", "slidedown", "dissolve"]
            cmd = ["ffmpeg", "-y"]
            for clip in kb_clips:
                cmd.extend(["-i", clip['path']])

            # Chain xfade filters: each step blends the running result with
            # the next input and labels the outcome [v<i>].
            filter_complex = []
            running_duration = 0
            for i in range(len(kb_clips) - 1):
                input1 = f"[{i}:v]" if i == 0 else f"[v{i-1}]"
                input2 = f"[{i+1}:v]"
                output = f"[v{i}]"
                if transition_style == "Random":
                    transition = random.choice(all_transitions)
                else:
                    transition = transition_style.lower()
                offset = running_duration + kb_clips[i]['duration'] - transition_duration
                filter_complex.append(f"{input1}{input2}xfade=transition={transition}:duration={transition_duration}:offset={offset}{output}")
                running_duration += kb_clips[i]['duration'] - transition_duration

            cmd.extend(["-filter_complex", ";".join(filter_complex), "-map", f"[v{len(kb_clips)-2}]", "-c:v", "libx264", "-pix_fmt", "yuv420p", final_silent_path])
            run_ffmpeg_command(cmd)

        progress(0.98, desc="Muxing final audio...")
        final_output_path = os.path.join(TEMP_DIR, f"slideshow_final_{timestamp}.mp4")
        run_ffmpeg_command(["ffmpeg", "-y", "-i", final_silent_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", final_output_path], "Muxing audio")
        return final_output_path
    finally:
        if os.path.exists(job_temp_dir):
            shutil.rmtree(job_temp_dir)
def slideshow_wrapper(
    images,
    audio_path,
    kb_effect_style,
    transition_style,
    rhythm_choice,
    out_w,
    out_h,
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio click handler for the automated slideshow.

    Passes every argument through, unchanged and in the same order, to
    ``_create_automated_slideshow_impl`` and returns its result. The
    ``progress`` default is a ``gr.Progress`` created with ``track_tqdm=True``.
    """
    forwarded_args = (
        images,
        audio_path,
        kb_effect_style,
        transition_style,
        rhythm_choice,
        out_w,
        out_h,
        progress,
    )
    return _create_automated_slideshow_impl(*forwarded_args)
def _create_rhythmic_remix_impl(video_path, audio_path, cut_style, beat_sync, resolution_choice, custom_w, custom_h, progress: Progress):
    """Cut a source video to the beat of an audio track and mux the two together.

    Steps: resolve the output resolution, detect beats with librosa, plan one
    clip per beat group, extract every clip with ffmpeg (scaled and padded to
    the output size), concatenate the clips and add the audio track.

    Args:
        video_path: Path of the source video.
        audio_path: Path of the music track.
        cut_style: "Sequential" or "Random Shuffle".
        beat_sync: "On the Beat", "Every 2 Beats" or "Every Measure (4 beats)".
        resolution_choice: "Match Source Video Dimensions", "1080p (1920x1080)",
            "720p (1280x720)" or "Custom".
        custom_w: Output width, only read when ``resolution_choice`` is "Custom".
        custom_h: Output height, only read when ``resolution_choice`` is "Custom".
        progress: Progress reporter, called as ``progress(fraction, desc=...)``.

    Returns:
        Path of the final video, written directly under ``TEMP_DIR``.

    Raises:
        gr.Error: On missing inputs, unknown option values, invalid custom
            dimensions, failed audio analysis, or when no clip can be planned.
    """
    if not video_path:
        raise gr.Error("Please upload a source video.")
    if not audio_path:
        raise gr.Error("Please upload an audio track.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    job_temp_dir = os.path.join(TEMP_DIR, f"remix_{timestamp}")
    os.makedirs(job_temp_dir, exist_ok=True)
    try:
        # --- Resolve the output frame size ---
        if resolution_choice == "Match Source Video Dimensions":
            out_w, out_h = get_video_dimensions(video_path)
            if out_w == 0 or out_h == 0:
                gr.Warning("Could not read source video dimensions. Defaulting to 1080p.")
                out_w, out_h = 1920, 1080
        elif resolution_choice == "1080p (1920x1080)":
            out_w, out_h = 1920, 1080
        elif resolution_choice == "720p (1280x720)":
            out_w, out_h = 1280, 720
        elif resolution_choice == "Custom":
            try:
                out_w, out_h = int(custom_w), int(custom_h)
            except (TypeError, ValueError) as e:
                # An emptied number field arrives as None / non-numeric.
                raise gr.Error("Custom width and height must be positive numbers.") from e
            if out_w <= 0 or out_h <= 0:
                raise gr.Error("Custom width and height must be positive numbers.")
        else:
            raise gr.Error(f"Unknown output resolution option: {resolution_choice}")

        # libx264 with yuv420p needs even dimensions, so round down BEFORE the
        # filter string is built (this also covers odd-sized source videos).
        out_w, out_h = out_w - (out_w % 2), out_h - (out_h % 2)
        if out_w < 2 or out_h < 2:
            raise gr.Error("Output width and height must be at least 2 pixels.")
        vf_filter = f"scale={out_w}:{out_h}:force_original_aspect_ratio=decrease,pad={out_w}:{out_h}:(ow-iw)/2:(oh-ih)/2,setsar=1"

        # --- Beat detection ---
        progress(0, desc="Analyzing audio track for beats...")
        try:
            y, sr = librosa.load(audio_path)
            audio_duration = librosa.get_duration(y=y, sr=sr)
            _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='frames')
            beat_times = librosa.frames_to_time(beat_frames, sr=sr)
            if len(beat_times) < 2:
                raise ValueError("Not enough beats were detected in the audio.")
        except Exception as e:
            raise gr.Error(f"Audio analysis failed: {e}") from e

        beats_per_clip = {"On the Beat": 1, "Every 2 Beats": 2, "Every Measure (4 beats)": 4}.get(beat_sync)
        if beats_per_clip is None:
            raise gr.Error(f"Unknown cutting frequency: {beat_sync}")

        clip_definitions = [
            {'duration': duration}
            for duration in _plan_remix_clip_durations(beat_times, audio_duration, beats_per_clip)
        ]
        if not clip_definitions:
            raise gr.Error("Could not define any video clips based on the detected beats.")

        # --- Choose where in the source each clip is taken from ---
        progress(0.1, desc="Planning video cuts...")
        # NOTE(review): get_media_duration is defined elsewhere; presumably it
        # returns the length in seconds -- confirm.
        source_duration = get_media_duration(video_path)
        _assign_remix_source_starts(clip_definitions, source_duration, cut_style)

        # --- Extract every clip (re-encoded, silent, scaled/padded) ---
        extracted_clip_paths = []
        for i, clip in enumerate(clip_definitions):
            progress(0.1 + (0.7 * (i / len(clip_definitions))), desc=f"Extracting clip {i+1}/{len(clip_definitions)}...")
            output_clip_path = os.path.join(job_temp_dir, f"clip_{i:04d}.mp4")
            cmd = [
                "ffmpeg", "-y",
                "-ss", str(clip['source_start']),
                "-i", video_path,
                "-t", str(clip['duration']),
                "-an",
                "-c:v", "libx264", "-pix_fmt", "yuv420p",
                "-vf", vf_filter,
                output_clip_path,
            ]
            run_ffmpeg_command(cmd)
            extracted_clip_paths.append(output_clip_path)

        # --- Concatenate ---
        progress(0.85, desc="Stitching clips together...")
        file_list_path = os.path.join(job_temp_dir, "files.txt")
        with open(file_list_path, 'w', encoding='utf-8') as f:
            for path in extracted_clip_paths:
                # Inside the concat list a single quote must be written as '\''.
                escaped_path = os.path.abspath(path).replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")
        silent_final_path = os.path.join(job_temp_dir, "final_silent.mp4")
        run_ffmpeg_command(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", silent_final_path], "Concatenating clips...")

        # --- Add the music ---
        progress(0.95, desc="Adding music...")
        final_output_path = os.path.join(TEMP_DIR, f"remix_final_{timestamp}.mp4")
        run_ffmpeg_command(["ffmpeg", "-y", "-i", silent_final_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", final_output_path], "Muxing audio")
        return final_output_path
    finally:
        # The per-job scratch directory is always removed; the final file
        # lives outside of it.
        if os.path.exists(job_temp_dir):
            shutil.rmtree(job_temp_dir)


def _plan_remix_clip_durations(beat_times, audio_duration, beats_per_clip):
    """Return the clip durations (seconds) that tile the audio from 0 to its end.

    A cut is placed on every ``beats_per_clip``-th detected beat. The first
    clip starts at 0.0 instead of at the first beat: the audio is muxed from
    its very start, so leaving the lead-in uncovered would make every later
    cut land ahead of its beat by that lead-in. Segments of 0.1 s or less
    are dropped.
    """
    cut_times = [float(t) for t in beat_times[::beats_per_clip]]
    boundaries = [0.0] + cut_times[1:] + [float(audio_duration)]
    durations = []
    for start, end in zip(boundaries, boundaries[1:]):
        duration = end - start
        if duration > 0.1:
            durations.append(duration)
    return durations


def _assign_remix_source_starts(clip_definitions, source_duration, cut_style):
    """Set ``clip['source_start']`` on every clip dict according to ``cut_style``."""
    if cut_style == "Sequential":
        position = 0
        warned = False
        for clip in clip_definitions:
            # Wrap BEFORE assigning, so no clip is asked to run past the end
            # of the source (which would yield a shorter clip and break sync).
            if position + clip['duration'] > source_duration:
                if not warned:
                    gr.Warning("Source video is shorter than the music. Looping video from the beginning.")
                    warned = True
                position = 0
            clip['source_start'] = position
            position += clip['duration']
    elif cut_style == "Random Shuffle":
        for clip in clip_definitions:
            max_start_time = source_duration - clip['duration']
            clip['source_start'] = random.uniform(0, max_start_time) if max_start_time > 0 else 0
    else:
        raise gr.Error(f"Unknown video cut style: {cut_style}")
def rhythmic_remix_wrapper(
    video_path,
    audio_path,
    cut_style,
    beat_sync,
    resolution_choice,
    custom_w,
    custom_h,
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio click handler for the auto music video remixer.

    Passes every argument through, unchanged and in the same order, to
    ``_create_rhythmic_remix_impl`` and returns its result. The ``progress``
    default is a ``gr.Progress`` created with ``track_tqdm=True``.
    """
    forwarded_args = (
        video_path,
        audio_path,
        cut_style,
        beat_sync,
        resolution_choice,
        custom_w,
        custom_h,
        progress,
    )
    return _create_rhythmic_remix_impl(*forwarded_args)
# --- BLING --- CSS AND JS ---
# Custom stylesheet handed to gr.Blocks(css=...). It sets the font and the
# animated gradient background, gives tab/accordion/group containers a
# translucent "glass" look, styles buttons, info/warning boxes, scrollbars and
# the #custom-footer element, and defines the full-screen #loading-overlay
# (toggled through its .visible class) together with its .spinner animation.
bling_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap');
:root {
--bling-main-font: 'Inter', sans-serif;
--bling-gradient-start: #0f172a;
--bling-gradient-mid: #1e293b;
--bling-gradient-end: #334155;
--bling-accent-color: #38bdf8; /* sky-400 */
}
body, .gradio-container {
font-family: var(--bling-main-font) !important;
background: var(--bling-gradient-start);
background: linear-gradient(135deg, var(--bling-gradient-start) 0%, var(--bling-gradient-mid) 50%, var(--bling-gradient-end) 100%);
background-size: 200% 200%;
animation: gradient-animation 15s ease infinite;
}
@keyframes gradient-animation {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
/* Glassmorphism for containers */
.gradio-tabs, .gradio-accordion, .gradio-group {
background: rgba(255, 255, 255, 0.05) !important;
border: 1px solid rgba(255, 255, 255, 0.1) !important;
border-radius: 12px !important;
backdrop-filter: blur(10px) !important;
-webkit-backdrop-filter: blur(10px) !important;
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1) !important;
}
/* Button Bling */
.gradio-button {
transition: all 0.2s ease-in-out !important;
box-shadow: 0 2px 4px rgba(0,0,0,0.2) !important;
}
.gradio-button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0,0,0,0.3) !important;
filter: brightness(1.1);
}
/* Custom Info/Warning Boxes */
.gradio-info {
background: rgba(56, 189, 248, 0.1) !important; /* sky-400 with alpha */
color: #f0f9ff !important; /* sky-50 */
border-left: 4px solid var(--bling-accent-color) !important;
border-radius: 8px !important;
}
.gradio-warning {
background: rgba(251, 191, 36, 0.1) !important; /* amber-400 with alpha */
color: #fffbeb !important; /* amber-50 */
border-left: 4px solid #fbbf24 !important;
border-radius: 8px !important;
}
/* Custom Scrollbars */
::-webkit-scrollbar { width: 8px; }
::-webkit-scrollbar-track { background: rgba(255, 255, 255, 0.1); }
::-webkit-scrollbar-thumb { background-color: var(--bling-accent-color); border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background-color: #0ea5e9; } /* sky-500 */
#custom-footer {
text-align: center !important;
padding: 20px 0 5px 0 !important;
font-size: .9em;
color: #94a3b8; /* slate-400 */
}
/* Loading Overlay CSS */
#loading-overlay {
position: fixed;
top: 0;
left: 0;
width: 100vw;
height: 100vh;
background-color: rgba(15, 23, 42, 0.8);
z-index: 10000;
display: flex;
justify-content: center;
align-items: center;
flex-direction: column;
color: white;
font-size: 1.2em;
backdrop-filter: blur(5px);
-webkit-backdrop-filter: blur(5px);
opacity: 0;
visibility: hidden;
transition: opacity 0.3s ease, visibility 0.3s ease;
}
#loading-overlay.visible {
opacity: 1;
visibility: visible;
}
.spinner {
width: 60px;
height: 60px;
border: 5px solid rgba(255, 255, 255, 0.3);
border-top-color: var(--bling-accent-color);
border-radius: 50%;
animation: spin 1s linear infinite;
margin-bottom: 20px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
"""
# Front-end script handed to gr.Blocks(js=...). It installs J/K/L keyboard
# control for the video under the mouse and publishes helper functions on
# window.skriptz_bling (overlay, confetti, finish sound, page title, clipboard,
# storyboard time) for the js= callbacks used by the UI.
# Changes against the previous version:
#   * copy_to_clipboard no longer reads `this.event` (undefined when the
#     function is called as a method of window.skriptz_bling, which threw
#     after copying) and tolerates a missing element.
#   * fire_confetti reuses the confetti library once it is loaded instead of
#     injecting a new <script> tag on every call.
bling_js = """
() => {
// --- JKL Video Control ---
let activeVideo = null;
document.addEventListener('mouseover', (e) => {
if (e.target.tagName === 'VIDEO') {
activeVideo = e.target;
}
});
document.addEventListener('keydown', (e) => {
const activeElement = document.activeElement;
if (activeElement && (activeElement.tagName === 'INPUT' || activeElement.tagName === 'TEXTAREA')) {
return;
}
if (!activeVideo) return;
const frameTime = 1 / 30;
let handled = false;
switch (e.key.toLowerCase()) {
case 'k': activeVideo.paused ? activeVideo.play() : activeVideo.pause(); handled = true; break;
case 'j': activeVideo.currentTime = Math.max(0, activeVideo.currentTime - frameTime); handled = true; break;
case 'l': activeVideo.currentTime += frameTime; handled = true; break;
}
if (handled) e.preventDefault();
});
// --- Loading Overlay ---
function show_overlay(message = 'Processing... Please wait.') {
let overlay = document.getElementById('loading-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'loading-overlay';
overlay.innerHTML = `<div class="spinner"></div><p id="loading-message"></p>`;
document.body.appendChild(overlay);
}
document.getElementById('loading-message').textContent = message;
overlay.classList.add('visible');
}
function hide_overlay() {
const overlay = document.getElementById('loading-overlay');
if (overlay) {
overlay.classList.remove('visible');
}
}
// --- Confetti ---
function fire_confetti() {
const launch = () => {
const shoot = window.confetti.create(null, { resize: true, useWorker: true });
shoot({ particleCount: 150, spread: 90, origin: { y: 0.6 } });
};
// Reuse the library once it is loaded instead of adding another script tag per call.
if (typeof window.confetti === 'function') {
launch();
return;
}
const script = document.createElement('script');
script.setAttribute('src', 'https://cdn.jsdelivr.net/npm/canvas-confetti@1.9.2/dist/confetti.browser.min.js');
script.onload = launch;
document.head.appendChild(script);
}
// --- Audio Feedback with Synthesized Whistle ---
const skriptz_audio = {
context: null,
isInitialized: false,
};
async function init_audio() {
if (skriptz_audio.isInitialized) return;
try {
skriptz_audio.context = new (window.AudioContext || window.webkitAudioContext)();
if (skriptz_audio.context.state === 'suspended') {
await skriptz_audio.context.resume();
}
} catch (e) {
console.error('Failed to initialize Web Audio API:', e);
}
skriptz_audio.isInitialized = true;
}
async function play_finish_sound() {
if (!skriptz_audio.isInitialized) {
await init_audio();
}
const context = skriptz_audio.context;
if (!context) return;
if (context.state === 'suspended') {
await context.resume();
}
const now = context.currentTime;
const delay = 0.2;
const startTime = now + delay;
const oscillator = context.createOscillator();
const gainNode = context.createGain();
oscillator.connect(gainNode);
gainNode.connect(context.destination);
oscillator.type = 'sine';
const startFreq = 2000;
const endFreq = 1000;
oscillator.frequency.setValueAtTime(startFreq, startTime);
oscillator.frequency.exponentialRampToValueAtTime(endFreq, startTime + 0.15);
gainNode.gain.setValueAtTime(0, startTime);
gainNode.gain.linearRampToValueAtTime(0.4, startTime + 0.02);
gainNode.gain.linearRampToValueAtTime(0, startTime + 0.15);
oscillator.start(startTime);
oscillator.stop(startTime + 0.2);
}
// --- Dynamic Page Title ---
function update_title(tab_name) {
if (tab_name) {
const clean_name = tab_name.replace(/[\\u{1F600}-\\u{1F64F}\\u{1F300}-\\u{1F5FF}\\u{1F680}-\\u{1F6FF}\\u{1F700}-\\u{1F77F}\\u{1F780}-\\u{1F7FF}\\u{1F800}-\\u{1F8FF}\\u{1F900}-\\u{1F9FF}\\u{1FA00}-\\u{1FA6F}\\u{1FA70}-\\u{1FAFF}\\u{2600}-\\u{26FF}\\u{2700}-\\u{27BF}]/gu, '').trim();
document.title = `Skriptz - ${clean_name}`;
} else {
document.title = "Skriptz - Universal Tool";
}
}
// --- Copy to Clipboard ---
function copy_to_clipboard(text_id) {
const container = document.getElementById(text_id);
const text_area = container ? container.querySelector('textarea') : null;
if(text_area) {
text_area.select();
document.execCommand('copy');
// Called as a method of skriptz_bling, so the click event has to come from window, not from this.
const click_event = window.event;
const original_button = click_event ? click_event.target : null;
if (original_button) {
const original_text = original_button.innerText;
original_button.innerText = 'Copied!';
setTimeout(() => { original_button.innerText = original_text; }, 2000);
}
}
}
// --- Storyboard Time Getter ---
function storyboard_get_time(){
const e=document.querySelector('#storyboard_clip_preview video');
return e?e.currentTime:0
}
// Make functions globally accessible for Gradio
window.skriptz_bling = {
show_overlay,
hide_overlay,
fire_confetti,
play_finish_sound,
update_title,
copy_to_clipboard,
storyboard_get_time
};
}
"""
with gr.Blocks(
title="Skriptz - Universal Tool",
css=bling_css,
js=bling_js
) as demo:
gr.HTML("""
<div id="loading-overlay">
<div class="spinner"></div>
<p id="loading-message">Processing... Please wait.</p>
</div>
<script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.9.2/dist/confetti.browser.min.js"></script>
""", visible=False)
logo_b64 = get_image_as_base64("logo.png")
if logo_b64: gr.HTML(f"""<div style="display: flex; justify-content: center; align-items: center; text-align: center; margin-bottom: 20px;"><a href="https://linktr.ee/skylinkd" target="_blank" rel="noopener noreferrer"><img src="{logo_b64}" alt="Skriptz Banner" style="max-width: 100%; max-height: 100px; height: auto;"></a></div>""")
else: gr.Markdown("# Skriptz Universal Tool")
gr.Markdown("<h3 style='text-align: center;'>Your one-stop shop for video and image processing</h3>")
storyboard_get_time_js = "() => { return window.skriptz_bling.storyboard_get_time(); }"
show_overlay_js = "() => { window.skriptz_bling.show_overlay('Working hard... this may take a moment!'); }"
hide_overlay_js = "() => { window.skriptz_bling.hide_overlay(); }"
fire_confetti_and_sound_js = "() => { window.skriptz_bling.fire_confetti(); window.skriptz_bling.play_finish_sound(); }"
copy_transcription_js = "() => { window.skriptz_bling.copy_to_clipboard('transcription_textbox'); }"
# --- UNIFIED HEIGHT FOR MEDIA COMPONENTS ---
UNIFIED_HEIGHT = 440
with gr.Tabs(elem_id="main_tabs") as main_tabs:
with gr.TabItem("🎬 Storyboard & Animatic", elem_id="storyboard_tab"):
gr.Markdown("## Create Video Animatics from Images and Clips")
gr.Info("1. **Build:** Upload assets, click to add to timeline. 2. **Time:** Set durations, trim videos, or use Rhythmic Editing. 3. **Generate:** Create your final video.")
assets_state = gr.State([])
timeline_state = gr.State([])
selected_timeline_index_state = gr.State(-1)
with gr.Row(equal_height=False):
with gr.Column(scale=2, min_width=400):
with gr.Group():
gr.Markdown("### 1. Asset Bin")
assets_upload_btn = gr.File(label="Upload Images & Video Clips", file_count="multiple", file_types=["image", "video"])
asset_gallery = gr.Gallery(label="Click an asset to add it to the timeline", columns=4, object_fit="contain", height=400)
asset_preview_gallery = gr.Gallery(label="Video Asset Preview (First & Last Frame)", columns=2, height=240, object_fit="contain", interactive=False)
with gr.Row():
add_all_to_timeline_btn = gr.Button("⬇️ Add All to Timeline")
clear_assets_btn = gr.Button("πŸ—‘οΈ Clear Asset Bin")
with gr.Column(scale=3, min_width=600):
with gr.Group():
gr.Markdown("### 2. Timeline & Generation")
timeline_df = gr.DataFrame(headers=["#", "Asset", "Type", "Duration (s)"], datatype=["number", "str", "str", "number"], interactive=False, row_count=(10, "fixed"))
with gr.Row():
timeline_up_btn = gr.Button("⬆️ Move Up", interactive=False)
timeline_down_btn = gr.Button("⬇️ Move Down", interactive=False)
timeline_remove_btn = gr.Button("πŸ—‘οΈ Remove", interactive=False)
clear_timeline_btn = gr.Button("πŸ’₯ Clear Timeline")
gr.Markdown("### 3. Output Settings")
keep_audio_checkbox = gr.Checkbox(label="Keep Original Audio from Video Clips", value=False, info="If checked, the Project Audio Track below will be ignored.")
animatic_audio = gr.Audio(label="Project Audio Track (Narration/Music)", type="filepath")
with gr.Row():
animatic_out_w = gr.Number(label="Output Width", value=1920)
animatic_out_h = gr.Number(label="Output Height", value=1080)
match_first_asset_btn = gr.Button("πŸ“ Match First Asset's Resolution")
generate_animatic_btn = gr.Button("🎬 Generate Manual Animatic", variant="secondary")
with gr.Accordion("🎡 Rhythmic Editing (Beat Sync)", open=False):
gr.Info("This will override manual durations and re-time all clips to match the music's rhythm.")
with gr.Row():
analyze_bpm_btn = gr.Button("πŸ₯ Analyze BPM")
bpm_display = gr.Textbox(label="Audio BPM", interactive=False)
measure_dropdown = gr.Dropdown(
["2 Measures", "1 Measure", "1/2 Measure", "1/4 Measure (Beat)"],
value="1 Measure", label="Cut Duration per Clip"
)
generate_rhythmic_btn = gr.Button("🎢 Generate Rhythmic Animatic", variant="primary")
animatic_output_video = gr.Video(label="Final Video Output", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
with gr.Column(scale=2, min_width=300):
with gr.Group():
gr.Markdown("### 4. Clip Properties")
gr.Info("Select a clip in the timeline table to edit it.")
clip_preview = gr.Video(label="Selected Clip Preview", interactive=True, elem_id="storyboard_clip_preview", height=UNIFIED_HEIGHT)
clip_duration_input = gr.Number(label="Set Duration (seconds)", interactive=True, precision=2)
update_clip_btn = gr.Button("πŸ”„ Update Clip Duration")
with gr.Group(visible=False) as trim_group:
gr.Markdown("#### Video Trimming")
gr.Info("Use player (K=Play, J/L=Frame Step) to find a frame, then use buttons below.")
with gr.Row():
clip_start_time_input = gr.Number(label="Start Time (s)", precision=2)
clip_end_time_input = gr.Number(label="End Time (s)", precision=2)
with gr.Row():
set_clip_start_btn = gr.Button("Set START")
set_clip_end_btn = gr.Button("Set END")
apply_trim_btn = gr.Button("βœ‚οΈ Apply Trim")
assets_upload_btn.upload(add_assets_to_bin, [assets_upload_btn, assets_state], [assets_state, asset_gallery])
asset_gallery.select(handle_asset_selection, [assets_state, timeline_state], [timeline_state, asset_preview_gallery])
add_all_to_timeline_btn.click(add_all_assets_to_timeline, [assets_state, timeline_state], timeline_state)
timeline_state.change(update_timeline_df, timeline_state, timeline_df)
timeline_state.change(lambda: (-1, None, None, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False), gr.update(visible=False), 0, 0),
outputs=[selected_timeline_index_state, clip_preview, clip_duration_input, timeline_up_btn, timeline_down_btn, timeline_remove_btn, trim_group, clip_start_time_input, clip_end_time_input])
timeline_df.select(
handle_timeline_selection,
timeline_state,
[selected_timeline_index_state, clip_preview, clip_duration_input, timeline_up_btn, timeline_down_btn, timeline_remove_btn, trim_group, clip_start_time_input, clip_end_time_input]
)
update_clip_btn.click(update_clip_properties, [timeline_state, selected_timeline_index_state, clip_duration_input], timeline_state)
apply_trim_btn.click(apply_trim_and_update, [timeline_state, selected_timeline_index_state, clip_start_time_input, clip_end_time_input], [timeline_state, clip_duration_input])
set_clip_start_btn.click(fn=None, js=storyboard_get_time_js, outputs=clip_start_time_input)
set_clip_end_btn.click(fn=None, js=storyboard_get_time_js, outputs=clip_end_time_input)
timeline_up_btn.click(handle_timeline_action, [timeline_state, selected_timeline_index_state, gr.State("up")], [timeline_state, timeline_df])
timeline_down_btn.click(handle_timeline_action, [timeline_state, selected_timeline_index_state, gr.State("down")], [timeline_state, timeline_df])
timeline_remove_btn.click(handle_timeline_action, [timeline_state, selected_timeline_index_state, gr.State("remove")], [timeline_state, timeline_df])
clear_assets_btn.click(lambda: ([], gr.update(value=None), None), outputs=[assets_state, asset_gallery, asset_preview_gallery])
clear_timeline_btn.click(lambda: [], outputs=[timeline_state])
keep_audio_checkbox.change(fn=lambda x: gr.update(interactive=not x), inputs=keep_audio_checkbox, outputs=animatic_audio)
generate_animatic_btn.click(fn=create_animatic, inputs=[timeline_state, animatic_audio, animatic_out_w, animatic_out_h, keep_audio_checkbox], outputs=animatic_output_video).then(fn=None, js=fire_confetti_and_sound_js)
analyze_bpm_btn.click(detect_bpm, animatic_audio, bpm_display)
generate_rhythmic_btn.click(fn=create_rhythmic_animatic, inputs=[timeline_state, animatic_audio, measure_dropdown, animatic_out_w, animatic_out_h], outputs=animatic_output_video).then(fn=None, js=fire_confetti_and_sound_js)
match_first_asset_btn.click(set_resolution_from_first_asset, timeline_state, [animatic_out_w, animatic_out_h])
with gr.TabItem("🎨 Creative Suite", elem_id="creative_tab"):
with gr.Tabs():
with gr.TabItem("🎬 Automated Slideshow"):
gr.Markdown("## Automated Rhythmic Slideshow Creator")
gr.Info("Turn a collection of images and a music track into a dynamic video with Ken Burns effects and transitions synced to the beat.")
with gr.Row():
with gr.Column(scale=2):
slideshow_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
slideshow_audio = gr.Audio(label="Upload Music Track", type="filepath")
with gr.Accordion("βš™οΈ Style & Timing", open=True):
slideshow_kb_effect = gr.Dropdown(["Subtle", "Standard", "Dynamic"], value="Standard", label="Ken Burns Effect Intensity")
slideshow_transition = gr.Dropdown(["Random", "Fade", "WipeLeft", "WipeRight", "Dissolve", "SlideLeft", "SlideRight"], value="Random", label="Transition Style")
slideshow_rhythm = gr.Dropdown(["1 Image per Beat", "1 Image every 2 Beats", "1 Image per Measure (4 Beats)"], value="1 Image every 2 Beats", label="Image Display Rhythm")
with gr.Row():
slideshow_out_w = gr.Number(label="Output Width", value=1920)
slideshow_out_h = gr.Number(label="Output Height", value=1080)
slideshow_generate_btn = gr.Button("πŸš€ Generate Slideshow", variant="primary")
with gr.Column(scale=3):
slideshow_output_video = gr.Video(label="Generated Slideshow Video", show_download_button=True, height=UNIFIED_HEIGHT)
slideshow_generate_btn.click(
fn=slideshow_wrapper,
inputs=[slideshow_input_images, slideshow_audio, slideshow_kb_effect, slideshow_transition, slideshow_rhythm, slideshow_out_w, slideshow_out_h],
outputs=slideshow_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎡 Auto Music Video"):
gr.Markdown("## Automatic Rhythmic Video Remixer")
gr.Info("Automatically cuts a source video to the beat of a music track, creating a dynamic music video.")
with gr.Row():
with gr.Column(scale=2):
remix_input_video = gr.Video(label="Upload Source Video", height=UNIFIED_HEIGHT)
remix_audio = gr.Audio(label="Upload Music Track", type="filepath")
with gr.Accordion("βš™οΈ Remix Settings", open=True):
remix_cut_style = gr.Radio(
["Sequential", "Random Shuffle"],
value="Random Shuffle",
label="Video Cut Style",
info="Sequential: Cuts the video from start to finish. Random Shuffle: Picks random moments from the source video."
)
remix_beat_sync = gr.Dropdown(
["On the Beat", "Every 2 Beats", "Every Measure (4 beats)"],
value="Every 2 Beats",
label="Cutting Frequency"
)
remix_resolution = gr.Radio(
["Match Source Video Dimensions", "1080p (1920x1080)", "720p (1280x720)", "Custom"],
value="1080p (1920x1080)",
label="Output Resolution"
)
with gr.Row(visible=False) as custom_res_row:
remix_custom_w = gr.Number(label="Custom Width", value=1920)
remix_custom_h = gr.Number(label="Custom Height", value=1080)
remix_generate_btn = gr.Button("🎀 Generate Music Video", variant="primary")
with gr.Column(scale=3):
remix_output_video = gr.Video(label="Generated Music Video", show_download_button=True, height=UNIFIED_HEIGHT)
remix_resolution.change(
fn=lambda choice: gr.update(visible=(choice == "Custom")),
inputs=remix_resolution,
outputs=custom_res_row
)
remix_generate_btn.click(
fn=rhythmic_remix_wrapper,
inputs=[remix_input_video, remix_audio, remix_cut_style, remix_beat_sync, remix_resolution, remix_custom_w, remix_custom_h],
outputs=remix_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("⚑ Gradual Speed Ramp"):
gr.Markdown("## Create a Smooth, Gradual Speed Ramp Effect")
gr.Info("Applies a smooth 'bullet-time' like effect to the entire video, slowing down to 50% speed in the middle and then ramping back up to normal speed. This uses frame interpolation for a high-quality result.")
with gr.Row():
with gr.Column(scale=2):
gradual_ramp_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
gradual_ramp_btn = gr.Button("βŒ› Apply Gradual Ramp", variant="primary")
with gr.Column(scale=3):
gradual_ramp_output_video = gr.Video(label="Ramped Video Output", show_download_button=True, height=UNIFIED_HEIGHT)
gradual_ramp_btn.click(
fn=create_gradual_ramp_video,
inputs=[gradual_ramp_input_video],
outputs=gradual_ramp_output_video,
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎞️ Auto-Trailer"):
gr.Markdown("## Automatic Trailer Creator")
gr.Info("Upload a long video, and this tool will find the most action-packed moments to create a short, dynamic trailer.")
with gr.Row():
with gr.Column(scale=2):
trailer_input_video = gr.Video(label="Upload Source Video", height=UNIFIED_HEIGHT)
trailer_music = gr.Audio(label="Add Background Music (Optional)", type="filepath")
with gr.Accordion("βš™οΈ Trailer Settings", open=True):
trailer_total_duration = gr.Slider(10, 120, 30, step=5, label="Total Trailer Length (s)")
trailer_clip_duration = gr.Slider(1.0, 5.0, 2.0, step=0.5, label="Duration of Each Clip (s)")
trailer_analysis_method = gr.Dropdown(["Motion"], value="Motion", label="Scene Analysis Method", info="Currently only motion detection is supported.")
trailer_transition = gr.Dropdown(["None", "Fade", "WipeLeft", "WipeRight", "Dissolve", "SlideLeft", "SlideRight"], value="Fade", label="Transition Style")
with gr.Row():
trailer_out_w = gr.Number(label="Output Width", value=1920)
trailer_out_h = gr.Number(label="Output Height", value=1080)
trailer_generate_btn = gr.Button("πŸš€ Generate Trailer", variant="primary")
with gr.Column(scale=3):
trailer_output_video = gr.Video(label="Generated Trailer Video", show_download_button=True, height=UNIFIED_HEIGHT)
trailer_generate_btn.click(
fn=auto_trailer_wrapper,
inputs=[
trailer_input_video, trailer_total_duration, trailer_clip_duration,
trailer_analysis_method, trailer_transition, trailer_music,
trailer_out_w, trailer_out_h
],
outputs=trailer_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎡 Audio Waveform"):
gr.Markdown("## Generate & Overlay Audio Waveforms")
gr.Info("Upload a video with an audio track to generate a dynamic waveform visualization.")
with gr.Row():
with gr.Column():
waveform_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
with gr.Row():
waveform_style = gr.Dropdown(["line", "p2p", "point"], value="line", label="Waveform Style")
waveform_size = gr.Textbox(value="800x200", label="Size (WxH)")
with gr.Row():
waveform_position = gr.Dropdown(["Bottom", "Center", "Top"], value="Bottom", label="Position")
waveform_color = gr.ColorPicker(value="#38bdf8", label="Color")
waveform_btn = gr.Button("🎢 Generate Waveform Video", variant="primary")
with gr.Column():
waveform_output_video = gr.Video(label="Output Video with Waveform", show_download_button=True, height=UNIFIED_HEIGHT)
waveform_btn.click(
fn=generate_waveform_video,
inputs=[waveform_input_video, waveform_style, waveform_size, waveform_position, waveform_color],
outputs=waveform_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ–ΌοΈ Picture-in-Picture"):
gr.Markdown("## Create Picture-in-Picture (PiP) & Image Overlays")
gr.Info("Upload a main video and an overlay (video or image). The overlay will be placed on top of the main video.")
with gr.Row():
with gr.Column():
pip_main_video = gr.Video(label="Main Video (Background)", height=UNIFIED_HEIGHT)
pip_overlay_media = gr.File(label="Overlay Media (Video or Image)", file_types=["video", "image"])
with gr.Row():
pip_position = gr.Dropdown(
["Top-Left", "Top-Center", "Top-Right", "Center-Left", "Center", "Center-Right", "Bottom-Left", "Bottom-Center", "Bottom-Right"],
value="Bottom-Right", label="Position"
)
pip_scale = gr.Slider(0.01, 0.75, 0.25, step=0.01, label="Overlay Size")
pip_btn = gr.Button("✨ Create PiP Video", variant="primary")
with gr.Column():
pip_output_video = gr.Video(label="Output PiP Video", show_download_button=True, height=UNIFIED_HEIGHT)
pip_btn.click(
fn=create_pip_video,
inputs=[pip_main_video, pip_overlay_media, pip_position, pip_scale],
outputs=pip_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ˜‚ Meme Creator"):
gr.Markdown("## Classic Meme & Text Overlay Creator")
gr.Info("Upload an image and add text. Font availability depends on your operating system.")
with gr.Row():
with gr.Column():
meme_input_image = gr.Image(type="numpy", label="Input Image", height=UNIFIED_HEIGHT)
meme_text = gr.Textbox(label="Text", placeholder="Your witty text here...")
meme_position = gr.Radio(["Top", "Center", "Bottom"], value="Top", label="Text Position")
meme_font = gr.Dropdown(
["Impact", "Arial", "Arial Black", "Comic Sans MS", "Courier New", "Georgia", "Tahoma", "Times New Roman", "Trebuchet MS", "Verdana"],
value="Impact",
label="Font"
)
with gr.Row():
meme_text_color = gr.ColorPicker(value="#FFFFFF", label="Text Color")
meme_outline_color = gr.ColorPicker(value="#000000", label="Outline Color")
meme_font_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size")
meme_btn = gr.Button("πŸ˜‚ Generate Meme", variant="primary")
with gr.Column():
meme_output_image = gr.Image(label="Output Image", show_download_button=True, height=UNIFIED_HEIGHT)
meme_btn.click(
fn=create_meme,
inputs=[meme_input_image, meme_text, meme_position, meme_font, meme_font_size, meme_text_color, meme_outline_color],
outputs=meme_output_image
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ€– FLUX.1 API"):
gr.Markdown("### Generate an image using `FLUX.1` models via Gradio Client.")
gr.Info("Requires a Hugging Face User Access Token.")
with gr.Row():
with gr.Column():
hf_token_input = gr.Textbox(label="HF Token", type="password", placeholder="Enter hf_... token")
flux_model_dropdown = gr.Dropdown(list(FLUX_MODELS.keys()), value="FLUX.1-schnell (Fast)", label="Select FLUX Model")
prompt_input_flux = gr.Textbox(label="Prompt", lines=3, placeholder="A cinematic photo...")
with gr.Row():
flux_width_slider = gr.Slider(256, 2048, 1024, step=64, label="Width")
flux_height_slider = gr.Slider(256, 2048, 1024, step=64, label="Height")
flux_btn = gr.Button("β˜„οΈ Generate Image", variant="primary")
with gr.Column():
output_image_flux = gr.Image(label="Generated Image", show_download_button=True, height=UNIFIED_HEIGHT)
flux_btn.click(call_flux_api, [prompt_input_flux, flux_model_dropdown, flux_width_slider, flux_height_slider, hf_token_input], output_image_flux).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ–ΌοΈ Image Utilities", elem_id="image_tab"):
with gr.Tabs():
with gr.TabItem("✨ Manipulate"):
gr.Markdown("### Simple Image Manipulation")
gr.Info("Apply a single transformation like inverting colors, flipping, or rotating.")
with gr.Row():
with gr.Column():
manip_input_image = gr.Image(type="numpy", label="Input Image", height=UNIFIED_HEIGHT)
manip_operation_radio = gr.Radio(
["Invert Colors", "Flip Horizontal", "Flip Vertical", "Rotate 90Β° Right", "Rotate 90Β° Left"],
label="Select Operation", value="Invert Colors"
)
manip_apply_btn = gr.Button("🎨 Apply Manipulation", variant="primary")
with gr.Column():
manip_output_image = gr.Image(label="Output Image", show_download_button=True, height=UNIFIED_HEIGHT)
manip_apply_btn.click(fn=manipulate_image, inputs=[manip_input_image, manip_operation_radio], outputs=manip_output_image).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("βœ‚οΈ Batch Cropper"):
gr.Markdown("### Crop a batch of images to the same dimensions.")
gr.Info("Upload images, and the first image will be used as a preview. Adjust the sliders to see the crop area, then process the whole batch.")
crop_original_preview_state = gr.State()
with gr.Row():
with gr.Column(scale=1):
crop_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
gr.Markdown("#### Crop Box Settings")
crop_box_w = gr.Slider(label="Width", minimum=64, maximum=4096, step=8, value=512)
crop_box_h = gr.Slider(label="Height", minimum=64, maximum=4096, step=8, value=512)
crop_box_x = gr.Slider(label="X Offset", minimum=0, maximum=4096, step=8, value=0)
crop_box_y = gr.Slider(label="Y Offset", minimum=0, maximum=4096, step=8, value=0)
crop_btn = gr.Button("βœ‚οΈ Crop All Images", variant="primary")
with gr.Column(scale=2):
crop_preview_image_display = gr.Image(label="Crop Preview (on first image)", type="pil", interactive=False, height=UNIFIED_HEIGHT)
crop_output_gallery = gr.Gallery(label="Cropped Images Preview", columns=4, object_fit="contain", height="auto")
crop_output_zip = gr.File(label="Download All as .zip", interactive=False)
def setup_crop_preview(files):
    """Load the first uploaded image and fit the crop sliders to its size.

    Args:
        files: Value of the multi-file upload. Each item may be a file
            wrapper exposing ``.name`` or a plain path string. May be
            empty or ``None``.

    Returns:
        A 6-tuple matching the upload handler's outputs: the preview image,
        the same image for the state holder, and updates for the width,
        height, X-offset and Y-offset sliders. When nothing can be loaded
        the image slots are ``None`` and the sliders are left unchanged.
    """
    def _no_change():
        # Clear the preview/state and leave all four sliders as they are.
        return None, None, gr.update(), gr.update(), gr.update(), gr.update()

    if not files:
        return _no_change()

    first = files[0]
    # Accept both file wrappers (``.name``) and bare path strings.
    first_image_path = getattr(first, "name", first)
    try:
        # Close the underlying file handle deterministically; ``convert``
        # returns an independent in-memory copy that outlives the handle.
        with Image.open(first_image_path) as src:
            img = src.convert("RGB")
        w, h = img.size
        return (
            img,
            img,
            gr.update(maximum=w, value=min(512, w)),
            gr.update(maximum=h, value=min(512, h)),
            gr.update(maximum=w),
            gr.update(maximum=h),
        )
    except Exception as e:
        # UI boundary: report any load/decoding failure as a toast instead
        # of crashing the upload event.
        gr.Warning(f"Could not load preview image: {e}")
        return _no_change()
# On upload: show the first image, remember it in state, and update the four crop sliders.
crop_input_images.upload(
fn=setup_crop_preview,
inputs=[crop_input_images],
outputs=[crop_preview_image_display, crop_original_preview_state, crop_box_w, crop_box_h, crop_box_x, crop_box_y]
)
# Order matters: handlers receive the values as (x, y, w, h).
crop_sliders = [crop_box_x, crop_box_y, crop_box_w, crop_box_h]
# Redraw the preview when any slider is released; every handler gets the stored image plus all four values.
for slider in crop_sliders:
slider.release(
fn=update_crop_preview,
inputs=[crop_original_preview_state] + crop_sliders,
outputs=crop_preview_image_display
)
# Crop the whole batch with the current box, then play the client-side celebration script.
crop_btn.click(
fn=batch_crop_images,
inputs=[crop_input_images] + crop_sliders,
outputs=[crop_output_gallery, crop_output_zip]
).then(fn=None, js=fire_confetti_and_sound_js)
# --- Image tab: build one collage image from several uploads ---
with gr.TabItem("🧩 Collage Maker"):
gr.Markdown("### Create a collage from multiple images.")
with gr.Row():
with gr.Column():
collage_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
collage_layout = gr.Radio(["Grid", "Horizontal", "Vertical"], value="Grid", label="Layout")
# Output canvas size in pixels (defaults to 1920x1080).
collage_w = gr.Slider(label="Collage Width", minimum=256, maximum=4096, value=1920, step=64)
collage_h = gr.Slider(label="Collage Height", minimum=256, maximum=4096, value=1080, step=64)
collage_bg_color = gr.ColorPicker(value="#000000", label="Background Color")
collage_btn = gr.Button("🎨 Create Collage", variant="primary")
with gr.Column():
collage_output_image = gr.Image(label="Output Collage", show_download_button=True, height=UNIFIED_HEIGHT)
# Build the collage, then play the client-side celebration script.
collage_btn.click(
fn=create_collage,
inputs=[collage_input_images, collage_layout, collage_w, collage_h, collage_bg_color],
outputs=collage_output_image
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ“Έ Duo-Stitcher"):
gr.Markdown("### Smart Image Stitcher")
gr.Info("Upload two vertical images to join them side-by-side, or two horizontal images to stack them. The result is always a square.")
with gr.Row():
with gr.Column():
stitch_img1 = gr.Image(type="numpy", label="Image 1 (Left/Top)", height=UNIFIED_HEIGHT)
stitch_img2 = gr.Image(type="numpy", label="Image 2 (Right/Bottom)", height=UNIFIED_HEIGHT)
with gr.Row():
stitch_size = gr.Slider(512, 4096, 1024, step=128, label="Output Size (pixels)")
stitch_bg_color = gr.ColorPicker(value="#000000", label="Background Color")
stitch_btn = gr.Button("🧩 Stitch Images", variant="primary")
with gr.Column():
stitch_output_image = gr.Image(label="Stitched Output Image", show_download_button=True, height=UNIFIED_HEIGHT)
stitch_btn.click(
fn=stitch_images_smartly,
inputs=[stitch_img1, stitch_img2, stitch_size, stitch_bg_color],
outputs=stitch_output_image
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ“Ή Image to Video"):
gr.Markdown("### Create a short, looping video from a single static image.")
with gr.Row():
with gr.Column():
input_image_i2v = gr.Image(type="numpy", label="Input Image", height=UNIFIED_HEIGHT)
duration_slider_i2v = gr.Slider(1, 30, 5, step=0.1, label="Duration (s)")
input_audio_i2v = gr.Audio(label="Add Music (Optional)", type="filepath")
compile_i2v_btn = gr.Button("🎬 Create Looping Video", variant="primary")
with gr.Column():
output_video_i2v = gr.Video(label="Output Looping Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
compile_i2v_btn.click(image_to_looping_video, [input_image_i2v, duration_slider_i2v, input_audio_i2v], output_video_i2v).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”Ž Zoom Video"):
gr.Markdown("### Create a 'Ken Burns' style zoom/pan video from an image.")
gr.Info("Upload one or more images. The output will be a gallery of videos, or a single combined video if you check the box.")
with gr.Row():
with gr.Column():
i2zv_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
i2zv_duration = gr.Slider(1, 30, 5, step=0.5, label="Video Duration (s) per Image")
i2zv_zoom_ratio = gr.Slider(1.0, 2.0, 1.25, step=0.05, label="Zoom Ratio")
i2zv_zoom_dir = gr.Dropdown(
["Center", "Top", "Bottom", "Left", "Right", "Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right"],
value="Center", label="Zoom Direction"
)
i2zv_combine = gr.Checkbox(label="Combine all videos into one", value=False)
i2zv_audio = gr.Audio(label="Add Music (Optional)", type="filepath")
i2zv_btn = gr.Button("✨ Create Zoom Video(s)", variant="primary")
with gr.Column():
i2zv_output_gallery = gr.Gallery(label="Output Video Previews", columns=2, object_fit="contain", visible=True)
i2zv_output_video = gr.Video(label="Combined Output Video", interactive=True, visible=False, show_download_button=True, height=UNIFIED_HEIGHT)
i2zv_output_zip = gr.File(label="Download All as .zip", interactive=False)
i2zv_combine.change(
fn=lambda x: [gr.update(visible=not x), gr.update(visible=x)],
inputs=i2zv_combine,
outputs=[i2zv_output_gallery, i2zv_output_video]
)
i2zv_btn.click(
fn=create_zoom_videos,
inputs=[i2zv_input_images, i2zv_duration, i2zv_zoom_ratio, i2zv_zoom_dir, i2zv_combine, i2zv_audio],
outputs=[i2zv_output_gallery, i2zv_output_video, i2zv_output_zip]
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("βœ‚οΈ Batch BG Remover"):
gr.Markdown("### Remove the background from a batch of images.")
with gr.Row():
with gr.Column():
input_images_bg = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
remove_bg_btn = gr.Button("🧼 Remove Backgrounds", variant="primary")
with gr.Column():
output_gallery_bg = gr.Gallery(label="Images with Transparent Background", columns=4, object_fit="contain", height="auto")
output_zip_bg = gr.File(label="Download All as .zip", interactive=False)
remove_bg_btn.click(remove_background_batch, input_images_bg, [output_gallery_bg, output_zip_bg]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ–‹οΈ Batch Watermarker"):
gr.Markdown("### Apply a text watermark to a batch of images.")
with gr.Row():
with gr.Column():
input_images_wm = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
watermark_text = gr.Textbox(label="Watermark Text", placeholder="(c) My Awesome Project")
watermark_pos = gr.Radio(["Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right", "Center"], value="Bottom-Right", label="Position")
watermark_opacity = gr.Slider(0, 100, 50, step=1, label="Opacity (%)")
watermark_btn = gr.Button("✍️ Apply Watermarks", variant="primary")
with gr.Column():
output_gallery_wm = gr.Gallery(label="Watermarked Images", columns=4, object_fit="contain", height="auto")
output_zip_wm = gr.File(label="Download All as .zip", interactive=False)
watermark_btn.click(apply_watermark_batch, [input_images_wm, watermark_text, watermark_pos, watermark_opacity], [output_gallery_wm, output_zip_wm]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ“ Batch Resizer & Converter"):
gr.Markdown("### Convert, resize, and compress a batch of images.")
gr.Info("Choose a preset for quick resizing or select 'Custom' to enter your own dimensions.")
with gr.Row():
with gr.Column():
brc_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"])
with gr.Accordion("βš™οΈ Output Settings", open=True):
brc_preset = gr.Dropdown(
label="Resolution Presets",
choices=[
"1024x1024 (Square)", "768x768 (Square)", "720x720 (Square)",
"1280x720 (Landscape 16:9)", "720x1280 (Portrait 9:16)",
"768x512 (Landscape 3:2)", "512x768 (Portrait 2:3)",
"Custom"
],
value="1024x1024 (Square)"
)
with gr.Row(visible=False) as brc_custom_size_row:
brc_max_w = gr.Number(label="Width", value=1024)
brc_max_h = gr.Number(label="Height", value=1024)
brc_resize_mode = gr.Radio(["Fit (preserve aspect ratio)", "Stretch to Fit"], value="Fit (preserve aspect ratio)", label="Resize Mode")
brc_format = gr.Dropdown(["JPG", "PNG", "WEBP"], value="JPG", label="Output Format")
brc_quality = gr.Slider(1, 100, 90, step=1, label="JPG/WEBP Quality", interactive=True)
brc_btn = gr.Button("πŸš€ Process Images", variant="primary")
with gr.Column():
brc_output_gallery = gr.Gallery(label="Processed Images Preview", columns=4, object_fit="contain", height="auto")
brc_output_zip = gr.File(label="Download All as .zip", interactive=False)
def update_resizer_from_preset(preset_str):
    """Translate a resolution preset into custom-row visibility and W/H values.

    Args:
        preset_str: The selected dropdown entry, either ``"Custom"`` or a
            label containing ``"<width>x<height>"``.

    Returns:
        A 3-tuple for (custom size row, width field, height field). For
        ``"Custom"`` the row is revealed and both fields are left untouched;
        otherwise the row is hidden and the parsed size is returned,
        falling back to 1024x1024 when no size can be parsed.
    """
    if preset_str == "Custom":
        return gr.update(visible=True), gr.update(), gr.update()

    # Start from the fallback and overwrite it only when the label parses.
    width, height = 1024, 1024
    parsed = re.search(r'(\d+)\s*x\s*(\d+)', preset_str)
    if parsed is not None:
        width, height = (int(group) for group in parsed.groups())
    return gr.update(visible=False), width, height
# Selecting a preset shows/hides the custom size row and fills in the width/height fields.
brc_preset.change(
fn=update_resizer_from_preset,
inputs=brc_preset,
outputs=[brc_custom_size_row, brc_max_w, brc_max_h]
)
# Initialize the values on load
demo.load(
fn=update_resizer_from_preset,
inputs=brc_preset,
outputs=[brc_custom_size_row, brc_max_w, brc_max_h]
)
# The quality slider is only shown for JPG and WEBP output.
brc_format.change(lambda f: gr.update(visible=f in ["JPG", "WEBP"]), brc_format, brc_quality)
brc_btn.click(
# The 'enable_resize' parameter is now implicitly True
# (the lambda inserts the literal True between quality and width).
lambda files, out_f, qual, w, h, mode: batch_resize_convert_images(files, out_f, qual, True, w, h, mode),
[brc_input_images, brc_format, brc_quality, brc_max_w, brc_max_h, brc_resize_mode],
[brc_output_gallery, brc_output_zip]
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸŽ₯ Video Utilities", elem_id="video_tab"):
gr.Markdown("## A collection of useful video tools.")
with gr.Tabs():
with gr.TabItem("🎞️ Frame Tools"):
with gr.Tabs():
with gr.TabItem("First & Last"):
gr.Markdown("### Extract the very first and very last frames of a video.")
with gr.Row():
with gr.Column():
input_video_fl = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
extract_fl_btn = gr.Button("🎬 Extract Frames", variant="primary")
with gr.Column():
output_gallery_fl = gr.Gallery(label="Output Frames (First, Last)", columns=2, object_fit="contain", height="auto")
extract_fl_btn.click(fn=extract_first_last_frame, inputs=input_video_fl, outputs=output_gallery_fl).then(fn=None, js=fire_confetti_and_sound_js)
# --- Frame Tools: first and last frames from several videos at once ---
with gr.TabItem("Batch First & Last"):
gr.Markdown("### Extract the first and last frames from multiple videos.")
with gr.Row():
with gr.Column():
batch_fl_input_videos = gr.File(label="Upload Videos", file_count="multiple", file_types=["video"])
batch_fl_process_btn = gr.Button("🎬 Extract All Frames", variant="primary")
with gr.Column():
batch_fl_output_gallery = gr.Gallery(label="Extracted Frames Preview", columns=6, object_fit="contain", height="auto")
batch_fl_output_zip = gr.File(label="Download All Frames (.zip)", interactive=False)
# Process the batch with the full progress display, then play the client-side celebration script.
batch_fl_process_btn.click(
fn=batch_extract_first_last_frames,
inputs=batch_fl_input_videos,
outputs=[batch_fl_output_gallery, batch_fl_output_zip],
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Video to Frames"):
gr.Markdown("### Extract all individual frames from a video file.")
with gr.Row():
with gr.Column():
input_video_v2f = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
v2f_fps_display = gr.Textbox(label="Detected FPS", interactive=False, value="N/A")
with gr.Accordion("βš™οΈ Advanced Options", open=False):
v2f_skip_rate = gr.Slider(1, 30, 1, step=1, label="Extract Every Nth Frame")
v2f_rotation = gr.Dropdown(["None", "90 Degrees Clockwise", "90 Degrees Counter-Clockwise", "180 Degrees"], value="None", label="Rotation")
v2f_format = gr.Radio(["PNG", "JPG"], value="PNG", label="Output Format")
v2f_jpg_quality = gr.Slider(1, 100, 95, step=1, label="JPG Quality", interactive=False)
v2f_resize = gr.Checkbox(label="Resize all extracted frames", value=False)
with gr.Row():
v2f_width = gr.Number(label="Output Width", value=1024, interactive=False)
v2f_height = gr.Number(label="Output Height", value=576, interactive=False)
extract_v2f_btn = gr.Button("πŸͺš Extract All Frames", variant="primary")
with gr.Column():
output_gallery_v2f = gr.Gallery(label="Extracted Frames Preview (max 100 shown)", columns=8, object_fit="contain", height="auto")
output_zip_v2f = gr.File(label="Download All Frames (.zip)", interactive=False)
input_video_v2f.upload(lambda v: f"{get_video_fps(v):.2f} FPS", input_video_v2f, v2f_fps_display)
v2f_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], v2f_resize, [v2f_width, v2f_height])
v2f_format.change(lambda x: gr.update(interactive=(x=="JPG")), v2f_format, v2f_jpg_quality)
extract_v2f_btn.click(video_to_frames_extractor, [input_video_v2f, v2f_skip_rate, v2f_rotation, v2f_resize, v2f_width, v2f_height, v2f_format, v2f_jpg_quality], [output_gallery_v2f, output_zip_v2f]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Frames to Video"):
gr.Markdown("### Compile a sequence of image frames into a video file.")
with gr.Row():
with gr.Column():
input_frames_f2v = gr.File(label="Upload Frames", file_count="multiple", file_types=["image"])
fps_slider_f2v = gr.Slider(1, 60, 24, step=1, label="FPS")
with gr.Accordion("βš™οΈ Advanced Options", open=False):
f2v_rotation = gr.Dropdown(["None", "90 Degrees Clockwise", "90 Degrees Counter-Clockwise", "180 Degrees"], value="None", label="Rotation")
f2v_resize = gr.Checkbox(label="Resize all frames", value=False)
with gr.Row():
f2v_width = gr.Number(label="Output Width", value=1024, interactive=False)
f2v_height = gr.Number(label="Output Height", value=576, interactive=False)
compile_f2v_btn = gr.Button("πŸ“½οΈ Create Video", variant="primary")
with gr.Column():
output_video_f2v = gr.Video(label="Compiled Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
f2v_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], f2v_resize, [f2v_width, f2v_height])
compile_f2v_btn.click(create_video_from_frames, [input_frames_f2v, fps_slider_f2v, f2v_rotation, f2v_resize, f2v_width, f2v_height], output_video_f2v).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("βœ‚οΈ Editing"):
with gr.Tabs():
with gr.TabItem("🎞️ Slo-Mo & Enhance"):
gr.Markdown("## AI Slow-Motion")
gr.Info(
"Create super smooth slow-motion videos. The 'AI-Enhanced' method uses frame interpolation for the best quality."
)
if not ENHANCE_AI_AVAILABLE:
gr.Warning(
"AI models not found. The 'AI-Enhanced' option is disabled. Please install 'rife-ncnn-vulkan-python' to enable it."
)
with gr.Row():
with gr.Column():
slowmo_input_videos = gr.File(label="Upload Videos", file_count="multiple", file_types=["video"])
with gr.Accordion("βš™οΈ Settings", open=True):
slowmo_factor = gr.Dropdown(["2x", "4x", "8x"], value="2x", label="Slowdown Factor")
method_choices = ["Standard (Fast)"]
if ENHANCE_AI_AVAILABLE:
method_choices.append("AI-Enhanced (High Quality)")
slowmo_method = gr.Radio(
method_choices,
value=method_choices[-1] if ENHANCE_AI_AVAILABLE else method_choices[0],
label="Interpolation Method"
)
slowmo_btn = gr.Button("πŸš€ Process Videos", variant="primary")
with gr.Column():
slowmo_output_gallery = gr.Gallery(label="Output Video Previews", columns=1, object_fit="contain")
slowmo_output_zip = gr.File(label="Download All as .zip", interactive=False)
slowmo_btn.click(
fn=batch_slowmo_enhance_videos,
inputs=[slowmo_input_videos, slowmo_factor, slowmo_method],
outputs=[slowmo_output_gallery, slowmo_output_zip],
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”ͺ Auto Jump-Cut"):
gr.Markdown("### Automatic Silence Remover (Jump-Cutter)")
gr.Info("1. Upload a video. A waveform will appear. 2. Adjust the 'Silence Threshold' slider so it's above the thin 'noise floor' line but below your voice peaks. 3. Process the video!")
with gr.Row():
with gr.Column(scale=2):
jumpcut_input_video = gr.Video(label="Input Video (with audio)", height=UNIFIED_HEIGHT)
jc_waveform_preview = gr.Image(label="Audio Waveform Preview", interactive=False)
with gr.Accordion("βš™οΈ Cut Settings", open=True):
jumpcut_threshold = gr.Slider(minimum=-60, maximum=-20, value=-30, step=1, label="Silence Threshold (dB)", info="Anything quieter than this is 'silence'. Lower values are more strict.")
jumpcut_duration = gr.Slider(minimum=0.1, maximum=2.0, value=0.5, step=0.1, label="Minimum Silence Duration (s)", info="Silences shorter than this will be ignored.")
with gr.Accordion("πŸ“ Output Resolution", open=False):
jc_resolution = gr.Radio(
["Keep Original", "1080p (1920x1080)", "Portrait (1080x1920)", "Custom"],
value="Keep Original", label="Output Resolution"
)
with gr.Row(visible=False) as jc_custom_res_row:
jc_custom_w = gr.Number(label="Custom Width", value=1920)
jc_custom_h = gr.Number(label="Custom Height", value=1080)
jumpcut_btn = gr.Button("πŸ”ͺ Perform Jump-Cut", variant="primary")
with gr.Column(scale=3):
jumpcut_output_video = gr.Video(label="Edited Video Output", show_download_button=True, height=UNIFIED_HEIGHT)
jumpcut_input_video.upload(fn=generate_waveform_preview, inputs=jumpcut_input_video, outputs=jc_waveform_preview)
jc_resolution.change(
fn=lambda choice: gr.update(visible=(choice == "Custom")),
inputs=jc_resolution,
outputs=jc_custom_res_row
)
jumpcut_btn.click(
fn=auto_jump_cut,
inputs=[jumpcut_input_video, jumpcut_threshold, jumpcut_duration, jc_resolution, jc_custom_w, jc_custom_h],
outputs=jumpcut_output_video,
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
# NEW FEATURE: Video Silence Chopper
with gr.TabItem("βœ‚οΈ Video Silence Chopper"):
gr.Markdown("### Automatic Video Silence Chopper")
gr.Info("Splits a video into multiple clips, removing the silent parts. Ideal for isolating spoken phrases from interviews or lectures.")
with gr.Row():
with gr.Column(scale=2):
video_chopper_input = gr.Video(label="Input Video (with audio)", height=UNIFIED_HEIGHT)
with gr.Accordion("βš™οΈ Silence Settings", open=True):
video_chopper_threshold = gr.Slider(minimum=-60, maximum=-20, value=-30, step=1, label="Silence Threshold (dB)")
video_chopper_duration = gr.Slider(minimum=0.1, maximum=2.0, value=0.5, step=0.1, label="Minimum Silence Duration (s)")
with gr.Accordion("πŸ“ Output Resolution", open=False):
video_chopper_resolution = gr.Radio(["Keep Original", "1080p (1920x1080)", "Portrait (1080x1920)", "Custom"], value="Keep Original", label="Output Resolution")
with gr.Row(visible=False) as vc_custom_res_row:
vc_custom_w = gr.Number(label="Custom Width", value=1920)
vc_custom_h = gr.Number(label="Custom Height", value=1080)
video_chopper_btn = gr.Button("βœ‚οΈ Chop Video into Clips", variant="primary")
with gr.Column(scale=3):
video_chopper_gallery = gr.Gallery(label="Chopped Video Clips (Preview)", columns=2, object_fit="contain", height="auto")
video_chopper_zip = gr.File(label="Download All Clips as .zip", interactive=False)
video_chopper_resolution.change(
fn=lambda choice: gr.update(visible=(choice == "Custom")),
inputs=video_chopper_resolution,
outputs=vc_custom_res_row
)
video_chopper_btn.click(
fn=chop_video_on_silence,
inputs=[video_chopper_input, video_chopper_threshold, video_chopper_duration, video_chopper_resolution, vc_custom_w, vc_custom_h],
outputs=[video_chopper_gallery, video_chopper_zip],
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("β–¦ Video Grid"):
gr.Markdown("### Video Grid Compiler")
gr.Info("Combine multiple videos into a single-screen layout. Videos will be scaled to fit within their grid tile.")
with gr.Row():
with gr.Column():
grid_input_videos = gr.File(label="Upload Videos", file_count="multiple", file_types=["video"])
grid_layout = gr.Dropdown(
["2x1 (Side-by-Side)", "1x2 (Stacked)", "2x2 (Quad-View)", "4x4 (16-View)", "8x4 (32-View)"],
value="2x1 (Side-by-Side)",
label="Grid Layout"
)
grid_w = gr.Slider(label="Output Width", minimum=256, maximum=4096, value=1920, step=64)
grid_h = gr.Slider(label="Output Height", minimum=256, maximum=4096, value=1080, step=64)
grid_bg_color = gr.ColorPicker(value="#000000", label="Background Color")
with gr.Accordion("Audio Settings", open=True):
grid_audio_choice = gr.Dropdown(
choices=["From Video 1", "None"],
value="From Video 1",
label="Video Audio Source",
info="Select which video's audio to use, or None."
)
grid_music_track = gr.Audio(label="Add External Music (Overrides above)", type="filepath")
grid_btn = gr.Button("β–¦ Compile Grid", variant="primary")
with gr.Column():
grid_output_video = gr.Video(label="Grid Video Output", show_download_button=True, height=UNIFIED_HEIGHT)
grid_input_videos.upload(update_audio_source_choices_for_grid, grid_input_videos, grid_audio_choice)
grid_btn.click(
fn=compile_video_grid,
inputs=[grid_input_videos, grid_layout, grid_w, grid_h, grid_bg_color, grid_audio_choice, grid_music_track],
outputs=grid_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
# --- Editing: apply one transformation (invert / flip / rotate) to a whole video ---
with gr.TabItem("Manipulate"):
gr.Markdown("### Simple Video Manipulation")
# NOTE(review): gr.Info is invoked at layout-build time, not from an event handler — confirm the hint is actually displayed.
gr.Info("Apply a single transformation like inverting colors, flipping, or rotating to every frame of a video.")
with gr.Row():
with gr.Column():
vmanip_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
vmanip_operation_radio = gr.Radio(
# NOTE(review): "90Β°" looks like a mis-decoded "90°". The selected string is passed to
# manipulate_video below, so any repair must be made there in lockstep.
["Invert Colors", "Flip Horizontal", "Flip Vertical", "Rotate 90Β° Right", "Rotate 90Β° Left"],
label="Select Operation", value="Invert Colors"
)
vmanip_apply_btn = gr.Button("✨ Apply Manipulation", variant="primary")
with gr.Column():
vmanip_output_video = gr.Video(label="Output Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
# Run the transformation, then play the client-side celebration script.
vmanip_apply_btn.click(fn=manipulate_video, inputs=[vmanip_input_video, vmanip_operation_radio], outputs=vmanip_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Ping-Pong"):
gr.Markdown("### Create a forward-then-reverse video loop (Boomerang).")
with gr.Row():
with gr.Column():
input_video_pingpong = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
audio_option_pingpong = gr.Radio(["Remove Audio", "Original Audio Only", "Reverse Audio"], value="Remove Audio", label="Audio Handling")
pingpong_btn = gr.Button("πŸ“ Create Ping-Pong Video", variant="primary")
with gr.Column():
output_video_pingpong = gr.Video(label="Ping-Pong Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
pingpong_btn.click(fn=ping_pong_video, inputs=[input_video_pingpong, audio_option_pingpong], outputs=output_video_pingpong).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Reverse"):
gr.Markdown("### Reverse a video clip.")
with gr.Row():
with gr.Column():
input_video_reverse = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
audio_option_reverse = gr.Radio(["Remove Audio", "Reverse Audio"], value="Remove Audio", label="Audio Handling")
reverse_btn = gr.Button("πŸ”„ Reverse Video", variant="primary")
with gr.Column():
output_video_reverse = gr.Video(label="Reversed Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
reverse_btn.click(fn=reverse_video, inputs=[input_video_reverse, audio_option_reverse], outputs=output_video_reverse).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Merger"):
gr.Markdown("### Simple Video Merger")
gr.Info("Upload two or more video clips to join them together in sequence. All clips will be conformed to the resolution and framerate of the first video.")
with gr.Row():
with gr.Column():
merger_input_videos = gr.File(label="Upload Videos (2 or more)", file_count="multiple", file_types=["video"])
merger_btn = gr.Button("πŸ”— Merge Videos", variant="primary")
with gr.Column():
merger_output_video = gr.Video(label="Merged Video", show_download_button=True, height=UNIFIED_HEIGHT)
merger_btn.click(
fn=merge_videos,
inputs=merger_input_videos,
outputs=merger_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
# --- Editing: stabilize shaky footage ---
with gr.TabItem("Stabilizer"):
gr.Markdown("### Smooth Out Shaky Video Footage")
# NOTE(review): gr.Info is invoked at layout-build time, not from an event handler — confirm the hint is actually displayed.
gr.Info("This tool uses a two-pass process to analyze and stabilize shaky videos. Higher 'Shakiness' values are for very unstable footage. Higher 'Smoothing' creates a more fluid, gliding look but can introduce slight cropping/warping.")
with gr.Row():
with gr.Column():
stab_input_video = gr.Video(label="Input Shaky Video", height=UNIFIED_HEIGHT)
# Both tuning values are forwarded unchanged to stabilize_video.
stab_shakiness = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Shakiness Level", info="How shaky is the source video? (1=low, 10=high)")
stab_smoothing = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Smoothing Amount", info="How much smoothing to apply. (More = smoother but might feel 'floaty')")
stab_btn = gr.Button("✨ Stabilize Video", variant="primary")
with gr.Column():
stab_output_video = gr.Video(label="Stabilized Output Video", show_download_button=True, height=UNIFIED_HEIGHT)
# Stabilize, then play the client-side celebration script.
stab_btn.click(
fn=stabilize_video,
inputs=[stab_input_video, stab_shakiness, stab_smoothing],
outputs=stab_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Trimmer"):
gr.Markdown("### Visually trim a video. Use the player to find a frame, then set it as the start or end point.")
gr.Info("Keyboard hotkeys enabled: K = Play/Pause, J = Back 1 Frame, L = Forward 1 Frame (hover mouse over video)")
with gr.Row():
with gr.Column(scale=2):
input_video_trim = gr.Video(label="Input Video", elem_id="video-trim-input", height=UNIFIED_HEIGHT)
with gr.Row():
set_start_btn = gr.Button("Set Current Frame as START")
set_end_btn = gr.Button("Set Current Frame as END")
trim_btn = gr.Button("βœ‚οΈ Trim Video", variant="primary")
with gr.Column(scale=1):
gr.Markdown("#### Trim Points")
start_frame_img = gr.Image(label="Start Frame", interactive=False)
trim_start_time_display = gr.Textbox(label="Start Time (s)", interactive=False)
end_frame_img = gr.Image(label="End Frame", interactive=False)
trim_end_time_display = gr.Textbox(label="End Time (s)", interactive=False)
trim_start_time = gr.Number(value=0, visible=False)
trim_end_time = gr.Number(value=0, visible=False)
with gr.Row():
output_video_trim = gr.Video(label="Trimmed Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
get_current_time_js = """()=>{const e=document.querySelector("#video-trim-input video");return e?e.currentTime:0}"""
def get_frame_from_time_wrapper(v, t):
    """Return the frame at time ``t`` plus that time formatted for display.

    Args:
        v: Path of the video loaded in the trimmer, or ``None``/empty when
            no video is loaded.
        t: Timestamp in seconds; ``None`` is treated as ``0.0``.

    Returns:
        ``(frame, label)`` where ``label`` is ``t`` with three decimals.
        ``frame`` is ``None`` when there is no video to read from.
    """
    # A cleared or empty numeric field yields None, which the format spec
    # below cannot handle.
    if t is None:
        t = 0.0
    time_label = f"{t:.3f}"
    # The hidden time fields can change while no video is loaded (they are
    # reset when the input is cleared); skip the frame grab in that case.
    if not v:
        return None, time_label
    return get_frame_at_time(v, t), time_label
# The START/END buttons run only client-side JavaScript, which writes the player's
# current time into the hidden number fields.
set_start_btn.click(fn=None, js=get_current_time_js, outputs=[trim_start_time])
set_end_btn.click(fn=None, js=get_current_time_js, outputs=[trim_end_time])
# When a hidden time changes, fetch the matching frame and update the visible time textbox.
trim_start_time.change(fn=get_frame_from_time_wrapper, inputs=[input_video_trim, trim_start_time], outputs=[start_frame_img, trim_start_time_display])
trim_end_time.change(fn=get_frame_from_time_wrapper, inputs=[input_video_trim, trim_end_time], outputs=[end_frame_img, trim_end_time_display])
# Trim between the two stored times, then play the client-side celebration script.
trim_btn.click(fn=trim_video, inputs=[input_video_trim, trim_start_time, trim_end_time], outputs=output_video_trim).then(fn=None, js=fire_confetti_and_sound_js)
# Clearing the input resets both preview frames, both displayed times and both hidden times.
input_video_trim.clear(fn=lambda: (None, "0.00", None, "0.00", 0, 0), outputs=[start_frame_img, trim_start_time_display, end_frame_img, trim_end_time_display, trim_start_time, trim_end_time])
with gr.TabItem("Crop & Resize"):
gr.Markdown("### Visually crop a video.")
gr.Info("Upload a video to see a preview frame. Adjust the sliders to define the crop area, then process.")
video_crop_original_preview_state = gr.State()
with gr.Row():
with gr.Column(scale=1):
video_crop_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
gr.Markdown("#### Crop Box Settings")
video_crop_w = gr.Slider(label="Width", minimum=64, maximum=4096, step=8, value=1280)
video_crop_h = gr.Slider(label="Height", minimum=64, maximum=4096, step=8, value=720)
video_crop_x = gr.Slider(label="X Offset", minimum=0, maximum=4096, step=8, value=0)
video_crop_y = gr.Slider(label="Y Offset", minimum=0, maximum=4096, step=8, value=0)
with gr.Accordion("Optional: Resize after cropping", open=False):
video_crop_do_resize = gr.Checkbox(label="Enable Resizing", value=False)
video_crop_resize_w = gr.Number(label="Output Width", value=1024, interactive=False)
video_crop_resize_h = gr.Number(label="Output Height", value=576, interactive=False)
video_crop_btn = gr.Button("πŸ“ Crop Video", variant="primary")
with gr.Column(scale=2):
video_crop_preview_image = gr.Image(label="Crop Preview", type="pil", interactive=False, height=UNIFIED_HEIGHT)
video_crop_output_video = gr.Video(label="Cropped Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
def setup_video_crop_preview(video_path):
    """Load the first frame of an uploaded video and fit the crop sliders to it.

    Args:
        video_path: Path of the uploaded video; falsy when nothing is loaded.

    Returns:
        A 6-tuple in the order of the ``upload`` outputs: the preview image,
        the cached original frame, and updates for the width, height,
        X-offset and Y-offset sliders. When no path is given or the frame
        cannot be read, both images are ``None`` and the sliders are left
        as they are.
    """
    # "Nothing to show" result: clear both images, leave every slider untouched.
    unchanged = (None, None, gr.update(), gr.update(), gr.update(), gr.update())
    if not video_path:
        return unchanged
    try:
        img = get_frame_at_time(video_path, 0)
        w, h = img.size
    except Exception as e:
        gr.Warning(f"Could not load preview frame: {e}")
        return unchanged
    # The crop box is reset to the whole frame (width=w, height=h), so the
    # offsets must go back to 0 as well. Keeping an offset left over from a
    # previous video would push the box outside the new frame.
    return (
        img,
        img,
        gr.update(maximum=w, value=w),
        gr.update(maximum=h, value=h),
        gr.update(maximum=w, value=0),
        gr.update(maximum=h, value=0),
    )
video_crop_input_video.upload(
fn=setup_video_crop_preview,
inputs=video_crop_input_video,
outputs=[video_crop_preview_image, video_crop_original_preview_state, video_crop_w, video_crop_h, video_crop_x, video_crop_y]
)
video_crop_sliders = [video_crop_x, video_crop_y, video_crop_w, video_crop_h]
for slider in video_crop_sliders:
slider.release(
fn=update_crop_preview,
inputs=[video_crop_original_preview_state] + video_crop_sliders,
outputs=video_crop_preview_image
)
video_crop_do_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], inputs=video_crop_do_resize, outputs=[video_crop_resize_w, video_crop_resize_h])
video_crop_btn.click(
fn=crop_video,
inputs=[video_crop_input_video, video_crop_x, video_crop_y, video_crop_w, video_crop_h, video_crop_do_resize, video_crop_resize_w, video_crop_resize_h],
outputs=video_crop_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Change Speed"):
gr.Markdown("### Create slow-motion or fast-forward videos.")
with gr.Row():
with gr.Column():
input_video_speed = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
speed_multiplier = gr.Slider(0.1, 10.0, 1.0, step=0.1, label="Speed Multiplier")
speed_btn = gr.Button("πŸƒ Change Speed", variant="primary")
with gr.Column():
output_video_speed = gr.Video(label="Modified Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
speed_btn.click(fn=change_video_speed, inputs=[input_video_speed, speed_multiplier], outputs=output_video_speed).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎨 Effects & Overlays"):
with gr.Tabs():
with gr.TabItem("Fader"):
gr.Markdown("### Apply Fade-In and/or Fade-Out to a Video")
with gr.Row():
with gr.Column():
fade_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
with gr.Row():
fade_in_slider = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-In Duration (s)")
fade_out_slider = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-Out Duration (s)")
fade_video_btn = gr.Button("✨ Apply Fade", variant="primary")
with gr.Column():
fade_output_video = gr.Video(label="Faded Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
fade_video_btn.click(apply_video_fade, [fade_input_video, fade_in_slider, fade_out_slider], fade_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🎨 Color Grading"):
gr.Markdown("### Adjust Video Color, Contrast, and Sharpness")
gr.Info("Upload a video, load a preview frame, adjust the sliders for a live preview, and then apply the changes to the full video. The preview is 100% accurate to the final render.")
with gr.Row(equal_height=False):
with gr.Column(scale=1):
color_grade_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
load_preview_btn = gr.Button("πŸ–ΌοΈ Load/Reset Preview Frame")
gr.Markdown("#### Adjustments")
cg_brightness = gr.Slider(minimum=-0.5, maximum=0.5, value=0.0, step=0.05, label="Brightness")
cg_contrast = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.05, label="Contrast")
cg_saturation = gr.Slider(minimum=0.0, maximum=3.0, value=1.0, step=0.1, label="Saturation")
cg_sharpness = gr.Slider(minimum=0.0, maximum=1.5, value=0.0, step=0.1, label="Sharpness")
apply_grading_btn = gr.Button("🎬 Apply Grading to Full Video", variant="primary")
with gr.Column(scale=2):
with gr.Row():
cg_before_preview = gr.Image(label="Before", type="numpy", interactive=False)
cg_after_preview = gr.Image(label="After (Accurate Preview)", type="pil", interactive=False)
color_grade_output_video = gr.Video(label="Graded Video Output", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
# Inputs shared by every preview refresh: the "Before" frame followed by the
# four adjustment sliders.
grading_inputs = [cg_before_preview, cg_brightness, cg_contrast, cg_saturation, cg_sharpness]
# Re-render the "After" preview whenever any adjustment slider is released.
for grading_slider in (cg_brightness, cg_contrast, cg_saturation, cg_sharpness):
    grading_slider.release(preview_color_grading_ffmpeg, inputs=grading_inputs, outputs=cg_after_preview)
# The preview button and a fresh upload do the same thing: load a frame into
# "Before", then regenerate "After" from it with the current slider values.
for register_frame_loader in (load_preview_btn.click, color_grade_input_video.upload):
    register_frame_loader(
        fn=get_frame_at_time,
        inputs=color_grade_input_video,
        outputs=cg_before_preview,
    ).then(
        fn=preview_color_grading_ffmpeg,
        inputs=grading_inputs,
        outputs=cg_after_preview,
    )
# Grade the full video with the current slider values, then run the
# `fire_confetti_and_sound_js` script in the browser.
grade_event = apply_grading_btn.click(
    fn=apply_color_grading,
    inputs=[color_grade_input_video, cg_brightness, cg_contrast, cg_saturation, cg_sharpness],
    outputs=color_grade_output_video,
)
grade_event.then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("BG Remover"):
gr.Markdown("## Video Background Remover")
gr.Warning("This is a very slow process. A short video can take several minutes. Output is a .webm file.")
with gr.Row():
with gr.Column():
vbg_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
vbg_btn = gr.Button("βœ‚οΈ Remove Video Background", variant="primary")
with gr.Column():
vbg_output_video = gr.Video(label="Output Video with Transparency (.webm)", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
vbg_btn.click(remove_video_background, vbg_input_video, vbg_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Watermark"):
gr.Markdown("### Apply a text watermark to a video.")
with gr.Row():
with gr.Column():
wm_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
wm_text = gr.Textbox(label="Watermark Text", placeholder="(c) My Video 2025")
wm_pos = gr.Radio(["Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right", "Center"], value="Bottom-Right", label="Position")
wm_opacity = gr.Slider(0, 100, 70, step=1, label="Opacity (%)")
with gr.Accordion("Advanced Options", open=False):
wm_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size")
wm_color = gr.ColorPicker(value="#FFFFFF", label="Font Color")
wm_btn = gr.Button("πŸ–‹οΈ Apply Watermark", variant="primary")
with gr.Column():
wm_output_video = gr.Video(label="Watermarked Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
wm_btn.click(apply_video_watermark, [wm_input_video, wm_text, wm_pos, wm_opacity, wm_size, wm_color], wm_output_video).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Create GIF"):
gr.Markdown("### Convert a video clip into a high-quality animated GIF.")
with gr.Row():
with gr.Column():
input_video_gif = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
with gr.Row():
gif_start_time = gr.Number(value=0, label="Start Time (s)")
gif_end_time = gr.Number(value=0, label="End Time (s)", info="Set to 0 for full duration")
gif_btn = gr.Button("πŸ–ΌοΈ Create GIF", variant="primary")
with gr.Column():
output_gif = gr.Image(label="Output GIF", show_download_button=True, height=UNIFIED_HEIGHT)
gif_btn.click(create_gif_from_video, [input_video_gif, gif_start_time, gif_end_time], output_gif).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”Š Audio & Transcription"):
with gr.Tabs():
with gr.TabItem("Add Audio"):
gr.Markdown("### Combine a silent video with an audio file.")
with gr.Row():
with gr.Column():
input_video_audio = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
input_audio = gr.Audio(type="filepath", label="Input Audio")
add_audio_btn = gr.Button("🎢 Add Audio", variant="primary")
with gr.Column():
output_video_audio = gr.Video(label="Final Video with Audio", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
add_audio_btn.click(add_audio_to_video, [input_video_audio, input_audio], output_video_audio).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Extract Audio"):
gr.Markdown("### Strip the audio track from a video file.")
with gr.Row():
with gr.Column():
extract_audio_input_video = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
extract_audio_format = gr.Dropdown(["mp3", "wav", "aac"], value="mp3", label="Output Audio Format")
extract_audio_btn = gr.Button("🎡 Extract Audio", variant="primary")
with gr.Column():
extract_audio_output = gr.Audio(label="Extracted Audio", type="filepath")
extract_audio_btn.click(extract_audio, [extract_audio_input_video, extract_audio_format], extract_audio_output).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Silence Chopper"):
gr.Markdown("### Automatic Silence Chopper")
gr.Info("Automatically split an audio file into multiple smaller files, removing the silent parts.")
with gr.Row():
with gr.Column():
chopper_input_audio = gr.Audio(type="filepath", label="Input Audio File")
with gr.Accordion("βš™οΈ Silence Settings", open=True):
chopper_thresh = gr.Slider(minimum=-70, maximum=-20, value=-40, step=1, label="Silence Threshold (dBFS)", info="Anything quieter than this is considered silence. Lower numbers are stricter.")
chopper_min_len = gr.Slider(minimum=100, maximum=2000, value=500, step=50, label="Minimum Silence Length (ms)", info="Silences shorter than this will be ignored.")
chopper_btn = gr.Button("βœ‚οΈ Chop Audio", variant="primary")
with gr.Column():
chopper_output_gallery = gr.Gallery(label="Chopped Audio Files (Preview)", columns=2, object_fit="contain", height="auto", allow_preview=False)
chopper_output_zip = gr.File(label="Download All Chunks as .zip", interactive=False)
chopper_btn.click(
fn=chop_audio_on_silence,
inputs=[chopper_input_audio, chopper_thresh, chopper_min_len],
outputs=[chopper_output_gallery, chopper_output_zip],
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Audio Editor"):
gr.Markdown("### Trim and Apply Fades to an Audio File")
gr.Info("Set start/end times to trim the clip, then apply optional fades.")
with gr.Row():
with gr.Column():
audio_trim_input = gr.Audio(type="filepath", label="Input Audio")
with gr.Row():
audio_start_time = gr.Number(label="Start Time (s)", value=0)
audio_end_time = gr.Number(label="End Time (s)", info="Set to 0 for full duration")
with gr.Row():
audio_fade_in = gr.Slider(0.0, 10.0, 0.5, step=0.1, label="Fade-In Duration (s)")
audio_fade_out = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-Out Duration (s)")
audio_trim_fade_btn = gr.Button("βœ‚οΈ Process Audio", variant="primary")
with gr.Column():
audio_trim_output = gr.Audio(label="Processed Audio", type="filepath")
audio_trim_fade_btn.click(trim_and_fade_audio, [audio_trim_input, audio_start_time, audio_end_time, audio_fade_in, audio_fade_out], audio_trim_output).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ₯ BPM & Speed Tool"):
gr.Markdown("### Analyze Audio BPM and Adjust Speed")
gr.Info("Upload an audio file to find its BPM. Then, use the slider to create a speed-adjusted version.")
with gr.Row():
with gr.Column(scale=2):
bpm_input_audio = gr.Audio(type="filepath", label="Input Audio")
with gr.Row():
bpm_detect_btn = gr.Button("πŸ₯ Detect BPM")
bpm_display_box = gr.Textbox(label="Detected BPM", interactive=False)
gr.Markdown("#### Speed Adjustment")
with gr.Row():
bpm_speed_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.05, label="Speed Multiplier")
bpm_new_display = gr.Textbox(label="New BPM Estimate", interactive=False, value="---")
bpm_apply_speed_btn = gr.Button("πŸƒ Apply Speed Change", variant="primary")
with gr.Column(scale=3):
bpm_output_audio = gr.Audio(label="Processed Audio Output", type="filepath")
bpm_input_audio.upload(lambda: ("", "---"), outputs=[bpm_display_box, bpm_new_display])
bpm_detect_btn.click(
fn=detect_bpm,
inputs=bpm_input_audio,
outputs=bpm_display_box
).then(
fn=update_new_bpm_display,
inputs=[bpm_display_box, bpm_speed_slider],
outputs=bpm_new_display
).then(fn=None, js=fire_confetti_and_sound_js)
bpm_speed_slider.release(
fn=update_new_bpm_display,
inputs=[bpm_display_box, bpm_speed_slider],
outputs=bpm_new_display
)
bpm_apply_speed_btn.click(
fn=change_audio_speed,
inputs=[bpm_input_audio, bpm_speed_slider],
outputs=bpm_output_audio
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Transcription", visible=(whisper is not None)):
gr.Markdown("## Transcribe Speech and Burn Subtitles")
gr.Info("Uses OpenAI's Whisper model with word-level timestamps. First run will download model files.")
transcribed_video_path_state = gr.State(None)
with gr.Row():
with gr.Column():
transcribe_input = gr.File(label="Upload Video or Audio File", file_types=["video", "audio"])
transcribe_model = gr.Dropdown(["tiny", "base", "small", "medium", "large"], value="base", label="Whisper Model Size")
transcribe_btn = gr.Button("πŸŽ™οΈ Transcribe", variant="primary")
with gr.Column():
with gr.Row():
transcribe_text = gr.Textbox(label="Transcription Result", lines=10, interactive=True, elem_id="transcription_textbox")
copy_transcription_btn = gr.Button("πŸ“‹ Copy")
transcribe_files = gr.File(label="Download Subtitle Files (.srt, .vtt, .ass)", file_count="multiple", interactive=False)
with gr.Accordion("πŸ”₯ Burn Subtitles onto Video", open=True, visible=False) as burn_accordion:
gr.Markdown("Set styling and burn the generated subtitles into the video.")
with gr.Row():
burn_style = gr.Radio(["Block", "Karaoke"], value="Block", label="Subtitle Style")
burn_font_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size")
with gr.Row(visible=True) as block_style_row:
burn_words_per_line = gr.Slider(1, 20, 7, step=1, label="Max Words Per Line", info="Splits long subtitle lines for better readability.")
burn_block_font_color = gr.ColorPicker(value="#FFFFFF", label="Font Color")
with gr.Row(visible=False) as karaoke_style_row:
burn_karaoke_base_color = gr.ColorPicker(value="#FFFFFF", label="Base Color")
burn_karaoke_highlight_color = gr.ColorPicker(value="#FFFF00", label="Highlight Color")
burn_btn = gr.Button("πŸ”₯ Burn Subtitles", variant="primary")
burn_output_video = gr.Video(label="Video with Burned-in Subtitles", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
def toggle_subtitle_styles(style_choice):
    """Show only the option row that belongs to the selected subtitle style.

    Args:
        style_choice: Value of the subtitle-style radio ("Block" or "Karaoke").

    Returns:
        Two visibility updates: first for the "Block" options row, then for
        the "Karaoke" options row.
    """
    show_block_row = style_choice == "Block"
    show_karaoke_row = style_choice == "Karaoke"
    return gr.update(visible=show_block_row), gr.update(visible=show_karaoke_row)
burn_style.change(toggle_subtitle_styles, burn_style, [block_style_row, karaoke_style_row])
copy_transcription_btn.click(fn=None, js=copy_transcription_js)
transcribe_btn.click(
fn=transcribe_and_prep_burn,
inputs=[transcribe_input, transcribe_model],
outputs=[transcribe_text, transcribe_files, transcribed_video_path_state, burn_accordion]
).then(fn=None, js=fire_confetti_and_sound_js)
burn_btn.click(
fn=burn_subtitles_wrapper,
inputs=[transcribed_video_path_state, transcribe_files, burn_style, burn_font_size, burn_block_font_color, burn_words_per_line, burn_karaoke_base_color, burn_karaoke_highlight_color],
outputs=burn_output_video
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("🧠 ControlNet", elem_id="controlnet_tab"):
gr.Markdown("## ControlNet Preprocessing")
with gr.Tabs():
with gr.TabItem("Process a Video"):
gr.Markdown("### Convert a Video into a ControlNet-Ready Map")
with gr.Row():
with gr.Column():
input_video_cn = gr.Video(label="Input Video", height=UNIFIED_HEIGHT)
detector_dropdown_cn = gr.Dropdown(choices=list(DETECTOR_CONFIG.keys()), value="Canny", label="Choose Detector")
process_btn_cn = gr.Button("✨ Process Video", variant="primary")
with gr.Column():
output_video_cn = gr.Video(label="Output ControlNet Video", interactive=True, show_download_button=True, height=UNIFIED_HEIGHT)
process_btn_cn.click(fn=process_video_with_detector, inputs=[input_video_cn, detector_dropdown_cn], outputs=output_video_cn).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Process Batch Images"):
gr.Markdown("### Generate ControlNet Maps from one or more images.")
with gr.Row():
with gr.Column():
input_images_cn = gr.File(label="Upload Images or Folder", file_count="multiple", file_types=["image"])
detector_dropdown_img = gr.Dropdown(choices=list(DETECTOR_CONFIG.keys()), value="Canny", label="Choose Detector")
process_btn_img = gr.Button("✨ Process Images", variant="primary")
with gr.Column():
output_gallery_cn = gr.Gallery(label="Output ControlNet Images", columns=4, object_fit="contain", height="auto")
output_zip_cn = gr.File(label="Download All as .zip", interactive=False)
process_btn_img.click(fn=process_batch_images_with_detector, inputs=[input_images_cn, detector_dropdown_img], outputs=[output_gallery_cn, output_zip_cn]).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ—œοΈ Converter", elem_id="converter_tab"):
gr.Markdown("## Universal Media Converter & Compressor")
gr.Info("Convert your video or audio files to different formats, change codecs, and adjust quality to save space.")
with gr.Tabs():
with gr.TabItem("Batch Video Converter"):
with gr.Row():
with gr.Column():
conv_input_videos = gr.File(label="Upload Videos", file_count="multiple", file_types=["video"])
conv_btn = gr.Button("βš™οΈ Convert & Compress Videos", variant="primary")
conv_output_gallery = gr.Gallery(label="Converted Videos Preview", columns=2, object_fit="contain")
conv_output_zip = gr.File(label="Download All as .zip", interactive=False)
with gr.Column():
gr.Markdown("#### Output Settings")
with gr.Row():
conv_format = gr.Dropdown(["mp4", "mkv", "webm", "mov"], value="mp4", label="Output Format")
conv_vcodec = gr.Dropdown(["libx264", "libx265", "vp9"], value="libx264", label="Video Codec")
conv_crf = gr.Slider(minimum=18, maximum=30, value=23, step=1, label="Quality (CRF)", info="Lower = higher quality/size, Higher = lower quality/size. 23 is a good default.")
conv_scale = gr.Dropdown(["Original", "1080p", "720p", "480p"], value="Original", label="Downscale Resolution (optional)")
gr.Markdown("##### Audio Settings")
with gr.Row():
conv_acodec = gr.Dropdown(["copy", "aac", "opus"], value="copy", label="Audio Codec", info="'copy' is fastest and preserves quality.")
conv_abitrate = gr.Dropdown([96, 128, 192, 256, 320], value=192, label="Audio Bitrate (kbps)", interactive=False)
conv_acodec.change(lambda x: gr.update(interactive=(x != "copy")), conv_acodec, conv_abitrate)
conv_btn.click(
fn=batch_convert_compress_videos,
inputs=[conv_input_videos, conv_format, conv_vcodec, conv_crf, conv_scale, conv_acodec, conv_abitrate],
outputs=[conv_output_gallery, conv_output_zip],
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("Batch Audio Converter / Extractor"):
gr.Markdown("### Batch Audio Converter & Extractor")
gr.Info("Upload multiple audio OR video files. The audio will be extracted from videos and all files will be converted to your chosen format.")
with gr.Row():
with gr.Column(scale=2):
audio_conv_inputs = gr.File(label="Upload Audio or Video Files", file_count="multiple", file_types=["audio", "video"])
with gr.Row():
audio_conv_format = gr.Dropdown(["mp3", "wav", "aac", "flac", "ogg"], value="mp3", label="Output Format")
audio_conv_bitrate = gr.Dropdown([96, 128, 192, 256, 320], value=192, label="Bitrate (kbps)", info="Higher = better quality. Not used for WAV/FLAC.")
audio_conv_btn = gr.Button("🎡 Convert All Files", variant="primary")
with gr.Column(scale=3):
audio_conv_output_player = gr.Audio(label="Preview First Converted File", type="filepath")
audio_conv_output_zip = gr.File(label="Download All as .zip", interactive=False)
audio_conv_format.change(
fn=lambda fmt: gr.update(interactive=(fmt not in ["wav", "flac"])),
inputs=audio_conv_format,
outputs=audio_conv_bitrate
)
audio_conv_btn.click(
fn=batch_convert_audio,
inputs=[audio_conv_inputs, audio_conv_format, audio_conv_bitrate],
outputs=[audio_conv_output_player, audio_conv_output_zip],
show_progress="full"
).then(fn=None, js=fire_confetti_and_sound_js)
with gr.TabItem("πŸ”— Transfer", elem_id="transfer_tab"):
gr.Markdown("## Image & Link Transfer Utility")
gr.Info("Drop images below, manage URL presets, and open the target application in a new tab.")
link_presets = gr.State(DEFAULT_LINK_PRESETS.copy())
with gr.Row():
with gr.Column(scale=1):
transfer_gallery = gr.Gallery(label="Drop Images Here", height=300, columns=3, object_fit="contain")
with gr.Column(scale=2):
gr.Markdown("### Link Preset Management")
target_url = gr.Textbox(label="Target URL", value="https://huggingface.co/spaces/bep40/FramePack_rotate_landscape", interactive=True, elem_id="transfer_target_url")
search_bar = gr.Textbox(label="Search Presets", placeholder="Type to filter...", interactive=True)
with gr.Row():
preset_dropdown = gr.Dropdown(
label="Load Link Preset",
choices=sorted(list(DEFAULT_LINK_PRESETS.keys())),
interactive=True
)
delete_preset_btn = gr.Button("πŸ—‘οΈ Delete", variant="stop")
with gr.Accordion("Create a new preset", open=False):
with gr.Row():
new_preset_name = gr.Textbox(label="New Preset Name", placeholder="e.g., My Favorite App")
save_preset_btn = gr.Button("πŸ’Ύ Save")
open_link_btn = gr.Button("πŸš€ Open in New Tab", variant="primary")
search_bar.input(fn=filter_presets, inputs=[search_bar, link_presets], outputs=[preset_dropdown])
preset_dropdown.change(fn=load_preset, inputs=[link_presets, preset_dropdown], outputs=[target_url])
save_preset_btn.click(
fn=save_preset, inputs=[link_presets, new_preset_name, target_url], outputs=[link_presets, preset_dropdown]
).then(lambda: ("", ""), outputs=[new_preset_name, search_bar])
delete_confirm_js = """(name) => { if (!name) { alert('Please select a preset to delete.'); return false; } return confirm(`Are you sure you want to delete the preset: '` + name + `'?`); }"""
delete_preset_btn.click(fn=None, js=delete_confirm_js, inputs=[preset_dropdown]).then(
fn=delete_preset, inputs=[link_presets, preset_dropdown], outputs=[link_presets, preset_dropdown, target_url]
).then(lambda: "", outputs=[search_bar])
open_link_btn.click(fn=None, js="()=>{const url=document.getElementById('transfer_target_url').querySelector('textarea').value;if(url){window.open(url,'_blank')}else{alert('Target URL is empty.')}}")
main_tabs.select(fn=None, inputs=main_tabs, js="(tab) => { window.skriptz_bling.update_title(tab); }")
gr.HTML('<a href="https://linktr.ee/skylinkd" target="_blank" style="color: #94a3b8; text-decoration: none;">skylinkd production 2025 (c)</a>', elem_id="custom-footer")
if __name__ == "__main__":
    # Start each run with an empty scratch directory. A failed removal is
    # reported but does not stop the launch.
    if os.path.exists(TEMP_DIR):
        try:
            shutil.rmtree(TEMP_DIR)
        except OSError as exc:
            print(f"Error removing temp directory {TEMP_DIR}: {exc}")
    os.makedirs(TEMP_DIR, exist_ok=True)

    # `whisper` is None when the optional dependency failed to import.
    if whisper:
        load_whisper_model("base")  # Pre-load the default model on startup

    demo.launch(inbrowser=True)