Nanny7's picture
Initial commit: Audio Video Generator v1.0.0
929f41f
Raw
History Blame Contribute Delete
9.69 kB
"""Command-line interface for Audio Video Generator."""
import logging
import os
import sys
from pathlib import Path
from typing import List, Optional
import click
from tqdm import tqdm
from audio_video_generator import __version__
from audio_video_generator.config import (
ANIMATION_OPTIONS,
ANIMATION_RANDOM_POOL,
DEFAULT_OUTPUT_NAME,
RESOLUTION_MAP,
TRANSITION_OPTIONS,
TRANSITION_RANDOM_POOL,
)
from audio_video_generator.core.pipeline import (
VideoPipeline,
VideoPipelineConfig,
)
def setup_logging(verbose: bool = False) -> None:
    """Set up root logging for the command-line interface.

    Args:
        verbose: Use the DEBUG level when true, INFO otherwise.

    Records go to ``sys.stdout`` formatted as ``LEVEL: message``.
    """
    if verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO

    # Bind the handler to stdout explicitly; StreamHandler's own default
    # stream is stderr.
    stdout_handler = logging.StreamHandler(sys.stdout)

    logging.basicConfig(
        level=log_level,
        format="%(levelname)s: %(message)s",
        handlers=[stdout_handler],
    )
def validate_file(ctx, param, value):
    """Click-style callback that rejects paths which do not exist.

    Args:
        ctx: Callback context argument (not used here).
        param: Callback parameter argument (not used here).
        value: The path supplied by the user, or ``None`` when omitted.

    Returns:
        ``value`` unchanged (including ``None``).

    Raises:
        click.BadParameter: If ``value`` is given but nothing exists at
            that path.
    """
    # A missing option (None) is passed through; only a supplied path is checked.
    path_missing = value is not None and not os.path.exists(value)
    if path_missing:
        raise click.BadParameter(f"File not found: {value}")
    return value
@click.group()
@click.version_option(version=__version__, prog_name="avg")
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
@click.pass_context
def cli(ctx: click.Context, verbose: bool) -> None:
    """Audio Video Generator - Synchronize images to audio.

    Generate videos by synchronizing images to audio using Whisper
    transcription and CSV mapping files.

    \b
    Example:
      avg generate -a audio.mp3 -c mapping.csv -i images.zip -o output.mp4
    """
    # NOTE: the docstring above is what Click prints for ``--help``.
    # Paragraphs are separated by blank lines, and the lone ``\b`` line
    # tells Click not to re-wrap the example, so the sample command stays
    # on its own line instead of being folded into the description.
    #
    # Parameters:
    #   ctx     -- Click context; its ``obj`` dict carries shared state
    #              to the subcommands.
    #   verbose -- value of the --verbose/-v flag.

    # Make sure ctx.obj is a dict before storing the flag in it.
    ctx.ensure_object(dict)
    ctx.obj["verbose"] = verbose
    setup_logging(verbose)
def _detect_input_mode(images: str) -> Optional[str]:
    """Classify the --images path as "MANUAL" (directory), "ZIP", or None."""
    # Check for a directory first so a folder that merely ends in ".zip"
    # is not mistaken for an archive.
    if os.path.isdir(images):
        return "MANUAL"
    # Compare the suffix case-insensitively so "IMAGES.ZIP" is accepted too.
    if images.lower().endswith(".zip"):
        return "ZIP"
    return None


@cli.command()
@click.option(
    "--audio", "-a",
    required=True,
    type=click.Path(exists=True, dir_okay=False),
    help="Path to audio file (mp3, wav, m4a, etc.)",
)
@click.option(
    "--csv", "-c",
    required=True,
    type=click.Path(exists=True, dir_okay=False),
    help="Path to CSV mapping file (text, image columns)",
)
@click.option(
    "--images", "-i",
    type=click.Path(exists=True),
    help="Path to images (ZIP file or directory)",
)
@click.option(
    "--output", "-o",
    default=DEFAULT_OUTPUT_NAME,
    help=f"Output video filename (default: {DEFAULT_OUTPUT_NAME})",
)
@click.option(
    "--resolution", "-r",
    type=click.Choice(["landscape", "portrait", "square"]),
    default="landscape",
    help="Output resolution preset",
)
@click.option(
    "--animation-mode",
    type=click.Choice(["single", "custom", "random"]),
    default="random",
    help="Image animation selection mode",
)
@click.option(
    "--animation",
    type=click.Choice(ANIMATION_OPTIONS),
    help="Single animation to use (with --animation-mode=single)",
)
@click.option(
    "--animations",
    multiple=True,
    type=click.Choice(ANIMATION_RANDOM_POOL),
    help="Custom animations to cycle through (with --animation-mode=custom)",
)
@click.option(
    "--transition-mode",
    type=click.Choice(["single", "custom", "random"]),
    default="random",
    help="Image transition selection mode",
)
@click.option(
    "--transition",
    type=click.Choice(TRANSITION_OPTIONS),
    help="Single transition to use (with --transition-mode=single)",
)
@click.option(
    "--transitions",
    multiple=True,
    type=click.Choice(TRANSITION_RANDOM_POOL),
    help="Custom transitions to cycle through (with --transition-mode=custom)",
)
@click.option(
    "--txt-overlay",
    type=click.Path(exists=True, dir_okay=False),
    help="Path to TXT file for text overlay (one phrase per line)",
)
@click.option(
    "--font-size",
    type=int,
    default=56,
    help="Text overlay font size",
)
@click.option(
    "--text-color",
    default="#FFFFFF",
    help="Text overlay color (hex)",
)
@click.option(
    "--text-pos-x",
    type=float,
    default=0.5,
    help="Text horizontal position (0.0 to 1.0)",
)
@click.option(
    "--text-pos-y",
    type=float,
    default=0.5,
    help="Text vertical position (0.0 to 1.0)",
)
@click.option(
    "--whisper-model",
    default="base",
    help="Whisper model size (tiny, base, small, medium, large)",
)
@click.option(
    "--work-dir",
    type=click.Path(),
    default="./avg_runs",
    help="Working directory for outputs and checkpoints",
)
@click.option(
    "--fps",
    type=int,
    default=24,
    help="Output video framerate",
)
@click.option(
    "--save-checkpoints/--no-checkpoints",
    default=True,
    help="Save checkpoint files",
)
@click.option(
    "--keep-work-dir/--clean-work-dir",
    default=False,
    help="Keep working directory after completion",
)
@click.pass_context
def generate(
    ctx: click.Context,
    audio: str,
    csv: str,
    images: Optional[str],
    output: str,
    resolution: str,
    animation_mode: str,
    animation: Optional[str],
    animations: tuple,
    transition_mode: str,
    transition: Optional[str],
    transitions: tuple,
    txt_overlay: Optional[str],
    font_size: int,
    text_color: str,
    text_pos_x: float,
    text_pos_y: float,
    whisper_model: str,
    work_dir: str,
    fps: int,
    save_checkpoints: bool,
    keep_work_dir: bool,
) -> None:
    """Generate video from audio, images, and CSV mapping."""
    # The docstring is kept to one line because Click shows it as the
    # command's help text; developer notes live in comments instead.
    #
    # Every parameter is the value of the identically named option above.
    # They are packed into a VideoPipelineConfig, the pipeline is run, and
    # progress plus the final result are echoed to the terminal.
    #
    # Exits with status 1 when --images is missing, when it is neither a
    # directory nor a ".zip" path, or when building/running the pipeline
    # raises any Exception (the traceback is printed only in verbose mode).
    verbose = ctx.obj.get("verbose", False)

    # --images is not marked required on the option, so enforce it here.
    if images is None:
        click.echo("Error: --images is required (ZIP file or directory)", err=True)
        sys.exit(1)

    input_mode = _detect_input_mode(images)
    if input_mode is None:
        click.echo(f"Error: Images must be a ZIP file or directory: {images}", err=True)
        sys.exit(1)

    text_style = {
        "font_size": font_size,
        "text_color": text_color,
        "pos_x": text_pos_x,
        "pos_y": text_pos_y,
    }

    def _report_progress(msg, pct):
        # pct is scaled by 100 for display as a percentage.
        click.echo(f" [{pct*100:5.1f}%] {msg}")

    try:
        # Config and pipeline construction sit inside the try block so a
        # failure there is reported like any other pipeline error instead
        # of escaping as a raw traceback.
        config = VideoPipelineConfig(
            audio_path=audio,
            csv_path=csv,
            input_mode=input_mode,
            zip_path=images if input_mode == "ZIP" else None,
            manual_images_dir=images if input_mode == "MANUAL" else None,
            output_filename=output,
            resolution=resolution,
            animation_mode=animation_mode,
            single_animation=animation,
            custom_animations=list(animations) if animations else None,
            transition_mode=transition_mode,
            single_transition=transition,
            custom_transitions=list(transitions) if transitions else None,
            txt_path=txt_overlay,
            enable_text_overlay=txt_overlay is not None,
            text_style=text_style if txt_overlay else None,
            whisper_model=whisper_model,
            work_root=work_dir,
            fps=fps,
            save_checkpoints=save_checkpoints,
            keep_work_dir=keep_work_dir,
        )
        pipeline = VideoPipeline(config)

        click.echo("Starting video generation...")
        click.echo(f" Audio: {audio}")
        click.echo(f" CSV: {csv}")
        click.echo(f" Images: {images} ({input_mode} mode)")
        click.echo(f" Output: {output}")
        dimensions = RESOLUTION_MAP[resolution]
        click.echo(f" Resolution: {resolution} ({dimensions[0]}x{dimensions[1]})")

        result = pipeline.run(progress_callback=_report_progress)

        click.echo(f"\nSuccess! Video saved to: {result['output_path']}")
        if result.get("drive_path"):
            click.echo(f"Also saved to Drive: {result['drive_path']}")
        if verbose and result.get("report"):
            click.echo("\n--- Processing Report ---")
            click.echo(result["report"])
    except Exception as e:
        # Top-level CLI boundary: show a short message, add the traceback
        # only when --verbose was given, and exit non-zero.
        click.echo(f"\nError: {e}", err=True)
        if verbose:
            import traceback
            click.echo(traceback.format_exc(), err=True)
        sys.exit(1)
@cli.command()
@click.option("--port", "-p", type=int, default=7860, help="Port to run web UI on")
@click.option("--host", "-h", default="127.0.0.1", help="Host to bind to")
@click.option("--share/--no-share", default=False, help="Create public shareable link")
def web(port: int, host: str, share: bool) -> None:
    """Launch Gradio web interface."""
    # port, host and share are the values of the options declared above and
    # are forwarded unchanged to launch_ui.
    try:
        # The UI module is imported inside the command rather than at module
        # top level; an ImportError raised anywhere in this block is turned
        # into an error message and exit status 1.
        from audio_video_generator.web.gradio_ui import launch_ui

        address = f"http://{host}:{port}"
        click.echo(f"Starting web UI on {address}")
        launch_ui(host=host, port=port, share=share)
    except ImportError as import_error:
        click.echo(f"Error: Could not load web UI - {import_error}", err=True)
        sys.exit(1)
@cli.command()
def models() -> None:
    """List available Whisper models."""
    # Prints one row per model (name, size, description) and marks the
    # default model with an arrow.
    default_model = "base"
    default_marker = " -> "
    # Pad the unmarked rows to the arrow's width so the name, size and
    # description columns line up on every row.
    plain_marker = " " * len(default_marker)

    models_info = [
        ("tiny", "39 MB", "Fastest, lowest accuracy"),
        ("base", "74 MB", "Good balance (default)"),
        ("small", "244 MB", "Better accuracy"),
        ("medium", "769 MB", "High accuracy"),
        ("large", "1550 MB", "Best accuracy, slowest"),
    ]

    click.echo("Available Whisper models:")
    click.echo()
    for name, size, desc in models_info:
        marker = default_marker if name == default_model else plain_marker
        click.echo(f"{marker}{name:8} {size:10} {desc}")
    click.echo()
    click.echo("Use with: avg generate --whisper-model <model>")
@cli.command()
def animations() -> None:
    """List available animations and transitions."""
    # Each section is a heading followed by one " - name" line per option;
    # a blank line separates consecutive sections.
    sections = (
        ("Image Animations:", ANIMATION_OPTIONS),
        ("Image Transitions:", TRANSITION_OPTIONS),
    )
    for position, (heading, option_names) in enumerate(sections):
        if position > 0:
            click.echo()
        click.echo(heading)
        for option_name in option_names:
            click.echo(f" - {option_name}")
def main() -> None:
    """Entry point for the CLI.

    Calls the ``cli`` command group with no explicit arguments.
    """
    cli()
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()