"""Command-line interface for Audio Video Generator.""" import logging import os import sys from pathlib import Path from typing import List, Optional import click from tqdm import tqdm from audio_video_generator import __version__ from audio_video_generator.config import ( ANIMATION_OPTIONS, ANIMATION_RANDOM_POOL, DEFAULT_OUTPUT_NAME, RESOLUTION_MAP, TRANSITION_OPTIONS, TRANSITION_RANDOM_POOL, ) from audio_video_generator.core.pipeline import ( VideoPipeline, VideoPipelineConfig, ) def setup_logging(verbose: bool = False) -> None: """Configure logging.""" level = logging.DEBUG if verbose else logging.INFO logging.basicConfig( level=level, format="%(levelname)s: %(message)s", handlers=[logging.StreamHandler(sys.stdout)] ) def validate_file(ctx, param, value): """Validate file exists.""" if value is None: return None if not os.path.exists(value): raise click.BadParameter(f"File not found: {value}") return value @click.group() @click.version_option(version=__version__, prog_name="avg") @click.option("--verbose", "-v", is_flag=True, help="Enable verbose output") @click.pass_context def cli(ctx: click.Context, verbose: bool) -> None: """Audio Video Generator - Synchronize images to audio. Generate videos by synchronizing images to audio using Whisper transcription and CSV mapping files. Example: avg generate -a audio.mp3 -c mapping.csv -i images.zip -o output.mp4 """ ctx.ensure_object(dict) ctx.obj["verbose"] = verbose setup_logging(verbose) @cli.command() @click.option( "--audio", "-a", required=True, type=click.Path(exists=True, dir_okay=False), help="Path to audio file (mp3, wav, m4a, etc.)" ) @click.option( "--csv", "-c", required=True, type=click.Path(exists=True, dir_okay=False), help="Path to CSV mapping file (text, image columns)" ) @click.option( "--images", "-i", type=click.Path(exists=True), help="Path to images (ZIP file or directory)" ) @click.option( "--output", "-o", default=DEFAULT_OUTPUT_NAME, help=f"Output video filename (default: {DEFAULT_OUTPUT_NAME})" ) @click.option( "--resolution", "-r", type=click.Choice(["landscape", "portrait", "square"]), default="landscape", help="Output resolution preset" ) @click.option( "--animation-mode", type=click.Choice(["single", "custom", "random"]), default="random", help="Image animation selection mode" ) @click.option( "--animation", type=click.Choice(ANIMATION_OPTIONS), help="Single animation to use (with --animation-mode=single)" ) @click.option( "--animations", multiple=True, type=click.Choice(ANIMATION_RANDOM_POOL), help="Custom animations to cycle through (with --animation-mode=custom)" ) @click.option( "--transition-mode", type=click.Choice(["single", "custom", "random"]), default="random", help="Image transition selection mode" ) @click.option( "--transition", type=click.Choice(TRANSITION_OPTIONS), help="Single transition to use (with --transition-mode=single)" ) @click.option( "--transitions", multiple=True, type=click.Choice(TRANSITION_RANDOM_POOL), help="Custom transitions to cycle through (with --transition-mode=custom)" ) @click.option( "--txt-overlay", type=click.Path(exists=True, dir_okay=False), help="Path to TXT file for text overlay (one phrase per line)" ) @click.option( "--font-size", type=int, default=56, help="Text overlay font size" ) @click.option( "--text-color", default="#FFFFFF", help="Text overlay color (hex)" ) @click.option( "--text-pos-x", type=float, default=0.5, help="Text horizontal position (0.0 to 1.0)" ) @click.option( "--text-pos-y", type=float, default=0.5, help="Text vertical position (0.0 to 1.0)" ) @click.option( "--whisper-model", default="base", help="Whisper model size (tiny, base, small, medium, large)" ) @click.option( "--work-dir", type=click.Path(), default="./avg_runs", help="Working directory for outputs and checkpoints" ) @click.option( "--fps", type=int, default=24, help="Output video framerate" ) @click.option( "--save-checkpoints/--no-checkpoints", default=True, help="Save checkpoint files" ) @click.option( "--keep-work-dir/--clean-work-dir", default=False, help="Keep working directory after completion" ) @click.pass_context def generate( ctx: click.Context, audio: str, csv: str, images: Optional[str], output: str, resolution: str, animation_mode: str, animation: Optional[str], animations: tuple, transition_mode: str, transition: Optional[str], transitions: tuple, txt_overlay: Optional[str], font_size: int, text_color: str, text_pos_x: float, text_pos_y: float, whisper_model: str, work_dir: str, fps: int, save_checkpoints: bool, keep_work_dir: bool ) -> None: """Generate video from audio, images, and CSV mapping.""" verbose = ctx.obj.get("verbose", False) # Validate images input if images is None: click.echo("Error: --images is required (ZIP file or directory)", err=True) sys.exit(1) # Determine image input mode if images.endswith(".zip"): input_mode = "ZIP" elif os.path.isdir(images): input_mode = "MANUAL" else: click.echo(f"Error: Images must be a ZIP file or directory: {images}", err=True) sys.exit(1) # Build text style config text_style = { "font_size": font_size, "text_color": text_color, "pos_x": text_pos_x, "pos_y": text_pos_y, } # Build pipeline config config = VideoPipelineConfig( audio_path=audio, csv_path=csv, input_mode=input_mode, zip_path=images if input_mode == "ZIP" else None, manual_images_dir=images if input_mode == "MANUAL" else None, output_filename=output, resolution=resolution, animation_mode=animation_mode, single_animation=animation, custom_animations=list(animations) if animations else None, transition_mode=transition_mode, single_transition=transition, custom_transitions=list(transitions) if transitions else None, txt_path=txt_overlay, enable_text_overlay=txt_overlay is not None, text_style=text_style if txt_overlay else None, whisper_model=whisper_model, work_root=work_dir, fps=fps, save_checkpoints=save_checkpoints, keep_work_dir=keep_work_dir, ) # Run pipeline pipeline = VideoPipeline(config) try: click.echo(f"Starting video generation...") click.echo(f" Audio: {audio}") click.echo(f" CSV: {csv}") click.echo(f" Images: {images} ({input_mode} mode)") click.echo(f" Output: {output}") click.echo(f" Resolution: {resolution} ({RESOLUTION_MAP[resolution][0]}x{RESOLUTION_MAP[resolution][1]})") result = pipeline.run(progress_callback=lambda msg, pct: click.echo(f" [{pct*100:5.1f}%] {msg}")) click.echo(f"\nSuccess! Video saved to: {result['output_path']}") if result.get("drive_path"): click.echo(f"Also saved to Drive: {result['drive_path']}") if verbose and result.get("report"): click.echo("\n--- Processing Report ---") click.echo(result["report"]) except Exception as e: click.echo(f"\nError: {e}", err=True) if verbose: import traceback click.echo(traceback.format_exc(), err=True) sys.exit(1) @cli.command() @click.option( "--port", "-p", type=int, default=7860, help="Port to run web UI on" ) @click.option( "--host", "-h", default="127.0.0.1", help="Host to bind to" ) @click.option( "--share/--no-share", default=False, help="Create public shareable link" ) def web(port: int, host: str, share: bool) -> None: """Launch Gradio web interface.""" try: from audio_video_generator.web.gradio_ui import launch_ui click.echo(f"Starting web UI on http://{host}:{port}") launch_ui(host=host, port=port, share=share) except ImportError as e: click.echo(f"Error: Could not load web UI - {e}", err=True) sys.exit(1) @cli.command() def models() -> None: """List available Whisper models.""" models_info = [ ("tiny", "39 MB", "Fastest, lowest accuracy"), ("base", "74 MB", "Good balance (default)"), ("small", "244 MB", "Better accuracy"), ("medium", "769 MB", "High accuracy"), ("large", "1550 MB", "Best accuracy, slowest"), ] click.echo("Available Whisper models:") click.echo() for name, size, desc in models_info: marker = " -> " if name == "base" else " " click.echo(f"{marker}{name:8} {size:10} {desc}") click.echo() click.echo("Use with: avg generate --whisper-model ") @cli.command() def animations() -> None: """List available animations and transitions.""" click.echo("Image Animations:") for anim in ANIMATION_OPTIONS: click.echo(f" - {anim}") click.echo() click.echo("Image Transitions:") for trans in TRANSITION_OPTIONS: click.echo(f" - {trans}") def main() -> None: """Entry point for the CLI.""" cli() if __name__ == "__main__": main()