rikhoffbauer2's picture
Upload lyric_sync/cli.py
f6f2120 verified
"""
Command-line interface for lyric_sync.
"""
import argparse
import logging
import os
import sys
from lyric_sync.pipeline import LyricSyncPipeline
def _build_parser() -> argparse.ArgumentParser:
    """Build the ``lyric-sync`` argument parser with all of its option groups."""
    # The epilog is rendered verbatim (RawDescriptionHelpFormatter), so it is
    # assembled from explicit pieces to keep its exact text independent of the
    # indentation of this function.
    epilog = (
        "\n"
        "Examples:\n"
        "# Full automatic pipeline (requires AcoustID key + GPU)\n"
        "lyric-sync song.mp3 --acoustid-key YOUR_KEY --output synced.lrc\n"
        "# With known metadata (skip identification)\n"
        'lyric-sync song.mp3 --artist "Queen" --title "Bohemian Rhapsody" -o synced.lrc\n'
        "# CPU-only, simpler transcription\n"
        'lyric-sync song.mp3 --device cpu --backend whisper --artist "Artist" --title "Title"\n'
        "# JSON output for programmatic use\n"
        'lyric-sync song.mp3 --format json --artist "Artist" --title "Title" -o synced.json\n'
        "# ASS karaoke subtitles\n"
        'lyric-sync song.mp3 --format ass --artist "Artist" --title "Title" -o karaoke.ass\n'
    )
    parser = argparse.ArgumentParser(
        prog="lyric-sync",
        description="Automatic perfect song lyric acquisition and synchronization.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument("audio", help="Path to audio file (mp3, flac, wav, etc.)")
    parser.add_argument("-o", "--output", help="Output file path (default: stdout)")
    parser.add_argument(
        "--format",
        choices=["lrc", "lrc_standard", "json", "srt", "ass"],
        default="lrc",
        help="Output format (default: lrc = enhanced word-level LRC)",
    )

    # Metadata overrides
    meta = parser.add_argument_group("song metadata (skip identification)")
    meta.add_argument("--artist", help="Artist name")
    meta.add_argument("--title", help="Track title")
    meta.add_argument("--album", help="Album name")

    # API keys (environment variables act as defaults for the flags)
    keys = parser.add_argument_group("API keys")
    keys.add_argument(
        "--acoustid-key",
        default=os.environ.get("ACOUSTID_API_KEY"),
        help="AcoustID API key (or set ACOUSTID_API_KEY env var)",
    )
    keys.add_argument(
        "--genius-token",
        default=os.environ.get("GENIUS_TOKEN"),
        help="Genius API token (or set GENIUS_TOKEN env var)",
    )

    # Model configuration
    model = parser.add_argument_group("model configuration")
    model.add_argument(
        "--backend",
        choices=["whisperx", "whisper", "granite"],
        default="whisperx",
        help="Transcription backend (default: whisperx)",
    )
    model.add_argument(
        "--whisper-model",
        default="large-v2",
        help="Whisper model size (default: large-v2, best for lyrics)",
    )
    model.add_argument(
        "--demucs-model",
        default="htdemucs_ft",
        help="Demucs model (default: htdemucs_ft, best quality)",
    )
    model.add_argument(
        "--device",
        default="cuda",
        help="Device: cuda or cpu (default: cuda)",
    )
    model.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)",
    )

    # Processing options
    proc = parser.add_argument_group("processing options")
    proc.add_argument(
        "--no-refine",
        action="store_true",
        help="Skip onset/offset timing refinement",
    )
    proc.add_argument(
        "--line-break-gap",
        type=float,
        default=1.0,
        help="Seconds of silence to trigger line break in output (default: 1.0)",
    )

    # Verbosity
    parser.add_argument(
        "-v", "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v info, -vv debug)",
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Suppress all logging",
    )
    return parser


def _configure_logging(verbose: int, quiet: bool) -> None:
    """Configure root logging; ``quiet`` takes precedence over ``verbose``."""
    if quiet:
        log_level = logging.CRITICAL
    elif verbose >= 2:
        log_level = logging.DEBUG
    elif verbose >= 1:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )


def _render(result, output_format: str, line_break_gap: float) -> str:
    """Render ``result`` as text in ``output_format`` via its ``to_*`` methods."""
    formatters = {
        "lrc": lambda: result.to_lrc(enhanced=True, line_break_gap=line_break_gap),
        "lrc_standard": lambda: result.to_lrc(enhanced=False, line_break_gap=line_break_gap),
        "json": result.to_json,
        "srt": lambda: result.to_srt(line_break_gap=line_break_gap),
        "ass": lambda: result.to_ass(line_break_gap=line_break_gap),
    }
    return formatters[output_format]()


def _print_summary(result) -> None:
    """Write a short human-readable summary of ``result`` to stderr."""
    print("\n--- Sync Summary ---", file=sys.stderr)
    if result.song:
        print(f"Song: {result.song.artist} - {result.song.title}", file=sys.stderr)
        print(f"Identified via: {result.song.method}", file=sys.stderr)
    print(f"Words: {len(result.synced_words)}", file=sys.stderr)
    print(f"Quality: {result.quality_score:.2f}", file=sys.stderr)
    if result.alignment_stats:
        print(f"Direct matches: {result.alignment_stats.match_rate:.1%}", file=sys.stderr)
        print(f"Coverage: {result.alignment_stats.coverage:.1%}", file=sys.stderr)
    print(f"Time: {result.processing_time_sec:.1f}s", file=sys.stderr)


def main(argv=None) -> None:
    """Run the ``lyric-sync`` command-line interface.

    Parses the command line, configures logging, validates the audio path,
    runs ``LyricSyncPipeline.sync`` and, when no ``--output`` file is given,
    prints the rendered lyrics to stdout. Unless ``--quiet`` is set, a summary
    is written to stderr afterwards.

    Args:
        argv: Optional list of argument strings (without the program name).
            When ``None`` (the default), argparse reads ``sys.argv[1:]``, so
            existing callers that invoke ``main()`` are unaffected.

    Raises:
        SystemExit: With code 1 if the audio file does not exist; argparse
            also exits on ``--help`` (code 0) or invalid arguments (code 2).
    """
    args = _build_parser().parse_args(argv)
    _configure_logging(args.verbose, args.quiet)

    # Validate inputs
    if not os.path.isfile(args.audio):
        print(f"Error: Audio file not found: {args.audio}", file=sys.stderr)
        sys.exit(1)
    if not args.artist and not args.title and not args.acoustid_key:
        print(
            "Warning: No --artist/--title and no --acoustid-key provided. "
            "Song identification may fail. Will use raw transcription as fallback.",
            file=sys.stderr,
        )

    # Run pipeline
    pipeline = LyricSyncPipeline(
        acoustid_key=args.acoustid_key,
        genius_token=args.genius_token,
        transcription_backend=args.backend,
        whisper_model=args.whisper_model,
        demucs_model=args.demucs_model,
        device=args.device,
        refine_timings_enabled=not args.no_refine,
        language=args.language,
    )
    # NOTE(review): --line-break-gap is not forwarded to sync(); it is only
    # applied on the stdout path below. Whether sync() uses its own gap when
    # writing output_path is not visible from here -- confirm.
    result = pipeline.sync(
        audio_path=args.audio,
        artist=args.artist,
        title=args.title,
        album=args.album,
        output_format=args.format,
        output_path=args.output,
    )

    # Print to stdout if no output file specified
    if not args.output:
        print(_render(result, args.format, args.line_break_gap))

    # Print summary to stderr
    if not args.quiet:
        _print_summary(result)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()