File size: 6,384 Bytes
"""
Command-line interface for lyric_sync.
"""
import argparse
import logging
import os
import sys
from lyric_sync.pipeline import LyricSyncPipeline
# Epilog shown verbatim under ``--help`` (the parser uses
# RawDescriptionHelpFormatter, so this text is not re-wrapped).
_EPILOG = """
Examples:
# Full automatic pipeline (requires AcoustID key + GPU)
lyric-sync song.mp3 --acoustid-key YOUR_KEY --output synced.lrc
# With known metadata (skip identification)
lyric-sync song.mp3 --artist "Queen" --title "Bohemian Rhapsody" -o synced.lrc
# CPU-only, simpler transcription
lyric-sync song.mp3 --device cpu --backend whisper --artist "Artist" --title "Title"
# JSON output for programmatic use
lyric-sync song.mp3 --format json --artist "Artist" --title "Title" -o synced.json
# ASS karaoke subtitles
lyric-sync song.mp3 --format ass --artist "Artist" --title "Title" -o karaoke.ass
"""


def _build_parser():
    """Create the ``lyric-sync`` argument parser with all options registered."""
    parser = argparse.ArgumentParser(
        prog="lyric-sync",
        description="Automatic perfect song lyric acquisition and synchronization.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )
    parser.add_argument("audio", help="Path to audio file (mp3, flac, wav, etc.)")
    parser.add_argument("-o", "--output", help="Output file path (default: stdout)")
    parser.add_argument(
        "--format",
        choices=["lrc", "lrc_standard", "json", "srt", "ass"],
        default="lrc",
        help="Output format (default: lrc = enhanced word-level LRC)",
    )

    # Metadata overrides
    meta = parser.add_argument_group("song metadata (skip identification)")
    meta.add_argument("--artist", help="Artist name")
    meta.add_argument("--title", help="Track title")
    meta.add_argument("--album", help="Album name")

    # API keys: the environment variable (read when the parser is built)
    # supplies the default; an explicit flag overrides it.
    keys = parser.add_argument_group("API keys")
    keys.add_argument(
        "--acoustid-key",
        default=os.environ.get("ACOUSTID_API_KEY"),
        help="AcoustID API key (or set ACOUSTID_API_KEY env var)",
    )
    keys.add_argument(
        "--genius-token",
        default=os.environ.get("GENIUS_TOKEN"),
        help="Genius API token (or set GENIUS_TOKEN env var)",
    )

    # Model configuration
    model = parser.add_argument_group("model configuration")
    model.add_argument(
        "--backend",
        choices=["whisperx", "whisper", "granite"],
        default="whisperx",
        help="Transcription backend (default: whisperx)",
    )
    model.add_argument(
        "--whisper-model",
        default="large-v2",
        help="Whisper model size (default: large-v2, best for lyrics)",
    )
    model.add_argument(
        "--demucs-model",
        default="htdemucs_ft",
        help="Demucs model (default: htdemucs_ft, best quality)",
    )
    model.add_argument(
        "--device",
        default="cuda",
        help="Device: cuda or cpu (default: cuda)",
    )
    model.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)",
    )

    # Processing options
    proc = parser.add_argument_group("processing options")
    proc.add_argument(
        "--no-refine",
        action="store_true",
        help="Skip onset/offset timing refinement",
    )
    proc.add_argument(
        "--line-break-gap",
        type=float,
        default=1.0,
        help="Seconds of silence to trigger line break in output (default: 1.0)",
    )

    # Verbosity
    parser.add_argument(
        "-v", "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v info, -vv debug)",
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Suppress all logging",
    )
    return parser


def _resolve_log_level(quiet, verbose):
    """Map the ``-q`` flag and ``-v`` count to a ``logging`` level.

    ``quiet`` wins over any ``-v``; otherwise 0 -> WARNING, 1 -> INFO,
    2 or more -> DEBUG.
    """
    if quiet:
        return logging.CRITICAL
    if verbose >= 2:
        return logging.DEBUG
    if verbose >= 1:
        return logging.INFO
    return logging.WARNING


def _render_result(result, output_format, line_break_gap):
    """Serialize ``result`` to text in ``output_format`` for the stdout path."""
    # Lambdas keep the dispatch lazy: only the requested format is rendered.
    formatters = {
        "lrc": lambda: result.to_lrc(enhanced=True, line_break_gap=line_break_gap),
        "lrc_standard": lambda: result.to_lrc(enhanced=False, line_break_gap=line_break_gap),
        "json": lambda: result.to_json(),
        "srt": lambda: result.to_srt(line_break_gap=line_break_gap),
        "ass": lambda: result.to_ass(line_break_gap=line_break_gap),
    }
    return formatters[output_format]()


def _print_summary(result):
    """Write a short human-readable sync summary to stderr."""
    print("\n--- Sync Summary ---", file=sys.stderr)
    if result.song:
        print(f"Song: {result.song.artist} - {result.song.title}", file=sys.stderr)
        print(f"Identified via: {result.song.method}", file=sys.stderr)
    print(f"Words: {len(result.synced_words)}", file=sys.stderr)
    print(f"Quality: {result.quality_score:.2f}", file=sys.stderr)
    if result.alignment_stats:
        print(f"Direct matches: {result.alignment_stats.match_rate:.1%}", file=sys.stderr)
        print(f"Coverage: {result.alignment_stats.coverage:.1%}", file=sys.stderr)
    print(f"Time: {result.processing_time_sec:.1f}s", file=sys.stderr)


def main(argv=None):
    """Run the ``lyric-sync`` command-line interface.

    Parses arguments, configures logging, validates the audio path, runs
    ``LyricSyncPipeline.sync`` and, when no ``--output`` is given, prints the
    rendered lyrics to stdout. A summary goes to stderr unless ``--quiet``.

    Args:
        argv: Optional list of argument strings (without the program name).
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, which
            is the original behavior; passing a list allows programmatic use.

    Raises:
        SystemExit: with code 1 if the audio file does not exist; argparse
            itself exits with code 2 on invalid arguments and 0 on ``--help``.
    """
    args = _build_parser().parse_args(argv)

    # Configure logging
    logging.basicConfig(
        level=_resolve_log_level(args.quiet, args.verbose),
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # Validate inputs before constructing the pipeline.
    if not os.path.isfile(args.audio):
        print(f"Error: Audio file not found: {args.audio}", file=sys.stderr)
        sys.exit(1)
    if not args.artist and not args.title and not args.acoustid_key:
        print(
            "Warning: No --artist/--title and no --acoustid-key provided. "
            "Song identification may fail. Will use raw transcription as fallback.",
            file=sys.stderr,
        )

    # Run pipeline
    pipeline = LyricSyncPipeline(
        acoustid_key=args.acoustid_key,
        genius_token=args.genius_token,
        transcription_backend=args.backend,
        whisper_model=args.whisper_model,
        demucs_model=args.demucs_model,
        device=args.device,
        refine_timings_enabled=not args.no_refine,
        language=args.language,
    )
    # NOTE(review): --line-break-gap is not passed to sync(); it is only
    # applied on the stdout path below. Whether sync() honors a gap when it
    # writes output_path itself is not visible from here -- confirm.
    result = pipeline.sync(
        audio_path=args.audio,
        artist=args.artist,
        title=args.title,
        album=args.album,
        output_format=args.format,
        output_path=args.output,
    )

    # Print to stdout if no output file specified
    if not args.output:
        print(_render_result(result, args.format, args.line_break_gap))

    # Print summary to stderr
    if not args.quiet:
        _print_summary(result)
# Script entry point: run the CLI only when this file is executed directly,
# never as a side effect of importing the module.
if __name__ == "__main__":
    main()
|