# File size: 6,384 Bytes
# f6f2120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
"""
Command-line interface for lyric_sync.
"""

import argparse
import logging
import os
import sys

from lyric_sync.pipeline import LyricSyncPipeline


def _build_parser():
    """Build the ``lyric-sync`` argument parser.

    Returns:
        argparse.ArgumentParser: Parser exposing the positional ``audio``
        path plus output, metadata, API-key, model, processing and
        verbosity options.
    """
    parser = argparse.ArgumentParser(
        prog="lyric-sync",
        description="Automatic perfect song lyric acquisition and synchronization.",
        # Raw formatter keeps the hand-formatted examples in the epilog intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Full automatic pipeline (requires AcoustID key + GPU)
  lyric-sync song.mp3 --acoustid-key YOUR_KEY --output synced.lrc

  # With known metadata (skip identification)
  lyric-sync song.mp3 --artist "Queen" --title "Bohemian Rhapsody" -o synced.lrc

  # CPU-only, simpler transcription
  lyric-sync song.mp3 --device cpu --backend whisper --artist "Artist" --title "Title"

  # JSON output for programmatic use
  lyric-sync song.mp3 --format json --artist "Artist" --title "Title" -o synced.json

  # ASS karaoke subtitles
  lyric-sync song.mp3 --format ass --artist "Artist" --title "Title" -o karaoke.ass
""",
    )

    parser.add_argument("audio", help="Path to audio file (mp3, flac, wav, etc.)")
    parser.add_argument("-o", "--output", help="Output file path (default: stdout)")
    parser.add_argument(
        "--format",
        choices=["lrc", "lrc_standard", "json", "srt", "ass"],
        default="lrc",
        help="Output format (default: lrc = enhanced word-level LRC)",
    )

    # Metadata overrides
    meta = parser.add_argument_group("song metadata (skip identification)")
    meta.add_argument("--artist", help="Artist name")
    meta.add_argument("--title", help="Track title")
    meta.add_argument("--album", help="Album name")

    # API keys: the environment supplies the default, the flag overrides it.
    keys = parser.add_argument_group("API keys")
    keys.add_argument(
        "--acoustid-key",
        default=os.environ.get("ACOUSTID_API_KEY"),
        help="AcoustID API key (or set ACOUSTID_API_KEY env var)",
    )
    keys.add_argument(
        "--genius-token",
        default=os.environ.get("GENIUS_TOKEN"),
        help="Genius API token (or set GENIUS_TOKEN env var)",
    )

    # Model configuration
    model = parser.add_argument_group("model configuration")
    model.add_argument(
        "--backend",
        choices=["whisperx", "whisper", "granite"],
        default="whisperx",
        help="Transcription backend (default: whisperx)",
    )
    model.add_argument(
        "--whisper-model",
        default="large-v2",
        help="Whisper model size (default: large-v2, best for lyrics)",
    )
    model.add_argument(
        "--demucs-model",
        default="htdemucs_ft",
        help="Demucs model (default: htdemucs_ft, best quality)",
    )
    model.add_argument(
        "--device",
        default="cuda",
        help="Device: cuda or cpu (default: cuda)",
    )
    model.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)",
    )

    # Processing options
    proc = parser.add_argument_group("processing options")
    proc.add_argument(
        "--no-refine",
        action="store_true",
        help="Skip onset/offset timing refinement",
    )
    proc.add_argument(
        "--line-break-gap",
        type=float,
        default=1.0,
        help="Seconds of silence to trigger line break in output (default: 1.0)",
    )

    # Verbosity
    parser.add_argument(
        "-v", "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v info, -vv debug)",
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Suppress all logging",
    )
    return parser


def _select_log_level(quiet, verbose):
    """Map the ``--quiet`` flag and ``-v`` count to a ``logging`` level.

    ``quiet`` takes precedence over any number of ``-v`` flags.
    """
    if quiet:
        return logging.CRITICAL
    if verbose >= 2:
        return logging.DEBUG
    if verbose >= 1:
        return logging.INFO
    return logging.WARNING


def _render_result(result, output_format, line_break_gap):
    """Serialize the sync result to text in the requested output format.

    Args:
        result: Object returned by ``LyricSyncPipeline.sync``; one of its
            ``to_lrc`` / ``to_json`` / ``to_srt`` / ``to_ass`` methods is
            called.
        output_format: One of the ``--format`` choices.
        line_break_gap: Value of ``--line-break-gap``; passed to every
            formatter except JSON.
    """
    formatters = {
        "lrc": lambda: result.to_lrc(enhanced=True, line_break_gap=line_break_gap),
        "lrc_standard": lambda: result.to_lrc(enhanced=False, line_break_gap=line_break_gap),
        "json": lambda: result.to_json(),
        "srt": lambda: result.to_srt(line_break_gap=line_break_gap),
        "ass": lambda: result.to_ass(line_break_gap=line_break_gap),
    }
    return formatters[output_format]()


def _print_summary(result):
    """Write a short human-readable run summary to stderr.

    Song details and alignment statistics are printed only when the result
    carries them (``result.song`` / ``result.alignment_stats`` are truthy).
    """
    print("\n--- Sync Summary ---", file=sys.stderr)
    if result.song:
        print(f"Song: {result.song.artist} - {result.song.title}", file=sys.stderr)
        print(f"Identified via: {result.song.method}", file=sys.stderr)
    print(f"Words: {len(result.synced_words)}", file=sys.stderr)
    print(f"Quality: {result.quality_score:.2f}", file=sys.stderr)
    if result.alignment_stats:
        print(f"Direct matches: {result.alignment_stats.match_rate:.1%}", file=sys.stderr)
        print(f"Coverage: {result.alignment_stats.coverage:.1%}", file=sys.stderr)
    print(f"Time: {result.processing_time_sec:.1f}s", file=sys.stderr)


def main(argv=None):
    """Run the ``lyric-sync`` command-line interface.

    Parses the command line, configures logging, validates the audio path,
    runs ``LyricSyncPipeline.sync`` and, when no ``--output`` file is given,
    prints the formatted result to stdout. Unless ``--quiet`` is set, a
    summary is written to stderr afterwards.

    Args:
        argv: Argument list to parse, excluding the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``, so existing
            ``main()`` callers behave as before.

    Raises:
        SystemExit: With status 1 when the audio file does not exist;
            argparse itself exits on invalid usage or ``--help``.
    """
    args = _build_parser().parse_args(argv)

    logging.basicConfig(
        level=_select_log_level(args.quiet, args.verbose),
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # Validate inputs before constructing the pipeline.
    if not os.path.isfile(args.audio):
        print(f"Error: Audio file not found: {args.audio}", file=sys.stderr)
        sys.exit(1)

    if not args.artist and not args.title and not args.acoustid_key:
        print(
            "Warning: No --artist/--title and no --acoustid-key provided. "
            "Song identification may fail. Will use raw transcription as fallback.",
            file=sys.stderr,
        )

    pipeline = LyricSyncPipeline(
        acoustid_key=args.acoustid_key,
        genius_token=args.genius_token,
        transcription_backend=args.backend,
        whisper_model=args.whisper_model,
        demucs_model=args.demucs_model,
        device=args.device,
        refine_timings_enabled=not args.no_refine,
        language=args.language,
    )

    # NOTE(review): --line-break-gap is not forwarded to sync(); it is only
    # applied on the stdout path below. Confirm whether file output written
    # via output_path should honor it as well.
    result = pipeline.sync(
        audio_path=args.audio,
        artist=args.artist,
        title=args.title,
        album=args.album,
        output_format=args.format,
        output_path=args.output,
    )

    # Print to stdout if no output file specified
    if not args.output:
        print(_render_result(result, args.format, args.line_break_gap))

    # Summary goes to stderr so it never mixes with piped stdout output.
    if not args.quiet:
        _print_summary(result)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()