rikhoffbauer2 committed on
Commit
f6f2120
·
verified ·
1 Parent(s): 3b79c03

Upload lyric_sync/cli.py

Browse files
Files changed (1) hide show
  1. lyric_sync/cli.py +198 -0
lyric_sync/cli.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Command-line interface for lyric_sync.
3
+ """
4
+
5
+ import argparse
6
+ import logging
7
+ import os
8
+ import sys
9
+
10
+ from lyric_sync.pipeline import LyricSyncPipeline
11
+
12
+
13
def _build_parser():
    """Create the argument parser for the ``lyric-sync`` command."""
    parser = argparse.ArgumentParser(
        prog="lyric-sync",
        description="Automatic perfect song lyric acquisition and synchronization.",
        # Raw formatter keeps the hand-formatted examples in the epilog intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Full automatic pipeline (requires AcoustID key + GPU)
  lyric-sync song.mp3 --acoustid-key YOUR_KEY --output synced.lrc

  # With known metadata (skip identification)
  lyric-sync song.mp3 --artist "Queen" --title "Bohemian Rhapsody" -o synced.lrc

  # CPU-only, simpler transcription
  lyric-sync song.mp3 --device cpu --backend whisper --artist "Artist" --title "Title"

  # JSON output for programmatic use
  lyric-sync song.mp3 --format json --artist "Artist" --title "Title" -o synced.json

  # ASS karaoke subtitles
  lyric-sync song.mp3 --format ass --artist "Artist" --title "Title" -o karaoke.ass
""",
    )

    parser.add_argument("audio", help="Path to audio file (mp3, flac, wav, etc.)")
    parser.add_argument("-o", "--output", help="Output file path (default: stdout)")
    parser.add_argument(
        "--format",
        choices=["lrc", "lrc_standard", "json", "srt", "ass"],
        default="lrc",
        help="Output format (default: lrc = enhanced word-level LRC)",
    )

    # Metadata overrides
    meta = parser.add_argument_group("song metadata (skip identification)")
    meta.add_argument("--artist", help="Artist name")
    meta.add_argument("--title", help="Track title")
    meta.add_argument("--album", help="Album name")

    # API keys: each falls back to an environment variable when the flag is omitted.
    keys = parser.add_argument_group("API keys")
    keys.add_argument(
        "--acoustid-key",
        default=os.environ.get("ACOUSTID_API_KEY"),
        help="AcoustID API key (or set ACOUSTID_API_KEY env var)",
    )
    keys.add_argument(
        "--genius-token",
        default=os.environ.get("GENIUS_TOKEN"),
        help="Genius API token (or set GENIUS_TOKEN env var)",
    )

    # Model configuration
    model = parser.add_argument_group("model configuration")
    model.add_argument(
        "--backend",
        choices=["whisperx", "whisper", "granite"],
        default="whisperx",
        help="Transcription backend (default: whisperx)",
    )
    model.add_argument(
        "--whisper-model",
        default="large-v2",
        help="Whisper model size (default: large-v2, best for lyrics)",
    )
    model.add_argument(
        "--demucs-model",
        default="htdemucs_ft",
        help="Demucs model (default: htdemucs_ft, best quality)",
    )
    model.add_argument(
        "--device",
        default="cuda",
        help="Device: cuda or cpu (default: cuda)",
    )
    model.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)",
    )

    # Processing options
    proc = parser.add_argument_group("processing options")
    proc.add_argument(
        "--no-refine",
        action="store_true",
        help="Skip onset/offset timing refinement",
    )
    proc.add_argument(
        "--line-break-gap",
        type=float,
        default=1.0,
        help="Seconds of silence to trigger line break in output (default: 1.0)",
    )

    # Verbosity
    parser.add_argument(
        "-v", "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (-v info, -vv debug)",
    )
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Suppress all logging",
    )
    return parser


def _print_summary(result):
    """Write a short human-readable summary of the sync result to stderr."""
    print("\n--- Sync Summary ---", file=sys.stderr)
    if result.song:
        print(f"Song: {result.song.artist} - {result.song.title}", file=sys.stderr)
        print(f"Identified via: {result.song.method}", file=sys.stderr)
    print(f"Words: {len(result.synced_words)}", file=sys.stderr)
    print(f"Quality: {result.quality_score:.2f}", file=sys.stderr)
    if result.alignment_stats:
        print(f"Direct matches: {result.alignment_stats.match_rate:.1%}", file=sys.stderr)
        print(f"Coverage: {result.alignment_stats.coverage:.1%}", file=sys.stderr)
    print(f"Time: {result.processing_time_sec:.1f}s", file=sys.stderr)


def main(argv=None):
    """Run the ``lyric-sync`` command-line interface.

    Parses the arguments, configures logging, checks that the audio file
    exists, runs ``LyricSyncPipeline.sync`` and, when no ``--output`` path is
    given, prints the formatted result to stdout. Unless ``--quiet`` is set,
    a summary is printed to stderr afterwards.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so a plain ``main()`` call behaves
            as before.

    Raises:
        SystemExit: With code 1 when the audio file does not exist; argparse
            itself exits with code 2 on invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    # Configure logging: --quiet wins over any number of -v flags.
    if args.quiet:
        log_level = logging.CRITICAL
    elif args.verbose >= 2:
        log_level = logging.DEBUG
    elif args.verbose >= 1:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING

    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )

    # Validate inputs before constructing the pipeline.
    if not os.path.isfile(args.audio):
        print(f"Error: Audio file not found: {args.audio}", file=sys.stderr)
        sys.exit(1)

    if not args.artist and not args.title and not args.acoustid_key:
        print(
            "Warning: No --artist/--title and no --acoustid-key provided. "
            "Song identification may fail. Will use raw transcription as fallback.",
            file=sys.stderr,
        )

    # Run pipeline
    pipeline = LyricSyncPipeline(
        acoustid_key=args.acoustid_key,
        genius_token=args.genius_token,
        transcription_backend=args.backend,
        whisper_model=args.whisper_model,
        demucs_model=args.demucs_model,
        device=args.device,
        refine_timings_enabled=not args.no_refine,
        language=args.language,
    )

    # NOTE(review): --line-break-gap is not forwarded to sync(); it is only
    # applied on the stdout path below. Whether a file written via
    # output_path honours it depends on the pipeline -- confirm.
    result = pipeline.sync(
        audio_path=args.audio,
        artist=args.artist,
        title=args.title,
        album=args.album,
        output_format=args.format,
        output_path=args.output,
    )

    # Print to stdout if no output file specified
    if not args.output:
        gap = args.line_break_gap
        # Lambdas defer the call so only the selected format is rendered.
        formatters = {
            "lrc": lambda: result.to_lrc(enhanced=True, line_break_gap=gap),
            "lrc_standard": lambda: result.to_lrc(enhanced=False, line_break_gap=gap),
            "json": lambda: result.to_json(),
            "srt": lambda: result.to_srt(line_break_gap=gap),
            "ass": lambda: result.to_ass(line_break_gap=gap),
        }
        print(formatters[args.format]())

    # Summary goes to stderr so it never mixes with lyrics piped from stdout.
    if not args.quiet:
        _print_summary(result)
196
+
197
# Script entry point: run the CLI only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()