RayPac006 committed on
Commit
1716dc6
·
verified ·
1 Parent(s): aca9475

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -52
app.py CHANGED
@@ -1,67 +1,81 @@
 
 
 
 
1
  import gradio as gr
2
  import whisperx
3
  import json
4
- import tempfile
5
- import os
6
-
7
- def generate_lyrics(audio_file):
8
- device = "cpu" # HF Spaces free tier = CPU
9
- batch_size = 8
10
- compute_type = "int8" # CPU-safe
11
-
12
- # Load WhisperX model
13
- model = whisperx.load_model(
14
- "small",
15
- device,
16
- compute_type=compute_type
17
- )
18
 
19
- # Load audio
20
- audio = whisperx.load_audio(audio_file)
 
 
 
 
 
21
 
22
- # Transcribe
23
- result = model.transcribe(audio, batch_size=batch_size)
 
 
24
 
25
- # Align timestamps
26
- model_a, metadata = whisperx.load_align_model(
27
- language_code=result["language"],
28
- device=device
29
- )
30
 
31
- result = whisperx.align(
32
- result["segments"],
33
- model_a,
34
- metadata,
35
- audio,
36
- device,
37
- return_char_alignments=False
38
- )
39
 
40
- # Format output
41
- formatted_lyrics = []
42
- for segment in result["segments"]:
43
- formatted_lyrics.append({
44
- "time": segment["start"],
45
- "text": segment["text"].strip(),
46
- "chords": []
47
- })
48
 
49
- return json.dumps({"lyrics": formatted_lyrics}, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
 
 
 
 
51
 
52
- with gr.Blocks() as demo:
53
- gr.Markdown("# 🎵 WhisperX Lyrics Generator")
54
- gr.Markdown("Upload an audio file and get timestamped lyrics (aligned).")
 
 
55
 
56
- audio_input = gr.Audio(type="filepath", label="Upload Audio")
57
- output_json = gr.Textbox(label="Lyrics JSON", lines=20)
58
 
59
- generate_btn = gr.Button("Generate Lyrics")
 
60
 
61
- generate_btn.click(
62
- fn=generate_lyrics,
63
- inputs=audio_input,
64
- outputs=output_json
65
- )
 
 
 
66
 
67
- demo.launch()
 
 
1
import os

# Allow loading legacy "unsafe" checkpoints: PyTorch >= 2.6 defaults
# torch.load(weights_only=True), which rejects the pickled VAD models
# whisperx depends on. Set before torch is imported so torch sees it.
# NOTE(security): this deliberately disables the weights_only safeguard —
# acceptable only because the model files come from trusted sources.
os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] = "1"

import gradio as gr
import whisperx
import json
import torch
import gc

# Backup fix for libraries that call torch.load() themselves: force
# weights_only=False on every call. BUG FIX: the original only overrode
# the flag when the caller explicitly passed `weights_only`, so calls that
# omitted it (the very case this patch exists for) still hit the new
# weights_only=True default and failed. `weights_only` is keyword-only in
# torch.load, so assigning it unconditionally cannot collide with a
# positional argument.
_original_load = torch.load
def patched_load(*args, **kwargs):
    kwargs["weights_only"] = False
    return _original_load(*args, **kwargs)
torch.load = patched_load

# Device & transcription config.
device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 16
compute_type = "float16" if device == "cuda" else "int8"  # int8 is faster on CPU

# Global model load: load once at startup so each request skips the cost.
print(f"Loading WhisperX model on {device}...")
model = whisperx.load_model("small", device, compute_type=compute_type)
 
 
27
 
28
def generate_lyrics(audio_file_path):
    """Transcribe an audio file with WhisperX and return time-aligned lyrics.

    Parameters:
        audio_file_path: filesystem path to the uploaded audio, or None.

    Returns:
        {"lyrics": [{"time": float, "text": str, "chords": []}, ...]}
        on success, or {"error": str} when input is missing or any step fails.
    """
    if audio_file_path is None:
        return {"error": "No audio file provided"}

    model_a = None
    try:
        # 1. Transcribe with the globally loaded model.
        audio = whisperx.load_audio(audio_file_path)
        result = model.transcribe(audio, batch_size=batch_size)

        # 2. Align — the alignment model depends on the detected language.
        model_a, metadata = whisperx.load_align_model(
            language_code=result["language"],
            device=device,
        )
        result = whisperx.align(
            result["segments"],
            model_a,
            metadata,
            audio,
            device,
            return_char_alignments=False,
        )

        # 3. Format to the TypeScript interface shape.
        formatted_lyrics = [
            {
                "time": round(segment["start"], 3),
                "text": segment["text"].strip(),
                "chords": [],
            }
            for segment in result["segments"]
        ]
        return {"lyrics": formatted_lyrics}
    except Exception as e:
        return {"error": str(e)}
    finally:
        # Memory cleanup (crucial for HF Free Tier). BUG FIX: the original
        # released the alignment model only on the success path, leaking it
        # whenever transcribe/align raised; `finally` runs on every exit.
        if model_a is not None:
            del model_a
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()
70
 
71
# Gradio UI: one audio upload in, one JSON payload out, backed by
# generate_lyrics. Components are named up front for readability.
audio_input = gr.Audio(type="filepath", label="Upload Vocals/Audio")
json_output = gr.JSON(label="JSON Result")

demo = gr.Interface(
    fn=generate_lyrics,
    inputs=audio_input,
    outputs=json_output,
    title="WhisperX Aligned Lyric Generator",
    description="Transcribes audio and provides word-level alignment formatted for your TypeScript interface.",
)

# Only start the server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()