RayPac006 committed on
Commit
81d4369
·
verified ·
1 Parent(s): f2d066a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -67
app.py CHANGED
@@ -1,81 +1,67 @@
1
- import torch
2
-
3
- # --- FIX FOR PYTORCH 2.6+ SECURITY ERRORS ---
4
- try:
5
- from omegaconf.listconfig import ListConfig
6
- from omegaconf.dictconfig import DictConfig
7
- torch.serialization.add_safe_globals([ListConfig, DictConfig])
8
- except ImportError:
9
- # If omegaconf isn't installed yet, we'll skip and let WhisperX handle it
10
- pass
11
- # --------------------------------------------
12
-
13
  import gradio as gr
14
  import whisperx
15
  import json
16
- import torch
17
- import gc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # 1. Setup Device & Config
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
- batch_size = 16
22
- compute_type = "float16" if device == "cuda" else "int8" # int8 is faster on CPU
23
 
24
- # 2. Global Model Load (Load once on startup)
25
- print(f"Loading WhisperX model on {device}...")
26
- model = whisperx.load_model("small", device, compute_type=compute_type)
 
 
27
 
28
- def generate_lyrics(audio_file_path):
29
- if audio_file_path is None:
30
- return {"error": "No audio file provided"}
 
 
 
 
 
31
 
32
- try:
33
- # 1. Transcribe
34
- audio = whisperx.load_audio(audio_file_path)
35
- result = model.transcribe(audio, batch_size=batch_size)
 
 
 
 
36
 
37
- # 2. Align (Load alignment model dynamically based on detected language)
38
- model_a, metadata = whisperx.load_align_model(
39
- language_code=result["language"],
40
- device=device
41
- )
42
- result = whisperx.align(
43
- result["segments"],
44
- model_a,
45
- metadata,
46
- audio,
47
- device,
48
- return_char_alignments=False
49
- )
50
 
51
- # 3. Format to your TypeScript Interface
52
- formatted_lyrics = []
53
- for segment in result["segments"]:
54
- formatted_lyrics.append({
55
- "time": round(segment["start"], 3),
56
- "text": segment["text"].strip(),
57
- "chords": []
58
- })
59
 
60
- # Memory Cleanup (Crucial for HF Free Tier)
61
- del model_a
62
- gc.collect()
63
- if device == "cuda":
64
- torch.cuda.empty_cache()
65
 
66
- return {"lyrics": formatted_lyrics}
 
67
 
68
- except Exception as e:
69
- return {"error": str(e)}
70
 
71
- # 3. Gradio Interface
72
- demo = gr.Interface(
73
- fn=generate_lyrics,
74
- inputs=gr.Audio(type="filepath", label="Upload Vocals/Audio"),
75
- outputs=gr.JSON(label="JSON Result"),
76
- title="WhisperX Aligned Lyric Generator",
77
- description="Transcribes audio and provides word-level alignment formatted for your TypeScript interface."
78
- )
79
 
80
- if __name__ == "__main__":
81
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gc
import json
import os
import tempfile

import gradio as gr
import whisperx
7
def generate_lyrics(audio_file):
    """Transcribe *audio_file* with WhisperX and return timestamped lyrics.

    Parameters:
        audio_file: filesystem path to the uploaded audio (Gradio
            ``gr.Audio(type="filepath")``), or ``None`` if nothing was uploaded.

    Returns:
        A pretty-printed JSON string of the form
        ``{"lyrics": [{"time": float, "text": str, "chords": []}, ...]}``,
        or ``{"error": "..."}`` on failure — always valid JSON so the UI
        textbox never receives a raw traceback.
    """
    # Guard: Gradio passes None when the user clicks Generate with no file.
    if audio_file is None:
        return json.dumps({"error": "No audio file provided"}, indent=2)

    device = "cpu"  # HF Spaces free tier = CPU only
    batch_size = 8
    compute_type = "int8"  # CPU-safe quantization

    try:
        # Load the WhisperX model per call; keeps idle memory low on the
        # memory-constrained free tier at the cost of per-request latency.
        model = whisperx.load_model(
            "small",
            device,
            compute_type=compute_type
        )

        # Load and transcribe the audio.
        audio = whisperx.load_audio(audio_file)
        result = model.transcribe(audio, batch_size=batch_size)

        # Align timestamps with a model picked for the detected language.
        model_a, metadata = whisperx.load_align_model(
            language_code=result["language"],
            device=device
        )
        result = whisperx.align(
            result["segments"],
            model_a,
            metadata,
            audio,
            device,
            return_char_alignments=False
        )

        # Format segments for the downstream TypeScript lyric interface.
        formatted_lyrics = [
            {
                # Round to milliseconds to keep the JSON compact.
                "time": round(segment["start"], 3),
                "text": segment["text"].strip(),
                "chords": []
            }
            for segment in result["segments"]
        ]

        # Memory cleanup — crucial on the free tier to avoid OOM across calls.
        del model, model_a
        gc.collect()

        return json.dumps({"lyrics": formatted_lyrics}, indent=2)
    except Exception as e:
        # Surface the failure as JSON instead of a Gradio traceback.
        return json.dumps({"error": str(e)}, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
 
 
 
 
51
 
52
# Gradio UI: file upload -> Generate button -> JSON text output.
with gr.Blocks() as demo:
    gr.Markdown("# 🎵 WhisperX Lyrics Generator")
    gr.Markdown("Upload an audio file and get timestamped lyrics (aligned).")

    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    output_json = gr.Textbox(label="Lyrics JSON", lines=20)

    generate_btn = gr.Button("Generate Lyrics")

    generate_btn.click(
        fn=generate_lyrics,
        inputs=audio_input,
        outputs=output_json
    )

# Launch only when executed as a script (HF Spaces runs app.py directly);
# the guard prevents the server from starting on a mere import.
if __name__ == "__main__":
    demo.launch()