Spaces:

RayPac006
/

get-lyrics

Running

App Files Files Community

RayPac006 committed 26 days ago

Commit

1716dc6

verified ·

1 Parent(s): aca9475

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -52

app.py CHANGED Viewed

@@ -1,67 +1,81 @@
 import gradio as gr
 import whisperx
 import json
-import tempfile
-import os
-def generate_lyrics(audio_file):
-    device = "cpu"  # HF Spaces free tier = CPU
-    batch_size = 8
-    compute_type = "int8"  # CPU-safe
-    # Load WhisperX model
-    model = whisperx.load_model(
-        "small",
-        device,
-        compute_type=compute_type
-    )
-    # Load audio
-    audio = whisperx.load_audio(audio_file)
-    # Transcribe
-    result = model.transcribe(audio, batch_size=batch_size)
-    # Align timestamps
-    model_a, metadata = whisperx.load_align_model(
-        language_code=result["language"],
-        device=device
-    )
-    result = whisperx.align(
-        result["segments"],
-        model_a,
-        metadata,
-        audio,
-        device,
-        return_char_alignments=False
-    )
-    # Format output
-    formatted_lyrics = []
-    for segment in result["segments"]:
-        formatted_lyrics.append({
-            "time": segment["start"],
-            "text": segment["text"].strip(),
-            "chords": []
-        })
-    return json.dumps({"lyrics": formatted_lyrics}, indent=2)
-with gr.Blocks() as demo:
-    gr.Markdown("# 🎵 WhisperX Lyrics Generator")
-    gr.Markdown("Upload an audio file and get timestamped lyrics (aligned).")
-    audio_input = gr.Audio(type="filepath", label="Upload Audio")
-    output_json = gr.Textbox(label="Lyrics JSON", lines=20)
-    generate_btn = gr.Button("Generate Lyrics")
-    generate_btn.click(
-        fn=generate_lyrics,
-        inputs=audio_input,
-        outputs=output_json
-    )
-demo.launch()

+import os
+# 1. Force PyTorch to allow loading "unsafe" weights (The VAD models require this)
+os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] = "1"
 import gradio as gr
 import whisperx
 import json
+import torch
+import gc
+# 2. Global Patch for torch.load (Backup fix for libraries that hardcode parameters)
+_original_load = torch.load
+def patched_load(*args, **kwargs):
+    if 'weights_only' in kwargs:
+        kwargs['weights_only'] = False
+    return _original_load(*args, **kwargs)
+torch.load = patched_load
+# 1. Setup Device & Config
+device = "cuda" if torch.cuda.is_available() else "cpu"
+batch_size = 16
+compute_type = "float16" if device == "cuda" else "int8" # int8 is faster on CPU
+# 2. Global Model Load (Load once on startup)
+print(f"Loading WhisperX model on {device}...")
+model = whisperx.load_model("small", device, compute_type=compute_type)
+def generate_lyrics(audio_file_path):
+    if audio_file_path is None:
+        return {"error": "No audio file provided"}
+    try:
+        # 1. Transcribe
+        audio = whisperx.load_audio(audio_file_path)
+        result = model.transcribe(audio, batch_size=batch_size)
+        # 2. Align (Load alignment model dynamically based on detected language)
+        model_a, metadata = whisperx.load_align_model(
+            language_code=result["language"],
+            device=device
+        )
+        result = whisperx.align(
+            result["segments"],
+            model_a,
+            metadata,
+            audio,
+            device,
+            return_char_alignments=False
+        )
+        # 3. Format to your TypeScript Interface
+        formatted_lyrics = []
+        for segment in result["segments"]:
+            formatted_lyrics.append({
+                "time": round(segment["start"], 3),
+                "text": segment["text"].strip(),
+                "chords": []
+            })
+        # Memory Cleanup (Crucial for HF Free Tier)
+        del model_a
+        gc.collect()
+        if device == "cuda":
+            torch.cuda.empty_cache()
+        return {"lyrics": formatted_lyrics}
+    except Exception as e:
+        return {"error": str(e)}
+# 3. Gradio Interface
+demo = gr.Interface(
+    fn=generate_lyrics,
+    inputs=gr.Audio(type="filepath", label="Upload Vocals/Audio"),
+    outputs=gr.JSON(label="JSON Result"),
+    title="WhisperX Aligned Lyric Generator",
+    description="Transcribes audio and provides word-level alignment formatted for your TypeScript interface."
+)
+if __name__ == "__main__":
+    demo.launch()