RayPac006 committed on
Commit
81d4369
·
verified ·
1 Parent(s): f2d066a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -67
app.py CHANGED
@@ -1,81 +1,67 @@
1
- import torch
2
-
3
- # --- FIX FOR PYTORCH 2.6+ SECURITY ERRORS ---
4
- try:
5
- from omegaconf.listconfig import ListConfig
6
- from omegaconf.dictconfig import DictConfig
7
- torch.serialization.add_safe_globals([ListConfig, DictConfig])
8
- except ImportError:
9
- # If omegaconf isn't installed yet, we'll skip and let WhisperX handle it
10
- pass
11
- # --------------------------------------------
12
-
13
  import gradio as gr
14
  import whisperx
15
  import json
16
- import torch
17
- import gc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # 1. Setup Device & Config
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
- batch_size = 16
22
- compute_type = "float16" if device == "cuda" else "int8" # int8 is faster on CPU
23
 
24
- # 2. Global Model Load (Load once on startup)
25
- print(f"Loading WhisperX model on {device}...")
26
- model = whisperx.load_model("small", device, compute_type=compute_type)
 
 
27
 
28
- def generate_lyrics(audio_file_path):
29
- if audio_file_path is None:
30
- return {"error": "No audio file provided"}
 
 
 
 
 
31
 
32
- try:
33
- # 1. Transcribe
34
- audio = whisperx.load_audio(audio_file_path)
35
- result = model.transcribe(audio, batch_size=batch_size)
 
 
 
 
36
 
37
- # 2. Align (Load alignment model dynamically based on detected language)
38
- model_a, metadata = whisperx.load_align_model(
39
- language_code=result["language"],
40
- device=device
41
- )
42
- result = whisperx.align(
43
- result["segments"],
44
- model_a,
45
- metadata,
46
- audio,
47
- device,
48
- return_char_alignments=False
49
- )
50
 
51
- # 3. Format to your TypeScript Interface
52
- formatted_lyrics = []
53
- for segment in result["segments"]:
54
- formatted_lyrics.append({
55
- "time": round(segment["start"], 3),
56
- "text": segment["text"].strip(),
57
- "chords": []
58
- })
59
 
60
- # Memory Cleanup (Crucial for HF Free Tier)
61
- del model_a
62
- gc.collect()
63
- if device == "cuda":
64
- torch.cuda.empty_cache()
65
 
66
- return {"lyrics": formatted_lyrics}
 
67
 
68
- except Exception as e:
69
- return {"error": str(e)}
70
 
71
- # 3. Gradio Interface
72
- demo = gr.Interface(
73
- fn=generate_lyrics,
74
- inputs=gr.Audio(type="filepath", label="Upload Vocals/Audio"),
75
- outputs=gr.JSON(label="JSON Result"),
76
- title="WhisperX Aligned Lyric Generator",
77
- description="Transcribes audio and provides word-level alignment formatted for your TypeScript interface."
78
- )
79
 
80
- if __name__ == "__main__":
81
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gc
import json
import os
import tempfile

import gradio as gr
import whisperx
7
def generate_lyrics(audio_file):
    """Transcribe *audio_file* with WhisperX and return timestamped lyrics.

    Parameters:
        audio_file: filesystem path to the uploaded audio (Gradio
            ``gr.Audio(type="filepath")``), or ``None`` if nothing was uploaded.

    Returns:
        A pretty-printed JSON string of the form
        ``{"lyrics": [{"time": float, "text": str, "chords": []}, ...]}``,
        or ``{"error": "..."}`` on failure — always valid JSON so the UI
        textbox never receives a raw traceback.
    """
    # Guard: Gradio passes None when the user clicks Generate with no file.
    if audio_file is None:
        return json.dumps({"error": "No audio file provided"}, indent=2)

    device = "cpu"  # HF Spaces free tier = CPU only
    batch_size = 8
    compute_type = "int8"  # CPU-safe quantization

    try:
        # Load the WhisperX model per call; keeps idle memory low on the
        # memory-constrained free tier at the cost of per-request latency.
        model = whisperx.load_model(
            "small",
            device,
            compute_type=compute_type
        )

        # Load and transcribe the audio.
        audio = whisperx.load_audio(audio_file)
        result = model.transcribe(audio, batch_size=batch_size)

        # Align timestamps with a model picked for the detected language.
        model_a, metadata = whisperx.load_align_model(
            language_code=result["language"],
            device=device
        )
        result = whisperx.align(
            result["segments"],
            model_a,
            metadata,
            audio,
            device,
            return_char_alignments=False
        )

        # Format segments for the downstream TypeScript lyric interface.
        formatted_lyrics = [
            {
                # Round to milliseconds to keep the JSON compact.
                "time": round(segment["start"], 3),
                "text": segment["text"].strip(),
                "chords": []
            }
            for segment in result["segments"]
        ]

        # Memory cleanup — crucial on the free tier to avoid OOM across calls.
        del model, model_a
        gc.collect()

        return json.dumps({"lyrics": formatted_lyrics}, indent=2)
    except Exception as e:
        # Surface the failure as JSON instead of a Gradio traceback.
        return json.dumps({"error": str(e)}, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
 
 
 
 
 
 
 
51
 
52
# Gradio UI: file upload -> Generate button -> JSON text output.
with gr.Blocks() as demo:
    gr.Markdown("# 🎵 WhisperX Lyrics Generator")
    gr.Markdown("Upload an audio file and get timestamped lyrics (aligned).")

    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    output_json = gr.Textbox(label="Lyrics JSON", lines=20)

    generate_btn = gr.Button("Generate Lyrics")

    generate_btn.click(
        fn=generate_lyrics,
        inputs=audio_input,
        outputs=output_json
    )

# Launch only when executed as a script (HF Spaces runs app.py directly);
# the guard prevents the server from starting on a mere import.
if __name__ == "__main__":
    demo.launch()