Spaces:

harphool17
/

Parakeet-ASR-Competition-Winner

Runtime error

App Files Community

harphool17 committed on Apr 8

Commit

528af16

verified ·

1 Parent(s): f735115

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -123

app.py CHANGED Viewed

@@ -1,124 +1,119 @@
-import gradio as gr
-import nemo.collections.asr as nemo_asr
-import torch
-import time
-# ─────────────────────────────────────────────
-#  MODEL LOADING (Runs once when server starts)
-# ─────────────────────────────────────────────
-print("Downloading/Loading Parakeet Base Model...")
-model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
-print("Attaching and FUSING your custom LoRA Adapter...")
-# Ensure ASR-Adapter.nemo is in the same folder as this app.py on Hugging Face!
-model.load_adapters("ASR-Adapter.nemo")
-model.eval()
-print("✅ Brain successfully fused! Server Ready.")
-# ─────────────────────────────────────────────
-#  INFERENCE FUNCTION
-# ─────────────────────────────────────────────
-def transcribe_audio(audio_filepath):
-    if audio_filepath is None:
-        return "Please upload or record an audio file.", "0.00s"
-    try:
-        start_time = time.time()
-        # Run inference
-        transcription = model.transcribe([audio_filepath])
-        # Extract text
-        if isinstance(transcription, tuple):
-            result_text = transcription[0][0]
-        else:
-            result_text = transcription[0]
-        process_time = time.time() - start_time
-        time_str = f"{process_time:.2f} seconds"
-        return result_text, time_str
-    except Exception as e:
-        return f"An error occurred: {str(e)}", "Error"
-# ─────────────────────────────────────────────
-#  THE "PRO" DASHBOARD UI
-# ─────────────────────────────────────────────
-# Using a sleek predefined theme
-theme = gr.themes.Soft(
-    primary_hue="indigo",
-    secondary_hue="blue",
-    neutral_hue="slate",
-    font=[gr.themes.GoogleFont("Inter"), "sans-serif"]
-)
-with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:
-    # ── HEADER ──
-    gr.Markdown(
-        """
-        # 🎙️ Next-Gen Speech Recognition
-        ### Built with NVIDIA Parakeet & Custom Fine-Tuning
-        *This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
-        """
-    )
-    # ── MAIN LAYOUT (Two Columns) ──
-    with gr.Row():
-        # LEFT COLUMN: Inputs
-        with gr.Column(scale=1):
-            gr.Markdown("### 1. Input Audio")
-            # Tabbed interface for clean look
-            with gr.Tabs():
-                with gr.TabItem("Upload File"):
-                    audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio File")
-                with gr.TabItem("Record Microphone"):
-                    audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak into Mic")
-            submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg")
-            clear_btn = gr.ClearButton([audio_upload, audio_mic])
-        # RIGHT COLUMN: Outputs
-        with gr.Column(scale=1):
-            gr.Markdown("### 2. Transcription Result")
-            output_text = gr.Textbox(
-                label="Transcribed Text",
-                lines=8,
-                show_copy_button=True, # Pro feature: Easy copying!
-                placeholder="Your transcription will appear here..."
-            )
-            with gr.Row():
-                # Metric to show off how fast Parakeet is
-                metrics = gr.Textbox(label="Processing Time", value="0.00s", interactive=False)
-    # ── FOOTER ──
-    gr.Markdown("---")
-    gr.Markdown(
-        """
-        **System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
-        """
-    )
-    # ── EVENT WIRING ──
-    # If they click submit while on the upload tab
-    submit_btn.click(
-        fn=transcribe_audio,
-        inputs=audio_upload,
-        outputs=[output_text, metrics]
-    )
-    # If they click submit while on the mic tab
-    submit_btn.click(
-        fn=transcribe_audio,
-        inputs=audio_mic,
-        outputs=[output_text, metrics]
-    )
-# ─────────────────────────────────────────────
-#  LAUNCH
-# ─────────────────────────────────────────────
-if __name__ == "__main__":
     demo.launch()

+import gradio as gr
+import nemo.collections.asr as nemo_asr
+import time
+from huggingface_hub import hf_hub_download
+# ─────────────────────────────────────────────
+#  MODEL LOADING (Runs once when server starts)
+# ─────────────────────────────────────────────
+print("Downloading/Loading Parakeet Base Model...")
+model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
+print("Downloading Custom LoRA Adapter from Model Hub...")
+# Fetch the adapter from the Hub model repo instead of bundling it in this Space; hf_hub_download returns the local file path.
+adapter_path = hf_hub_download(repo_id="harphool17/parakeet-asr-adapter", filename="ASR-Adapter.nemo")
+print("Attaching and FUSING your custom LoRA Adapter...")
+model.load_adapters(adapter_path)
+model.eval()
+print("✅ Brain successfully fused! Server Ready.")
+# ─────────────────────────────────────────────
+#  INFERENCE FUNCTION
+# ─────────────────────────────────────────────
+def transcribe_audio(audio_filepath):
+    if audio_filepath is None:
+        return "Please upload or record an audio file.", "0.00s"
+    try:
+        start_time = time.time()
+        # Run inference
+        transcription = model.transcribe([audio_filepath])
+        # Extract text
+        if isinstance(transcription, tuple):
+            result_text = transcription[0][0]
+        else:
+            result_text = transcription[0]
+        process_time = time.time() - start_time
+        time_str = f"{process_time:.2f} seconds"
+        return result_text, time_str
+    except Exception as e:
+        return f"An error occurred: {str(e)}", "Error"
+# ─────────────────────────────────────────────
+#  THE "PRO" DASHBOARD UI
+# ─────────────────────────────────────────────
+theme = gr.themes.Soft(
+    primary_hue="indigo",
+    secondary_hue="blue",
+    neutral_hue="slate",
+    font=[gr.themes.GoogleFont("Inter"), "sans-serif"]
+)
+with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:
+    # ── HEADER ──
+    gr.Markdown(
+        """
+        # 🎙️ Next-Gen Speech Recognition
+        ### Built with NVIDIA Parakeet & Custom Fine-Tuning
+        *This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
+        """
+    )
+    # ── MAIN LAYOUT (Two Columns) ──
+    with gr.Row():
+        # LEFT COLUMN: Inputs
+        with gr.Column(scale=1):
+            gr.Markdown("### 1. Input Audio")
+            with gr.Tabs():
+                with gr.TabItem("Upload File"):
+                    audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio File")
+                with gr.TabItem("Record Microphone"):
+                    audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak into Mic")
+            submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg")
+            clear_btn = gr.ClearButton([audio_upload, audio_mic])
+        # RIGHT COLUMN: Outputs
+        with gr.Column(scale=1):
+            gr.Markdown("### 2. Transcription Result")
+            output_text = gr.Textbox(
+                label="Transcribed Text",
+                lines=8,
+                show_copy_button=True,
+                placeholder="Your transcription will appear here..."
+            )
+            with gr.Row():
+                metrics = gr.Textbox(label="Processing Time", value="0.00s", interactive=False)
+    # ── FOOTER ──
+    gr.Markdown("---")
+    gr.Markdown(
+        """
+        **System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
+        """
+    )
+    # ── EVENT WIRING ──
+    submit_btn.click(
+        fn=transcribe_audio,
+        inputs=audio_upload,
+        outputs=[output_text, metrics]
+    )
+    submit_btn.click(
+        fn=transcribe_audio,
+        inputs=audio_mic,
+        outputs=[output_text, metrics]
+    )
+if __name__ == "__main__":
     demo.launch()