Spaces:

harphool17
/

Parakeet-ASR-Competition-Winner

Runtime error

App Files Files Community

harphool17 committed on Apr 8

Commit

f735115

verified ·

1 Parent(s): 93cc26a

Upload 2 files

Browse files

Files changed (2) hide show

app.py +124 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import gradio as gr
+import nemo.collections.asr as nemo_asr
+import torch
+import time
+# ─────────────────────────────────────────────
+#  MODEL LOADING (Runs once when server starts)
+# ─────────────────────────────────────────────
+# Module-level setup: everything below runs at import time, before the UI is built,
+# so a failure here stops the app from starting at all.
+print("Downloading/Loading Parakeet Base Model...")
+# Fetch (or load from the local cache) the pretrained checkpoint named below.
+model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
+print("Attaching and FUSING your custom LoRA Adapter...")
+# Ensure ASR-Adapter.nemo is in the same folder as this app.py on Hugging Face!
+# The path is relative, so it is resolved against the process working directory.
+# NOTE(review): confirm that load_adapters() accepts a .nemo archive — TODO verify
+# against the NeMo adapter documentation.
+# NOTE(review): the log messages say the adapter is "fused", but the only call
+# visible here is load_adapters(); confirm whether the weights are actually merged.
+model.load_adapters("ASR-Adapter.nemo")
+# Switch to evaluation mode before serving inference requests.
+model.eval()
+print("✅ Brain successfully fused! Server Ready.")
+# ─────────────────────────────────────────────
+#  INFERENCE FUNCTION
+# ─────────────────────────────────────────────
+def transcribe_audio(audio_filepath):
+    if audio_filepath is None:
+        return "Please upload or record an audio file.", "0.00s"
+    try:
+        start_time = time.time()
+        # Run inference
+        transcription = model.transcribe([audio_filepath])
+        # Extract text
+        if isinstance(transcription, tuple):
+            result_text = transcription[0][0]
+        else:
+            result_text = transcription[0]
+        process_time = time.time() - start_time
+        time_str = f"{process_time:.2f} seconds"
+        return result_text, time_str
+    except Exception as e:
+        return f"An error occurred: {str(e)}", "Error"
+# ─────────────────────────────────────────────
+#  THE "PRO" DASHBOARD UI
+# ─────────────────────────────────────────────
+# Visual theme for the dashboard, built from Gradio's predefined "Soft" theme
+# with custom hues; it is passed to gr.Blocks(theme=...) when the UI is created.
+theme = gr.themes.Soft(
+    primary_hue="indigo",
+    secondary_hue="blue",
+    neutral_hue="slate",
+    # Font list in order of preference: the Inter Google font, then generic sans-serif.
+    font=[gr.themes.GoogleFont("Inter"), "sans-serif"]
+)
+with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:
+    # ── HEADER ──
+    gr.Markdown(
+        """
+        # 🎙️ Next-Gen Speech Recognition
+        ### Built with NVIDIA Parakeet & Custom Fine-Tuning
+        *This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
+        """
+    )
+    # ── MAIN LAYOUT (Two Columns) ──
+    with gr.Row():
+        # LEFT COLUMN: Inputs
+        with gr.Column(scale=1):
+            gr.Markdown("### 1. Input Audio")
+            # Tabbed interface for clean look
+            with gr.Tabs():
+                with gr.TabItem("Upload File"):
+                    audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio File")
+                with gr.TabItem("Record Microphone"):
+                    audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak into Mic")
+            submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg")
+            clear_btn = gr.ClearButton([audio_upload, audio_mic])
+        # RIGHT COLUMN: Outputs
+        with gr.Column(scale=1):
+            gr.Markdown("### 2. Transcription Result")
+            output_text = gr.Textbox(
+                label="Transcribed Text",
+                lines=8,
+                show_copy_button=True, # Pro feature: Easy copying!
+                placeholder="Your transcription will appear here..."
+            )
+            with gr.Row():
+                # Metric to show off how fast Parakeet is
+                metrics = gr.Textbox(label="Processing Time", value="0.00s", interactive=False)
+    # ── FOOTER ──
+    gr.Markdown("---")
+    gr.Markdown(
+        """
+        **System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
+        """
+    )
+    # ── EVENT WIRING ──
+    # If they click submit while on the upload tab
+    submit_btn.click(
+        fn=transcribe_audio,
+        inputs=audio_upload,
+        outputs=[output_text, metrics]
+    )
+    # If they click submit while on the mic tab
+    submit_btn.click(
+        fn=transcribe_audio,
+        inputs=audio_mic,
+        outputs=[output_text, metrics]
+    )
+# ─────────────────────────────────────────────
+#  LAUNCH
+# ─────────────────────────────────────────────
+# Start the Gradio server only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+torch
+nemo_toolkit[asr]
+librosa
+soundfile