harphool17 committed on
Commit
f735115
·
verified ·
1 Parent(s): 93cc26a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +124 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
from pathlib import Path

import gradio as gr
import nemo.collections.asr as nemo_asr
import torch
5
+
6
+ # ─────────────────────────────────────────────
7
+ # MODEL LOADING (Runs once when server starts)
8
+ # ─────────────────────────────────────────────
9
# The adapter checkpoint is expected to sit next to this script; resolving it
# from __file__ keeps the lookup independent of the current working directory.
_ADAPTER_PATH = Path(__file__).resolve().parent / "ASR-Adapter.nemo"

# Load the base checkpoint once at import time so every request reuses the
# same in-memory model instead of reloading it per call.
print("Downloading/Loading Parakeet Base Model...")
model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")

print("Attaching and FUSING your custom LoRA Adapter...")
# Ensure ASR-Adapter.nemo is in the same folder as this app.py on Hugging Face!
# NOTE(review): load_adapters is assumed to accept this checkpoint format —
# confirm the file was produced by the matching NeMo save_adapters call.
model.load_adapters(str(_ADAPTER_PATH))
# Inference only: switch the network to evaluation mode.
model.eval()
print("✅ Brain successfully fused! Server Ready.")
17
+
18
+ # ─────────────────────────────────────────────
19
+ # INFERENCE FUNCTION
20
+ # ─────────────────────────────────────────────
21
def _extract_text(transcription):
    """Return the first transcript in ``transcription`` as a plain string.

    Handles the result shapes this app may receive from ``model.transcribe``:
    a ``(best_hypotheses, all_hypotheses)`` tuple, a list of strings, or a
    list of hypothesis-like objects exposing a ``text`` attribute. An empty
    result yields an empty string.
    """
    # Tuple results carry the best hypotheses in the first slot.
    if isinstance(transcription, tuple):
        transcription = transcription[0]
    if not transcription:
        return ""

    first = transcription[0]
    if isinstance(first, str):
        return first
    # Hypothesis-like objects keep the decoded string in ``.text``; fall back
    # to ``str()`` so the UI always receives text.
    return str(getattr(first, "text", first))


def transcribe_audio(audio_filepath):
    """Transcribe one audio file and report how long it took.

    Args:
        audio_filepath: Path to the audio file, or ``None`` when the user has
            not provided any audio.

    Returns:
        A ``(text, timing)`` tuple of strings. ``text`` is the transcript, a
        prompt when no audio was supplied, or an error message. ``timing`` is
        the elapsed time formatted as ``"<seconds> seconds"``, ``"0.00s"``
        when nothing was processed, or ``"Error"`` on failure.
    """
    if audio_filepath is None:
        return "Please upload or record an audio file.", "0.00s"

    try:
        # perf_counter is monotonic, so the duration cannot be skewed by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        transcription = model.transcribe([audio_filepath])
        result_text = _extract_text(transcription)
        process_time = time.perf_counter() - start_time
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the request handler.
        return f"An error occurred: {str(e)}", "Error"

    return result_text, f"{process_time:.2f} seconds"
44
+
45
+ # ─────────────────────────────────────────────
46
+ # THE "PRO" DASHBOARD UI
47
+ # ─────────────────────────────────────────────
48
+ # Using a sleek predefined theme
49
def _select_audio(active_source, upload_path, mic_path):
    """Pick the audio path for the active tab, falling back to the other tab.

    Args:
        active_source: ``"mic"`` when the microphone tab is selected; any
            other value is treated as the upload tab.
        upload_path: File path from the upload component, or ``None``.
        mic_path: File path from the microphone component, or ``None``.

    Returns:
        The chosen file path, or ``None`` when neither tab holds audio.
    """
    if active_source == "mic":
        return mic_path or upload_path
    return upload_path or mic_path


def _transcribe_active(active_source, upload_path, mic_path):
    """Transcribe the audio belonging to the currently selected input tab."""
    return transcribe_audio(_select_audio(active_source, upload_path, mic_path))


theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
)

with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:

    # ── HEADER ──
    gr.Markdown(
        """
        # 🎙️ Next-Gen Speech Recognition
        ### Built with NVIDIA Parakeet & Custom Fine-Tuning
        *This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
        """
    )

    # Remembers which input tab is in front so a single click handler can
    # choose the matching audio component. The upload tab is shown first.
    active_source = gr.State("upload")

    # ── MAIN LAYOUT (Two Columns) ──
    with gr.Row():

        # LEFT COLUMN: Inputs
        with gr.Column(scale=1):
            gr.Markdown("### 1. Input Audio")

            # Tabbed interface for clean look
            with gr.Tabs():
                with gr.TabItem("Upload File") as upload_tab:
                    audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio File")
                with gr.TabItem("Record Microphone") as mic_tab:
                    audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak into Mic")

            submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg")
            clear_btn = gr.ClearButton([audio_upload, audio_mic])

        # RIGHT COLUMN: Outputs
        with gr.Column(scale=1):
            gr.Markdown("### 2. Transcription Result")
            output_text = gr.Textbox(
                label="Transcribed Text",
                lines=8,
                show_copy_button=True,  # Pro feature: Easy copying!
                placeholder="Your transcription will appear here...",
            )

            with gr.Row():
                # Metric to show off how fast Parakeet is
                metrics = gr.Textbox(label="Processing Time", value="0.00s", interactive=False)

    # ── FOOTER ──
    gr.Markdown("---")
    gr.Markdown(
        """
        **System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
        """
    )

    # ── EVENT WIRING ──
    # Track the visible tab so the submit handler knows which input to use.
    upload_tab.select(fn=lambda: "upload", inputs=None, outputs=active_source)
    mic_tab.select(fn=lambda: "mic", inputs=None, outputs=active_source)

    # One listener for the button: registering a separate click handler per
    # audio component made both run on every click and write to the same
    # outputs, so the result shown depended on which finished last.
    submit_btn.click(
        fn=_transcribe_active,
        inputs=[active_source, audio_upload, audio_mic],
        outputs=[output_text, metrics],
    )

# ─────────────────────────────────────────────
# LAUNCH
# ─────────────────────────────────────────────
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ nemo_toolkit[asr]
4
+ librosa
5
+ soundfile