harphool17 committed on
Commit
528af16
·
verified ·
1 Parent(s): f735115

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -123
app.py CHANGED
@@ -1,124 +1,119 @@
1
- import gradio as gr
2
- import nemo.collections.asr as nemo_asr
3
- import torch
4
- import time
5
-
6
- # ─────────────────────────────────────────────
7
- # MODEL LOADING (Runs once when server starts)
8
- # ─────────────────────────────────────────────
9
- print("Downloading/Loading Parakeet Base Model...")
10
- model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
11
-
12
- print("Attaching and FUSING your custom LoRA Adapter...")
13
- # Ensure ASR-Adapter.nemo is in the same folder as this app.py on Hugging Face!
14
- model.load_adapters("ASR-Adapter.nemo")
15
- model.eval()
16
- print("✅ Brain successfully fused! Server Ready.")
17
-
18
- # ─────────────────────────────────────────────
19
- # INFERENCE FUNCTION
20
- # ─────────────────────────────────────────────
21
- def transcribe_audio(audio_filepath):
22
- if audio_filepath is None:
23
- return "Please upload or record an audio file.", "0.00s"
24
-
25
- try:
26
- start_time = time.time()
27
-
28
- # Run inference
29
- transcription = model.transcribe([audio_filepath])
30
-
31
- # Extract text
32
- if isinstance(transcription, tuple):
33
- result_text = transcription[0][0]
34
- else:
35
- result_text = transcription[0]
36
-
37
- process_time = time.time() - start_time
38
- time_str = f"{process_time:.2f} seconds"
39
-
40
- return result_text, time_str
41
-
42
- except Exception as e:
43
- return f"An error occurred: {str(e)}", "Error"
44
-
45
- # ─────────────────────────────────────────────
46
- # THE "PRO" DASHBOARD UI
47
- # ─────────────────────────────────────────────
48
- # Using a sleek predefined theme
49
- theme = gr.themes.Soft(
50
- primary_hue="indigo",
51
- secondary_hue="blue",
52
- neutral_hue="slate",
53
- font=[gr.themes.GoogleFont("Inter"), "sans-serif"]
54
- )
55
-
56
- with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:
57
-
58
- # ── HEADER ──
59
- gr.Markdown(
60
- """
61
- # 🎙️ Next-Gen Speech Recognition
62
- ### Built with NVIDIA Parakeet & Custom Fine-Tuning
63
- *This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
64
- """
65
- )
66
-
67
- # ── MAIN LAYOUT (Two Columns) ──
68
- with gr.Row():
69
-
70
- # LEFT COLUMN: Inputs
71
- with gr.Column(scale=1):
72
- gr.Markdown("### 1. Input Audio")
73
-
74
- # Tabbed interface for clean look
75
- with gr.Tabs():
76
- with gr.TabItem("Upload File"):
77
- audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio File")
78
- with gr.TabItem("Record Microphone"):
79
- audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak into Mic")
80
-
81
- submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg")
82
- clear_btn = gr.ClearButton([audio_upload, audio_mic])
83
-
84
- # RIGHT COLUMN: Outputs
85
- with gr.Column(scale=1):
86
- gr.Markdown("### 2. Transcription Result")
87
- output_text = gr.Textbox(
88
- label="Transcribed Text",
89
- lines=8,
90
- show_copy_button=True, # Pro feature: Easy copying!
91
- placeholder="Your transcription will appear here..."
92
- )
93
-
94
- with gr.Row():
95
- # Metric to show off how fast Parakeet is
96
- metrics = gr.Textbox(label="Processing Time", value="0.00s", interactive=False)
97
-
98
- # ── FOOTER ──
99
- gr.Markdown("---")
100
- gr.Markdown(
101
- """
102
- **System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
103
- """
104
- )
105
-
106
- # ── EVENT WIRING ──
107
- # If they click submit while on the upload tab
108
- submit_btn.click(
109
- fn=transcribe_audio,
110
- inputs=audio_upload,
111
- outputs=[output_text, metrics]
112
- )
113
- # If they click submit while on the mic tab
114
- submit_btn.click(
115
- fn=transcribe_audio,
116
- inputs=audio_mic,
117
- outputs=[output_text, metrics]
118
- )
119
-
120
- # ─────────────────────────────────────────────
121
- # LAUNCH
122
- # ─────────────────────────────────────────────
123
- if __name__ == "__main__":
124
  demo.launch()
 
1
+ import gradio as gr
2
+ import nemo.collections.asr as nemo_asr
3
+ import time
4
+ from huggingface_hub import hf_hub_download
5
+
6
+ # ─────────────────────────────────────────────
7
+ # MODEL LOADING (Runs once when server starts)
8
+ # ─────────────────────────────────────────────
9
+ print("Downloading/Loading Parakeet Base Model...")
10
+ model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
11
+
12
+ print("Downloading Custom LoRA Adapter from Model Hub...")
13
+ # This safely pulls your adapter from your unlimited Model repository!
14
+ adapter_path = hf_hub_download(repo_id="harphool17/parakeet-asr-adapter", filename="ASR-Adapter.nemo")
15
+
16
+ print("Attaching and FUSING your custom LoRA Adapter...")
17
+ model.load_adapters(adapter_path)
18
+ model.eval()
19
+ print("✅ Brain successfully fused! Server Ready.")
20
+
21
+ # ─────────────────────────────────────────────
22
+ # INFERENCE FUNCTION
23
+ # ─────────────────────────────────────────────
24
+ def transcribe_audio(audio_filepath):
25
+ if audio_filepath is None:
26
+ return "Please upload or record an audio file.", "0.00s"
27
+
28
+ try:
29
+ start_time = time.time()
30
+
31
+ # Run inference
32
+ transcription = model.transcribe([audio_filepath])
33
+
34
+ # Extract text
35
+ if isinstance(transcription, tuple):
36
+ result_text = transcription[0][0]
37
+ else:
38
+ result_text = transcription[0]
39
+
40
+ process_time = time.time() - start_time
41
+ time_str = f"{process_time:.2f} seconds"
42
+
43
+ return result_text, time_str
44
+
45
+ except Exception as e:
46
+ return f"An error occurred: {str(e)}", "Error"
47
+
48
+ # ─────────────────────────────────────────────
49
+ # THE "PRO" DASHBOARD UI
50
+ # ─────────────────────────────────────────────
51
+ theme = gr.themes.Soft(
52
+ primary_hue="indigo",
53
+ secondary_hue="blue",
54
+ neutral_hue="slate",
55
+ font=[gr.themes.GoogleFont("Inter"), "sans-serif"]
56
+ )
57
+
58
+ with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:
59
+
60
+ # ── HEADER ──
61
+ gr.Markdown(
62
+ """
63
+ # 🎙️ Next-Gen Speech Recognition
64
+ ### Built with NVIDIA Parakeet & Custom Fine-Tuning
65
+ *This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
66
+ """
67
+ )
68
+
69
+ # ── MAIN LAYOUT (Two Columns) ──
70
+ with gr.Row():
71
+
72
+ # LEFT COLUMN: Inputs
73
+ with gr.Column(scale=1):
74
+ gr.Markdown("### 1. Input Audio")
75
+
76
+ with gr.Tabs():
77
+ with gr.TabItem("Upload File"):
78
+ audio_upload = gr.Audio(sources=["upload"], type="filepath", label="Audio File")
79
+ with gr.TabItem("Record Microphone"):
80
+ audio_mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak into Mic")
81
+
82
+ submit_btn = gr.Button("🚀 Transcribe Audio", variant="primary", size="lg")
83
+ clear_btn = gr.ClearButton([audio_upload, audio_mic])
84
+
85
+ # RIGHT COLUMN: Outputs
86
+ with gr.Column(scale=1):
87
+ gr.Markdown("### 2. Transcription Result")
88
+ output_text = gr.Textbox(
89
+ label="Transcribed Text",
90
+ lines=8,
91
+ show_copy_button=True,
92
+ placeholder="Your transcription will appear here..."
93
+ )
94
+
95
+ with gr.Row():
96
+ metrics = gr.Textbox(label="Processing Time", value="0.00s", interactive=False)
97
+
98
+ # ── FOOTER ──
99
+ gr.Markdown("---")
100
+ gr.Markdown(
101
+ """
102
+ **System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
103
+ """
104
+ )
105
+
106
+ # ── EVENT WIRING ──
107
+ submit_btn.click(
108
+ fn=transcribe_audio,
109
+ inputs=audio_upload,
110
+ outputs=[output_text, metrics]
111
+ )
112
+ submit_btn.click(
113
+ fn=transcribe_audio,
114
+ inputs=audio_mic,
115
+ outputs=[output_text, metrics]
116
+ )
117
+
118
+ if __name__ == "__main__":
 
 
 
 
 
119
  demo.launch()