# Page header captured with this file - Spaces: Runtime error; File size: 5,535 Bytes
import gradio as gr
import nemo.collections.asr as nemo_asr
import time
from huggingface_hub import hf_hub_download
import librosa
import soundfile as sf
# ─────────────────────────────────────────────
# 1. MODEL LOADING (Runs once when server starts)
# ─────────────────────────────────────────────
# Download the packaged .nemo checkpoint from the Hugging Face Hub; the value
# returned by hf_hub_download is passed straight to restore_from below.
print("Downloading your Full Custom Model from the Hub...")
custom_model_path = hf_hub_download(
    repo_id="harphool17/parakeet-asr-adapter",
    filename="ASR-Adapter.nemo",
)

print("Booting up the model engine...")
# Restore the RNNT-BPE model from the downloaded .nemo file and switch it to
# evaluation mode. This runs at import time, so it happens once per process.
model = nemo_asr.models.EncDecRNNTBPEModel.restore_from(custom_model_path)
model.eval()
# The status message must be a single-line literal; it had been split in two.
print("✅ Custom Parakeet Engine Online! Server Ready.")
# ─────────────────────────────────────────────
# 2. INFERENCE FUNCTION
# ─────────────────────────────────────────────
def transcribe_audio(file_upload, mic_upload):
    """Transcribe the supplied audio and report how long it took.

    Args:
        file_upload: Path of the uploaded audio file, or ``None`` when the
            upload input is empty. Takes priority over ``mic_upload``.
        mic_upload: Path of the microphone recording, or ``None``. Used only
            when ``file_upload`` is ``None``.

    Returns:
        A ``(text, elapsed)`` tuple of strings:

        * ``("Please upload or record an audio file.", "0.00s")`` when both
          inputs are ``None``;
        * ``(transcript, "<seconds> seconds")`` on success;
        * ``("An error occurred: <message>", "Error")`` if any step raises.
    """
    # Function-scope imports so this block does not depend on edits to the
    # module's import section.
    import os
    import tempfile

    audio_filepath = file_upload if file_upload is not None else mic_upload
    if audio_filepath is None:
        return "Please upload or record an audio file.", "0.00s"

    clean_audio_path = None
    try:
        start_time = time.time()

        # Normalise the input to mono / 16 kHz before handing it to the model.
        y, sr = librosa.load(audio_filepath, sr=16000, mono=True)

        # Write the normalised audio to a per-request temp file. A fixed
        # filename would let concurrent requests overwrite each other's audio.
        fd, clean_audio_path = tempfile.mkstemp(prefix="clean_", suffix=".wav")
        os.close(fd)  # soundfile reopens the path itself
        sf.write(clean_audio_path, y, sr)

        # Transcribe the normalised file, not the raw upload.
        transcription = model.transcribe([clean_audio_path])

        # transcribe() may hand back either a tuple whose first element is the
        # list of results, or the list of results directly.
        if isinstance(transcription, tuple):
            raw_result = transcription[0][0]
        else:
            raw_result = transcription[0]

        # A result may be an object exposing ``.text`` or something that is
        # simply converted with str().
        if hasattr(raw_result, "text"):
            result_text = raw_result.text
        else:
            result_text = str(raw_result)

        process_time = time.time() - start_time
        return result_text, f"{process_time:.2f} seconds"
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of raising.
        return f"An error occurred: {str(e)}", "Error"
    finally:
        # Best-effort cleanup so temp files do not accumulate across requests.
        if clean_audio_path is not None:
            try:
                os.remove(clean_audio_path)
            except OSError:
                pass
# ─────────────────────────────────────────────
# 3. THE "PRO" DASHBOARD UI
# ─────────────────────────────────────────────
# Soft base theme with an indigo / blue / slate palette; the "Inter" Google
# font is listed first, with a generic sans-serif after it.
theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
    neutral_hue="slate",
    secondary_hue="blue",
    primary_hue="indigo",
)
# Markdown bodies for the page header and footer, kept out of the layout code
# so the nesting below stays readable.
_HEADER_MARKDOWN = """
# ποΈ Next-Gen Speech Recognition
### Built with NVIDIA Parakeet & Custom Fine-Tuning
*This model was fine-tuned offline to achieve a highly competitive **0.29 Word Error Rate** on a rigorous test dataset.*
"""
_FOOTER_MARKDOWN = """
**System Specs:** `Parakeet-tdt-0.6b-v2` Base | `Custom LoRA Adapter` | `Greedy Decoding`
"""

with gr.Blocks(theme=theme, title="Parakeet ASR") as demo:
    # -- Header --
    gr.Markdown(_HEADER_MARKDOWN)

    # -- Main layout: two equal-width columns --
    with gr.Row():
        # Left column: audio inputs (one tab per source) and action buttons.
        with gr.Column(scale=1):
            gr.Markdown("### 1. Input Audio")
            with gr.Tabs():
                with gr.TabItem("Upload File"):
                    audio_upload = gr.Audio(
                        sources=["upload"],
                        type="filepath",
                        label="Audio File",
                    )
                with gr.TabItem("Record Microphone"):
                    audio_mic = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="Speak into Mic",
                    )
            submit_btn = gr.Button("π Transcribe Audio", variant="primary", size="lg")
            # The clear button is attached to the two audio inputs only.
            clear_btn = gr.ClearButton([audio_upload, audio_mic])

        # Right column: transcript and timing read-out.
        with gr.Column(scale=1):
            gr.Markdown("### 2. Transcription Result")
            output_text = gr.Textbox(
                label="Transcribed Text",
                lines=8,
                placeholder="Your transcription will appear here...",
            )
            with gr.Row():
                metrics = gr.Textbox(
                    label="Processing Time",
                    value="0.00s",
                    interactive=False,
                )

    # -- Footer --
    gr.Markdown("---")
    gr.Markdown(_FOOTER_MARKDOWN)

    # -- Event wiring --
    # A single click handler receives both audio inputs at once and writes to
    # the transcript and timing boxes.
    submit_btn.click(
        fn=transcribe_audio,
        inputs=[audio_upload, audio_mic],
        outputs=[output_text, metrics],
    )
# ─────────────────────────────────────────────
# 4. LAUNCH
# ─────────────────────────────────────────────
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()