Spaces:

nambn0321
/

My_First_ASR

Sleeping

nambn0321 committed on Jul 22, 2025

Commit

5561e0e

verified ·

1 Parent(s): 3c28a5c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import torchaudio
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import gradio as gr
-MODEL_PATH = "nambn0321/ASR_models"  # Your HF model repo
 processor = Wav2Vec2Processor.from_pretrained(MODEL_PATH)
 model = Wav2Vec2ForCTC.from_pretrained(MODEL_PATH).eval()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -12,13 +12,13 @@ model.to(device)
 def transcribe(audio):
     try:
         if audio is None:
-            return "No audio provided."
         sr, data = audio
         print(f"Sample rate: {sr}, Audio shape: {len(data)}")
         waveform = torch.tensor(data, dtype=torch.float32).unsqueeze(0)
-        waveform = waveform / 32768.0  # normalize 16-bit PCM
         if sr != 16000:
             resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
@@ -45,6 +45,6 @@ gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(type="numpy", label="Upload WAV/MP3 file"),
     outputs=gr.Textbox(label="Transcription"),
-    title=" ASR Demo with Wav2Vec2",
-    description="Upload an audio file (WAV or MP3) and get the transcription using your fine-tuned model.",
 ).launch()

 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import gradio as gr
+MODEL_PATH = "nambn0321/ASR_models"
 processor = Wav2Vec2Processor.from_pretrained(MODEL_PATH)
 model = Wav2Vec2ForCTC.from_pretrained(MODEL_PATH).eval()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 def transcribe(audio):
     try:
         if audio is None:
+            return "No audio provided"
         sr, data = audio
         print(f"Sample rate: {sr}, Audio shape: {len(data)}")
         waveform = torch.tensor(data, dtype=torch.float32).unsqueeze(0)
+        waveform = waveform / 32768.0
         if sr != 16000:
             resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
     fn=transcribe,
     inputs=gr.Audio(type="numpy", label="Upload WAV/MP3 file"),
     outputs=gr.Textbox(label="Transcription"),
+    title=" ASR Demo oMGMGGOMGOMGOGMOG",
+    description="Upload an audio file (WAV or MP3) and get the transcription.",
 ).launch()