nambn0321 committed on
Commit
5561e0e
·
verified ·
1 Parent(s): 3c28a5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -3,7 +3,7 @@ import torchaudio
3
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
4
  import gradio as gr
5
 
6
- MODEL_PATH = "nambn0321/ASR_models" # Your HF model repo
7
  processor = Wav2Vec2Processor.from_pretrained(MODEL_PATH)
8
  model = Wav2Vec2ForCTC.from_pretrained(MODEL_PATH).eval()
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -12,13 +12,13 @@ model.to(device)
12
  def transcribe(audio):
13
  try:
14
  if audio is None:
15
- return "No audio provided."
16
 
17
  sr, data = audio
18
  print(f"Sample rate: {sr}, Audio shape: {len(data)}")
19
 
20
  waveform = torch.tensor(data, dtype=torch.float32).unsqueeze(0)
21
- waveform = waveform / 32768.0 # normalize 16-bit PCM
22
 
23
  if sr != 16000:
24
  resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
@@ -45,6 +45,6 @@ gr.Interface(
45
  fn=transcribe,
46
  inputs=gr.Audio(type="numpy", label="Upload WAV/MP3 file"),
47
  outputs=gr.Textbox(label="Transcription"),
48
- title=" ASR Demo with Wav2Vec2",
49
- description="Upload an audio file (WAV or MP3) and get the transcription using your fine-tuned model.",
50
  ).launch()
 
3
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
4
  import gradio as gr
5
 
6
+ MODEL_PATH = "nambn0321/ASR_models"
7
  processor = Wav2Vec2Processor.from_pretrained(MODEL_PATH)
8
  model = Wav2Vec2ForCTC.from_pretrained(MODEL_PATH).eval()
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
12
  def transcribe(audio):
13
  try:
14
  if audio is None:
15
+ return "No audio provided"
16
 
17
  sr, data = audio
18
  print(f"Sample rate: {sr}, Audio shape: {len(data)}")
19
 
20
  waveform = torch.tensor(data, dtype=torch.float32).unsqueeze(0)
21
+ waveform = waveform / 32768.0
22
 
23
  if sr != 16000:
24
  resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
 
45
  fn=transcribe,
46
  inputs=gr.Audio(type="numpy", label="Upload WAV/MP3 file"),
47
  outputs=gr.Textbox(label="Transcription"),
48
+ title=" ASR Demo oMGMGGOMGOMGOGMOG",
49
+ description="Upload an audio file (WAV or MP3) and get the transcription.",
50
  ).launch()