cigol123 committed on
Commit
9b83916
·
verified ·
1 Parent(s): 9f6ab6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -26
app.py CHANGED
@@ -5,34 +5,30 @@ import soundfile as sf
5
  import numpy as np
6
  from scipy import signal
7
 
8
- # Ensure the model runs on GPU if available
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- print(f"Running on device: {device}")
 
 
 
 
11
 
12
- # Load the model and processor
13
- print("Loading Whisper model for Macedonian transcription...")
14
- processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
15
- model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3").to(device)
16
- print("✓ Model loaded successfully!")
17
 
18
  def process_audio(audio_path):
19
- try:
20
- # Load and resample to 16kHz using scipy
21
- waveform, sr = sf.read(audio_path)
22
- if len(waveform.shape) > 1: # Convert stereo to mono
23
- waveform = waveform.mean(axis=1)
24
- if sr != 16000: # Resample if necessary
25
- num_samples = int(len(waveform) * 16000 / sr)
26
- waveform = signal.resample(waveform, num_samples)
27
-
28
- # Process the audio
29
- inputs = processor(waveform, sampling_rate=16000, return_tensors="pt").to(device)
30
- print("Transcribing...")
31
- predicted_ids = model.generate(**inputs, language="mk")
32
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
33
- return transcription
34
- except Exception as e:
35
- return f"Error during transcription: {str(e)}"
36
 
37
  # Gradio interface
38
  demo = gr.Interface(
@@ -44,4 +40,4 @@ demo = gr.Interface(
44
  )
45
 
46
  if __name__ == "__main__":
47
- demo.launch(share=True)
 
5
  import numpy as np
6
  from scipy import signal
7
 
8
# Load the Whisper model and processor directly from Hugging Face
def load_model():
    """Fetch and return the Whisper large-v3 processor and model.

    Returns:
        tuple: (WhisperProcessor, WhisperForConditionalGeneration) ready
        for feature extraction and generation.
    """
    checkpoint = "openai/whisper-large-v3"
    print("Loading Whisper model and processor...")
    processor = WhisperProcessor.from_pretrained(checkpoint)
    model = WhisperForConditionalGeneration.from_pretrained(checkpoint)
    print("✓ Model and processor loaded successfully!")
    return processor, model

processor, model = load_model()
 
 
 
 
17
 
18
def process_audio(audio_path):
    """Transcribe an audio file to Macedonian text with Whisper.

    Parameters:
        audio_path (str | None): path to the recording handed over by
            Gradio. Gradio passes None when the user submits without
            recording or uploading anything.

    Returns:
        str: the transcription, or a human-readable error message when
        the audio is missing or cannot be read.
    """
    # Guard against an empty submission — sf.read(None) would raise an
    # opaque TypeError otherwise.
    if audio_path is None:
        return "Error during transcription: no audio provided."

    try:
        # soundfile returns a float array shaped (frames,) or (frames, channels).
        waveform, sr = sf.read(audio_path)
    except (OSError, RuntimeError) as e:
        # Corrupt/unsupported files should surface as a message in the UI,
        # not as a raw traceback.
        return f"Error during transcription: {str(e)}"

    if len(waveform.shape) > 1:  # Convert stereo to mono
        waveform = waveform.mean(axis=1)
    if sr != 16000:  # Whisper expects 16 kHz input; resample if necessary
        num_samples = int(len(waveform) * 16000 / sr)
        waveform = signal.resample(waveform, num_samples)

    # Process the audio and generate the Macedonian transcription
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    predicted_ids = model.generate(**inputs, language="mk")
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
 
 
 
 
32
 
33
  # Gradio interface
34
  demo = gr.Interface(
 
40
  )
41
 
42
# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()