cigol123 committed on
Commit
9ece3ee
·
verified ·
1 Parent(s): 28fc548

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
4
+ import soundfile as sf
5
+ import numpy as np
6
+ from scipy import signal
7
+
8
# Checkpoint id used for both the processor and the model.
MODEL_ID = "openai/whisper-large-v3"

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Running on device: {device}")

# Load the processor and the model, then move the model onto `device`.
print("Loading Whisper model for Macedonian transcription...")
processor = WhisperProcessor.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
model = model.to(device)
print("✓ Model loaded successfully!")
17
+
18
def process_audio(audio_path):
    """Transcribe a Macedonian speech recording with the module-level Whisper model.

    The file is read with soundfile, mixed down to mono, resampled to the
    16 kHz rate that is passed to the processor, and decoded with
    ``language="mk"``.

    Args:
        audio_path: Path to the audio file to transcribe. May be ``None`` or
            empty when the caller has no recording to offer; that case is
            reported as an error string instead of being passed to the
            audio reader.

    Returns:
        The transcription text, or a string starting with
        ``"Error during transcription: "`` when something went wrong.
        Failures are returned rather than raised so the caller can display
        them as ordinary text.

    NOTE(review): the processor is called with its default padding/truncation
    settings; recordings longer than the model's input window are presumably
    cut off -- confirm against the installed transformers version.
    """
    target_sr = 16000

    # Nothing was recorded or uploaded: say so plainly instead of letting the
    # audio reader fail with an opaque message.
    if not audio_path:
        return "Error during transcription: no audio was provided"

    try:
        # Read as float32 up front so no later float64 -> float32 cast is needed.
        waveform, sr = sf.read(audio_path, dtype="float32")

        if waveform.ndim > 1:  # Convert multi-channel audio to mono
            waveform = waveform.mean(axis=1)

        if waveform.size == 0:
            return "Error during transcription: the audio file is empty"

        if sr != target_sr:
            # Polyphase resampling applies a proper anti-aliasing filter and,
            # unlike FFT-based resampling, does not treat the clip as periodic
            # (which produces wrap-around artefacts at the start and end).
            waveform = signal.resample_poly(waveform, target_sr, sr)

        # Process the audio
        inputs = processor(
            waveform, sampling_rate=target_sr, return_tensors="pt"
        ).to(device)
        print("Transcribing...")
        # The task is spelled out so the output is always a transcription in
        # the spoken language, never a translation.
        predicted_ids = model.generate(**inputs, language="mk", task="transcribe")
        transcription = processor.batch_decode(
            predicted_ids, skip_special_tokens=True
        )[0]
        return transcription
    except Exception as e:
        # Deliberate best-effort boundary: report the failure as text.
        return f"Error during transcription: {str(e)}"
36
+
37
# Gradio interface: a single audio input (microphone or upload, handed to the
# callback as a file path) and a plain-text output.
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs="text",
    title="Македонско препознавање на говор / Macedonian Speech Recognition",
    description=(
        "Качете аудио или користете микрофон за транскрипција на македонски говор / Upload audio or use microphone to transcribe Macedonian speech"
    ),
)

if __name__ == "__main__":
    # Start the app only when this file is run as a script.
    demo.launch(share=True)