|
|
import whisper
|
|
|
|
|
|
class WhisperModel(object):
    """Thin wrapper around the `whisper` library for transcription and
    language detection.

    Holds a single loaded Whisper model instance and exposes helpers to
    transcribe audio files and to pull fields out of the transcription
    result dict returned by `whisper`.
    """

    def __init__(self, model_type="base"):
        """Load a Whisper model.

        Args:
            model_type: Whisper model size/name (e.g. "tiny", "base",
                "small"). Defaults to "base", preserving the previous
                hard-coded behavior.
        """
        # Bug fix: the original ignored `model_type` and always loaded
        # "base"; now the requested model is actually loaded.
        self.model = whisper.load_model(model_type)

    def transcribe_audio(self, file_path):
        """Transcribe an audio file.

        Args:
            file_path: Path to the audio file to transcribe.

        Returns:
            The full result dict from `model.transcribe` (contains at
            least 'text', 'language' and 'segments').

        Raises:
            Exception: Wraps any error raised during transcription.
        """
        try:
            result = self.model.transcribe(file_path)
            return result
        except Exception as e:
            print(f"Error {e}")
            # Chain the original exception so the traceback is preserved.
            raise Exception(f'Error transcribing audio file {e}') from e

    def get_text(self, transcription):
        """Return the full transcribed text from a transcription result."""
        return transcription['text']

    def get_detected_language(self, transcription):
        """Return the detected language code from a transcription result."""
        return transcription['language']

    def get_segments(self, transcription):
        """Extract per-segment info from a transcription result.

        Args:
            transcription: Result dict from `transcribe_audio`.

        Returns:
            A list of dicts, one per segment, each with keys
            'text', 'start', 'end' and 'id'.
        """
        return [
            {
                "text": segment['text'],
                "start": segment['start'],
                "end": segment['end'],
                "id": segment['id'],
            }
            for segment in transcription['segments']
        ]

    def detect_language(self, file_path):
        """Detect the spoken language of an audio file.

        Uses the first 30 seconds of audio (Whisper's `pad_or_trim`
        window) to compute language probabilities.

        Args:
            file_path: Path to the audio file.

        Returns:
            The most probable language code (e.g. "en").

        Raises:
            Exception: Wraps any error raised during detection.
        """
        try:
            audio = whisper.load_audio(file_path)
            audio = whisper.pad_or_trim(audio)

            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)

            _, probs = self.model.detect_language(mel)

            # Compute the argmax once instead of twice.
            detected = max(probs, key=probs.get)
            print(f"Detected language: {detected}")
            return detected
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error detecting language {e}') from e
|
|
|
|
|
|
|
|
|
|