vocal-sync-intelligence / src /transcription /file_transcriber.py
Fnu Mahnoor
add repo
bf2d622
import numpy as np
from faster_whisper import WhisperModel
import librosa
import logging
# Module-level logger named after this module; FileTranscriber reports model
# loading progress and transcription failures through it.
logger = logging.getLogger(__name__)
class FileTranscriber:
    """Transcribe audio files to text with a lazily created Whisper model.

    The model is not built in ``__init__``. It is created by the first call to
    :meth:`load_model` (which :meth:`transcribe_file` triggers automatically)
    and then reused for every later transcription.

    Attributes:
        model_size: Model identifier passed as the first argument to
            ``WhisperModel`` (default ``"base"``).
        device: Value forwarded as ``WhisperModel(device=...)``.
        compute_type: Value forwarded as ``WhisperModel(compute_type=...)``.
        model: The loaded ``WhisperModel`` instance, or ``None`` until
            :meth:`load_model` has succeeded.
    """

    # Every file is resampled to this rate before it is handed to the model
    # (Whisper models operate on 16 kHz input).
    SAMPLE_RATE = 16000

    def __init__(self, model_size="base", device="cpu", compute_type="int8"):
        """Store the model configuration without loading anything yet."""
        self.model_size = model_size
        self.device = device
        self.compute_type = compute_type
        self.model = None

    def load_model(self):
        """Create the Whisper model on first use; later calls are no-ops.

        Raises:
            Exception: Any error raised while constructing ``WhisperModel`` is
                logged and then re-raised unchanged.
        """
        if self.model is not None:
            return

        logger.info("Loading Whisper model: %s", self.model_size)
        try:
            self.model = WhisperModel(
                self.model_size,
                device=self.device,
                compute_type=self.compute_type,
            )
        except Exception as e:
            logger.error("Failed to load model: %s", e)
            raise
        logger.info("Model loaded successfully")

    def _load_audio(self, file_path):
        """Read *file_path* as a float32 waveform resampled to SAMPLE_RATE."""
        waveform, _ = librosa.load(file_path, sr=self.SAMPLE_RATE)
        # copy=False skips the extra allocation when the data is already float32.
        return waveform.astype(np.float32, copy=False)

    def transcribe_file(self, file_path, *, beam_size=5, vad_filter=True):
        """Transcribe one audio file and return its text.

        Args:
            file_path: Path of the audio file to read.
            beam_size: Forwarded to ``WhisperModel.transcribe(beam_size=...)``.
            vad_filter: Forwarded to ``WhisperModel.transcribe(vad_filter=...)``.

        Returns:
            The whitespace-stripped text of every non-empty segment, joined by
            single spaces. If reading or transcribing the file fails, the
            failure is logged and the string ``"Error: <message>"`` is returned
            instead of raising.

        Raises:
            Exception: Only model-loading failures propagate (see
                :meth:`load_model`); they happen before the guarded section.
        """
        self.load_model()
        try:
            audio_data = self._load_audio(file_path)
            segments, _ = self.model.transcribe(
                audio_data, beam_size=beam_size, vad_filter=vad_filter
            )
            # Consume the segments inside the try block: faster-whisper yields
            # them lazily, so decoding errors surface during iteration.
            stripped = (segment.text.strip() for segment in segments)
            return " ".join(text for text in stripped if text)
        except Exception as e:
            # logger.exception keeps the traceback that would otherwise be
            # lost, since the error is converted to a return value here.
            logger.exception("Error transcribing file %s: %s", file_path, e)
            return f"Error: {str(e)}"