File size: 1,134 Bytes
c3a047c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import torch
from transformers import pipeline

class STTEngine:
    """Speech-to-text engine wrapping a Hugging Face Whisper ASR pipeline."""

    # Lowercase language names mapped to the code handed to Whisper.
    # NOTE(review): "ig" and "pcm" may not be in Whisper's supported-language
    # list; confirm against the loaded model before relying on them.
    _LANGUAGE_CODES = {
        "english": "en",
        "hausa": "ha",
        "yoruba": "yo",
        "igbo": "ig",
        "pidgin": "pcm",
    }
    # Used whenever the requested language cannot be resolved.
    _DEFAULT_LANGUAGE = "en"

    def __init__(self, model_name="openai/whisper-small"):
        """Load the automatic-speech-recognition pipeline.

        Args:
            model_name (str): model identifier passed to ``pipeline``.
        """
        # Device index 0 when CUDA is available, otherwise -1 (CPU).
        device = 0 if torch.cuda.is_available() else -1
        self.asr = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
        )

    @classmethod
    def _resolve_language(cls, language):
        """Return the Whisper code for a language name or code.

        Matching ignores case and surrounding whitespace. A known name
        ('hausa') is mapped to its code; a known code ('ha') is returned
        as-is; anything else (including ``None``) yields the default 'en'.
        """
        if not isinstance(language, str):
            return cls._DEFAULT_LANGUAGE
        key = language.strip().lower()
        if key in cls._LANGUAGE_CODES:
            return cls._LANGUAGE_CODES[key]
        # Accept codes directly: previously 'ha'/'yo'/'ig' missed the
        # name-keyed map and were silently transcribed as English.
        if key in cls._LANGUAGE_CODES.values():
            return key
        return cls._DEFAULT_LANGUAGE

    def transcribe(self, audio_path, language="en"):
        """Transcribe an audio file to text using Whisper.

        Args:
            audio_path (str): path to the audio file; ``None`` yields "".
            language (str): language name ('english', 'hausa', 'yoruba',
                'igbo', 'pidgin') or code ('en', 'ha', 'yo', 'ig', 'pcm').
                Unrecognised values fall back to 'en'.

        Returns:
            str: the recognised text with surrounding whitespace removed.
        """
        if audio_path is None:
            return ""

        whisper_lang = self._resolve_language(language)
        result = self.asr(audio_path, generate_kwargs={"language": whisper_lang})
        return result["text"].strip()