# chappi-e / src / tools / transcriber.py
# Author: santiagoahl
# Commit: "Integrate Project work to HF template" (5dfdf10)
import whisper
from langchain_core.tools import tool
import os
@tool
def transcriber(audio_path: str, use_gpu: bool = False) -> str:
    """
    Transcribe an audio file to text using a Whisper model.

    Parameters
    ----------
    audio_path : str
        Path to an existing audio file (e.g. .wav, .mp3). Must be readable
        by ffmpeg. A leading ``~`` is expanded to the user's home directory.
    use_gpu : bool
        Pass True if you are in a Colab GPU environment or you have an
        NVIDIA GPU with CUDA available.

    Returns
    -------
    str
        Text of the transcript.
    """
    # Expand "~" so home-relative paths work; the commented-out driver code
    # in __main__ did this manually, so it belongs inside the tool itself.
    audio_path = os.path.expanduser(audio_path)

    model_size = "tiny"  # smallest/fastest Whisper checkpoint
    # load_model accepts a device argument directly — avoids loading the
    # weights on CPU first and then copying them over with .cuda().
    ai_model = whisper.load_model(
        model_size,
        device="cuda" if use_gpu else "cpu",
    )

    raw_transcript = ai_model.transcribe(
        audio_path,
        word_timestamps=False,            # plain text only; no per-word timing
        no_speech_threshold=0.5,          # skip segments likely to be silence
        condition_on_previous_text=True,
        compression_ratio_threshold=2.0,  # guard against degenerate repetition
    )
    return raw_transcript["text"]
if __name__ == "__main__":
    # Manual smoke test against a locally downloaded YouTube audio file.
    # (Removed commented-out GAIA-benchmark path experiments — dead code.)
    audio_path = "data/temp/yt_audio.mp3"
    print("=" * 30, "\nTranscription\n", "=" * 30, "\n", transcriber(audio_path))
    # TODO: include unit testing modules