| from utils.central_logging import get_logger |
| from langchain_openai import OpenAIEmbeddings |
| from pathlib import Path |
| import whisper |
| import threading |
| import os |
|
|
|
|
# Module-wide logger obtained from the project's central logging helper.
logger = get_logger("whisper")

# Lazily populated shared instances; both start out empty and are filled in
# by get_whisper() / get_embedding() on first use.
_whisper_model = None
_embedding = None

# One lock per shared instance, held while that instance is being created.
_lock = threading.Lock()
_embedding_lock = threading.Lock()
|
|
def get_whisper():
    """Return the shared Whisper model, loading it on the first call.

    The module-level ``_whisper_model`` is checked once without the lock and
    again while holding ``_lock``; only if it is still ``None`` is the
    ``"base"`` model loaded and a log line emitted.

    Returns:
        The object produced by ``whisper.load_model("base")``.
    """
    global _whisper_model

    # Fast path: the model already exists, so no lock is needed.
    if _whisper_model is not None:
        return _whisper_model

    with _lock:
        # Re-check under the lock: another thread may have loaded the model
        # between the test above and the lock being acquired.
        if _whisper_model is None:
            _whisper_model = whisper.load_model("base")
            logger.info("Whisper model has been loaded")

    return _whisper_model
|
|
def get_embedding():
    """Return the shared ``OpenAIEmbeddings`` instance, creating it on first use.

    The module-level ``_embedding`` is checked once without the lock and
    again while holding ``_embedding_lock``; only if it is still ``None`` is
    a new instance built for the ``"text-embedding-ada-002"`` model and a log
    line emitted.

    Returns:
        The shared ``OpenAIEmbeddings`` object.
    """
    global _embedding

    # Fast path: the client already exists, so no lock is needed.
    if _embedding is not None:
        return _embedding

    with _embedding_lock:
        # Re-check under the lock: another thread may have created the client
        # between the test above and the lock being acquired.
        if _embedding is None:
            _embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
            logger.info("Openai embedding has been initialized")

    return _embedding
|
|
|
|
def transcribe_content(url_path: str) -> str:
    """Transcribe the media at ``url_path`` and return the recognised text.

    Args:
        url_path: Location of the media to transcribe. It is forwarded
            verbatim to the model's ``transcribe`` method; no path
            normalisation is applied here.

    Returns:
        The value stored under the ``"text"`` key of the transcription
        result.
    """
    # NOTE(review): the argument is deliberately passed through untouched.
    # Resolving it with pathlib would rewrite URL-like inputs, so any
    # normalisation should be done by the caller if it is wanted.
    model = get_whisper()
    result = model.transcribe(url_path)
    return result["text"]
|
|
|
|
def save_file(file_name, result: str) -> None:
    """Write ``result`` to ``file_name``, replacing any existing content.

    Args:
        file_name: Path of the file to create or overwrite; passed straight
            to ``open``.
        result: Text to write.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # Pin the encoding: without it ``open`` falls back to the platform
    # default, which can fail on (or silently re-encode) non-ASCII text.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(result)
|
|
|
|