| |
|
|
| import os |
| from pathlib import Path |
| import requests |
| import librosa |
| from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline |
| from transformers.utils import logging |
| import soundfile as sf |
|
|
| |
# Whisper model identifiers, grouped by the languages they cover.
# Each name is assembled from a repository prefix and a size/variant suffix.
_OPENAI_PREFIX = "openai/whisper-"
_DISTIL_PREFIX = "distil-whisper/distil-"

model_ids = {
    "Multilingual models": [
        _OPENAI_PREFIX + variant
        for variant in (
            "large-v3-turbo",
            "large-v3",
            "large-v2",
            "large",
            "medium",
            "small",
            "base",
            "tiny",
        )
    ],
    "English-only models": [
        _DISTIL_PREFIX + variant
        for variant in ("large-v2", "large-v3", "medium.en", "small.en")
    ]
    + [
        _OPENAI_PREFIX + variant
        for variant in ("medium.en", "small.en", "base.en", "tiny.en")
    ],
}
|
|
def download_file(url, filename, directory=".", timeout=30):
    """Download a file from a URL and save it in the given directory.

    Args:
        url (str): Address of the resource to fetch.
        filename (str): Name of the file to create inside ``directory``.
        directory (str): Destination directory; created if it does not exist.
        timeout (float | tuple): Value forwarded to ``requests.get`` so a
            stalled server cannot block the call forever.

    Returns:
        pathlib.Path: Path of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    target_dir = Path(directory)
    target_dir.mkdir(parents=True, exist_ok=True)
    filepath = target_dir / filename

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving the error page as the file.
    response.raise_for_status()

    filepath.write_bytes(response.content)
    return filepath
|
|
def transcribe_audio(file_path, model_name):
    """Transcribe an audio file with a Whisper model.

    Args:
        file_path (str): Path of the audio file.
        model_name (str): Name of the Whisper model to load.

    Returns:
        str: Transcription of the audio.
    """
    processor = AutoProcessor.from_pretrained(model_name)
    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device="cpu",
    )

    # A bare array carries no sampling-rate information, so load the audio
    # at the rate the feature extractor is configured for.
    target_rate = processor.feature_extractor.sampling_rate
    audio_data, _ = librosa.load(file_path, sr=target_rate)

    # NOTE(review): no chunking options are passed to the pipeline; confirm
    # how recordings longer than the model's input window should be handled.
    return pipe(audio_data)["text"]
|
|
def guardar_transcripcion(texto, filename="transcripcion.txt", directory="../results"):
    """Save the transcribed text to a .txt file in the given directory.

    Args:
        texto (str): Transcribed text to save.
        filename (str): Name of the .txt file.
        directory (str): Directory where the file is saved; created if it
            does not exist.
    """
    target_dir = Path(directory)
    target_dir.mkdir(parents=True, exist_ok=True)

    file_path = target_dir / filename
    file_path.write_text(texto, encoding="utf-8")

    # The accented character was mis-encoded in the original message.
    print(f"Transcripción guardada en: {file_path}")
|
|
def main():
    """Transcribe the recorded audio file and save the resulting text.

    Raises:
        FileNotFoundError: If the audio file does not exist.
    """
    # Only report errors from the transformers logger.
    logging.set_verbosity_error()

    audio_path = os.path.abspath("../miwav2lipv6/assets/audio/grabacion_gradio.wav")
    # Fail before loading the model when the input is missing.
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f"No se encontró el archivo de audio: {audio_path}")

    model_name = "openai/whisper-large"

    print(f"Transcribiendo el audio del archivo: {audio_path}")
    transcription = transcribe_audio(audio_path, model_name)
    # The accented character was mis-encoded in the original message.
    print(f"Transcripción: {transcription}")

    guardar_transcripcion(transcription)


if __name__ == "__main__":
    main()
|
|