.gitignore CHANGED
@@ -1,25 +1,24 @@
 # Ignore the virtual environment
 env/
 
 # Ignore build files and folders
 __pycache__/
 *.pyc
 *.pyo
 *.py[cod]
 .vscode/
 .DS_Store
 
 # Ignore log files and test output
 *.log
 *.out
 *.tmp
 
 
 # Ignore models and checkpoints
 models/
 checkpoints/
 src/Wav2Lip/
 assets/
 data/
 # Temporary and system files
-
app.py DELETED
@@ -1,183 +0,0 @@
- # interfaceV2.py
-
- import gradio as gr
- import sounddevice as sd
- from scipy.io.wavfile import write
- import tempfile
- import shutil
- import os
- import subprocess
- import sys
- from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
- from call_openai_api import moni as rtff  # call_openai_api.py must live in the same directory
-
-
- # Paths to files (adjusted to the project layout)
- AUDIO_RECORD_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/grabacion_gradio.wav")
- #VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun_5s.mp4")
- VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun.mp4")
- TRANSCRIPTION_TEXT_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
- RESULT_AUDIO_TEMP_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/audiov2.wav")
- RESULT_AUDIO_FINAL_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav")
- RESULT_VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/result_voice.mp4")
- TEXT_TO_SPEECH_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/text_to_speech.py")
-
- # Function to record 8-second audio
- def grabar_audio(duration=8, sample_rate=44100):
-     print("Starting recording...")
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
-     print(f"Recording in progress for {duration} seconds...")
-     sd.wait()
-     print("Recording completed.")
-
-     temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-     write(temp_audio.name, sample_rate, audio_data)
-     print("Audio temporarily saved at:", temp_audio.name)
-     temp_audio.close()  # Close the handle before copying the file
-     os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
-     shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
-     print(f"Recording copied to: {AUDIO_RECORD_PATH}")
-
-     return AUDIO_RECORD_PATH, "Recording completed."
-
- # Function to transcribe audio with Whisper
- def transcribir_con_progreso(audio_path):
-     progreso = gr.Progress()
-     progreso(0, "Starting transcription...")
-     model_name = "openai/whisper-large"
-     progreso(25, "Loading Whisper model...")
-
-     transcripcion = transcribe_audio(audio_path, model_name)
-     progreso(75, "Saving transcription...")
-     guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
-     progreso(100, "Transcription completed.")
-     if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
-         raise FileNotFoundError(f"El archivo {TRANSCRIPTION_TEXT_PATH} no se generó.")
-
-     return transcripcion
-
- # Function to convert text to audio using text_to_speech.py
- def generar_audio_desde_texto():
-     print("Generating audio from text...")
-     result = subprocess.run(
-         [sys.executable, TEXT_TO_SPEECH_PATH],
-         capture_output=True,
-         text=True
-     )
-     if result.returncode != 0:
-         raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")
-     if result.stdout:
-         print("Output:", result.stdout)
-     if result.stderr:
-         print("Errors:", result.stderr)
-
-     if os.path.exists(RESULT_AUDIO_TEMP_PATH):
-         print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
-
-         os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
-         shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
-         print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
-
-         return RESULT_AUDIO_FINAL_PATH
-     else:
-         print(f"Error: Audio file was not generated at {RESULT_AUDIO_TEMP_PATH}")
-         return None
-
- # Function to process video and audio using run_inference.py with the generated audio file
- def procesar_video_audio():
-     print("Starting video and audio processing...")
-     run_inference_path = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
-
-     result = subprocess.run(
-         [sys.executable, run_inference_path, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
-         capture_output=True,
-         text=True
-     )
-
-     if result.stdout:
-         print("Output:", result.stdout)
-     if result.stderr:
-         print("Errors:", result.stderr)
-
-     if os.path.exists(RESULT_VIDEO_PATH):
-         print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
-         return RESULT_VIDEO_PATH
-     else:
-         print("Error: Video file was not generated at 'results/result_voice.mp4'")
-         return None
-
- # Gradio interface configuration
- def interfaz():
-     with gr.Blocks() as demo:
-         with gr.Row():
-             with gr.Column():
-                 gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
-                 grabar_button = gr.Button("Comenzando la grabacion de audio")
-                 estado_grabacion = gr.Textbox(label="Recording Status", interactive=False)
-
-             with gr.Column():
-                 output_audio = gr.Audio(AUDIO_RECORD_PATH, label="Audio Grabado", interactive=False)
-                 output_audio_speech = gr.Audio(RESULT_AUDIO_FINAL_PATH, label="Audio TTS", interactive=False)
-                 video_resultado = gr.Video(RESULT_VIDEO_PATH, label="Video procesado", interactive=False)
-                 texto_transcripcion = gr.Textbox(label="Texto transcrito")
-                 progreso_transcripcion = gr.Textbox(label="Transcription Status", interactive=False)
-
-         # Full flow: recording, transcription, OpenAI response, text-to-speech, and video processing
-         def flujo_completo():
-             try:
-                 print("Inicio del flujo completo...")
-                 # Record audio
-                 audio_path, mensaje_grabacion = grabar_audio()
-                 print("Audio grabado en:", audio_path)
-                 # Transcribe the audio
-                 transcripcion = transcribir_con_progreso(audio_path)
-                 print("Transcripción completada:", transcripcion)
-
-                 # Generate the OpenAI response from the saved transcription file
-                 respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
-                 print("Respuesta generada por OpenAI")
-
-                 # Generate audio from text
-                 audio_generado = generar_audio_desde_texto()
-                 print("Audio generado:", audio_generado)
-                 # Process video and audio
-                 video_path = procesar_video_audio()
-                 print("Video procesado en:", video_path)
-                 # Return the results if everything succeeded
-                 return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
-
-             except Exception as e:
-                 # Print the error to the terminal and return error messages to the interface
-                 print("Error detectado en flujo completo:", str(e))
-                 return (
-                     "Error durante el flujo completo",
-                     None,                # Recorded audio
-                     f"Error: {str(e)}",  # Transcription
-                     None,                # Generated audio
-                     None                 # Processed video
-                 )
-
-         grabar_button.click(
-             flujo_completo,
-             outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
-         )
-
-     return demo
-
- if __name__ == "__main__":
-     demo = interfaz()
-     demo.launch(allowed_paths=["C:/programacionEjercicios/miwav2lipv6/assets", "C:/programacionEjercicios/miwav2lipv6/results"])
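Review note: every path in app.py is hard-coded under C:/programacionEjercicios/miwav2lipv6, which ties the script to one machine. A minimal, hypothetical sketch of the same constants derived from the project root with pathlib (it assumes app.py sits in the repository root; nothing below is in the original file):

    from pathlib import Path

    # Resolve the project root from this file's location instead of hard-coding a Windows path.
    PROJECT_ROOT = Path(__file__).resolve().parent

    # The same constants app.py defines, now machine-independent.
    AUDIO_RECORD_PATH = str(PROJECT_ROOT / "assets" / "audio" / "grabacion_gradio.wav")
    VIDEO_PATH = str(PROJECT_ROOT / "assets" / "video" / "data_video_sun.mp4")
    TRANSCRIPTION_TEXT_PATH = str(PROJECT_ROOT / "results" / "transcripcion.txt")
    RESULT_VIDEO_PATH = str(PROJECT_ROOT / "results" / "result_voice.mp4")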
estructura_proyecto.txt CHANGED
@@ -1,34 +1,34 @@
(the old and new versions are textually identical, likely a line-ending or whitespace-only rewrite, so the content is shown once)
 proyecto_root/
 
 ├── assets/
 │   ├── video/
 │   │   └── data_video_sun_5s.mp4
 │   └── audio/
 │       └── data_audio_sun_5s.wav
 │       # Test data, audio, and video files, such as `data_audio_sun_5s.wav`
 
 ├── checkpoints/
 │   └── # Pretrained models and checkpoints, such as `wav2lip_gan.pth`
 
 ├── models/
 │   └── # Models converted to OpenVINO IR, such as `face_detection.xml` and `wav2lip.xml`
 
 ├── src/
 │   ├── utils/
 │   ├── Wav2Lip/
 │   ├── convert_models.py
 │   ├── gradio_helper.py
 │   ├── ov_inference.py
 │   ├── ov_wav2lip_helper.py
 │   └── run_inference.py
 
 ├── tests/
 │   └── # Test scripts that verify the project's functionality
 
 ├── results/
 │   └── result_voice.mp4
 
 ├── requirements.txt          # List of project dependencies
 ├── setup.py                  # Project setup script
 ├── estructura_proyecto.txt   # This project-structure overview
 └── README.md                 # Project documentation
requirements.txt CHANGED
@@ -1,30 +1,31 @@
 openvino>=2024.4.0
 huggingface_hub
 torch>=2.1
 gradio>=4.19
 librosa==0.9.2
 opencv-contrib-python
 opencv-python
 IPython
 tqdm
 numba
 numpy
 
 openai-whisper
 sounddevice
 scipy
 
 transformers>=4.35
 torchvision>=0.18.1
 onnx>=1.16.1
 optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
 openvino
 openvino-tokenizers
 openvino-genai
 datasets
 soundfile>=0.12
 python-ffmpeg<=1.0.16
 nncf>=2.13.0
 jiwer
 
 gtts
+
results/OpenAI_response.txt DELETED
@@ -1,5 +0,0 @@
- Hola, prueba en marcha,
- María con IA se realza,
- Nuevo modelo se lanza,
- Incorporación, esperanza,
- Ser mejor, nuestra balanza.
results/transcripcion.txt DELETED
@@ -1 +0,0 @@
- Hola, esta es una prueba para ver si podemos incorporar este modelo a María, María RB.
setup.py CHANGED
@@ -1,274 +1,287 @@
(the old and new versions are identical except for blank lines appended at the end of the file; the unchanged content is shown once as context)
 # 2024/03/11 setup.py
 
 import os
 import subprocess
 import sys
 import requests
 
 from pathlib import Path
 
 # Project folder layout
 PROJECT_DIRECTORIES = [
     "assets",
     "assets/audio",
     "assets/video",
     "checkpoints",
     "models",
     "src",
     "src/utils",
     "tests",
     "results"
 ]
 
 # URLs of the OpenVINO Notebooks utilities
 OPENVINO_UTILS = {
     "notebook_utils.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py",
     "pip_helper.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py"
 }
 
 # URLs of the Wav2Lip helper files
 WAV2LIP_HELPERS = {
     "gradio_helper.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/gradio_helper.py",
     "ov_inference.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/ov_inference.py",
     "ov_wav2lip_helper.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/ov_wav2lip_helper.py"
 }
 
 WAV2LIP_HELPERS_DIR = Path("src")
 OPENVINO_UTILS_DIR = Path("src/utils")
 
 # URLs of the example input files
 EXAMPLE_FILES = {
     "audio_example": {
         "filename": "data_audio_sun_5s.wav",
         "url": "https://github.com/sammysun0711/openvino_aigc_samples/blob/main/Wav2Lip/data_audio_sun_5s.wav?raw=true",
         "folder": "assets/audio"
     },
     "video_example": {
         "filename": "data_video_sun_5s.mp4",
         "url": "https://github.com/sammysun0711/openvino_aigc_samples/blob/main/Wav2Lip/data_video_sun_5s.mp4?raw=true",
         "folder": "assets/video"
     }
 }
 
 def create_project_structure():
     """
     Creates the project folder structure.
     """
     for directory in PROJECT_DIRECTORIES:
         path = Path(directory)
         if not path.exists():
             path.mkdir(parents=True, exist_ok=True)
             print(f"Carpeta '{directory}' creada.")
         else:
             print(f"Carpeta '{directory}' ya existe.")
 
 def create_virtual_environment():
     """
     Creates the virtual environment if it does not exist.
     """
     env_path = Path("env")
     if not env_path.exists():
         print("Creando el entorno virtual...")
         subprocess.check_call([sys.executable, "-m", "venv", "env"])
         print(f"Entorno virtual creado en '{env_path}'.")
     else:
         print(f"El entorno virtual '{env_path}' ya existe.")
 
 def activate_virtual_environment():
     """
     Activates the virtual environment and returns the python and pip paths.
     """
     if os.name == 'nt':  # Windows
         python_path = str(Path("env") / "Scripts" / "python.exe")
         pip_path = str(Path("env") / "Scripts" / "pip.exe")
     else:  # Unix/macOS
         python_path = str(Path("env") / "bin" / "python")
         pip_path = str(Path("env") / "bin" / "pip")
 
     # Upgrade pip to the latest version inside the virtual environment using python -m pip
     try:
         subprocess.check_call([python_path, "-m", "pip", "install", "--upgrade", "pip"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         print("pip actualizado a la última versión.")
     except subprocess.CalledProcessError:
         print("Error al actualizar pip.")
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     return python_path, pip_path
 
 def install_requirements(pip_path):
     """
     Installs the dependencies from requirements.txt with a progress bar.
     """
     print("Instalando dependencias...")
     # Install tqdm in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     # Read requirements.txt and show a progress bar
     requirements_path = Path("requirements.txt")
     if not requirements_path.exists():
         print("Archivo requirements.txt no encontrado.")
         return
 
     with open(requirements_path, "r") as f:
         dependencies = f.read().splitlines()
 
     # Install each dependency with a progress bar
     for dependency in tqdm(dependencies, desc="Instalando dependencias", unit="paquete"):
         try:
             subprocess.check_call([pip_path, "install", dependency], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         except subprocess.CalledProcessError:
             print(f"\nError al instalar {dependency}.")
 
     print("Todas las dependencias fueron instaladas correctamente.")
 
 def download_openvino_utils(pip_path):
     """
     Downloads the OpenVINO Notebooks utility files into src/utils if they do not exist.
     """
     # Create the utilities folder if it does not exist
     OPENVINO_UTILS_DIR.mkdir(parents=True, exist_ok=True)
 
     # Install requests in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     # Install tqdm in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     for filename, url in tqdm(OPENVINO_UTILS.items(), desc="Descargando utilidades de OpenVINO", unit="archivo"):
         file_path = OPENVINO_UTILS_DIR / filename
         if not file_path.exists():
             response = requests.get(url)
             if response.status_code == 200:
                 with open(file_path, "wb") as f:
                     f.write(response.content)
             else:
                 print(f"Error al descargar {filename} desde {url}")
 
 def download_wav2lip_helpers(pip_path):
     """
     Downloads the Wav2Lip-specific helper files if they do not exist.
     """
     WAV2LIP_HELPERS_DIR.mkdir(parents=True, exist_ok=True)  # Creates `src` if it does not exist
 
     # Install requests in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
     for filename, url in tqdm(WAV2LIP_HELPERS.items(), desc="Descargando ayudas de Wav2Lip", unit="archivo"):
         file_path = WAV2LIP_HELPERS_DIR / filename
         if not file_path.exists():
             response = requests.get(url)
             if response.status_code == 200:
                 with open(file_path, "wb") as f:
                     f.write(response.content)
 
 def download_example_files():
     """
     Downloads the example input files (audio and video) into their corresponding folders.
     (Note: relies on the global `pip_path` set in the __main__ block.)
     """
     # Install requests in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     for example_name, example_info in tqdm(EXAMPLE_FILES.items(), desc="Descargando archivos de ejemplo", unit="archivo"):
         folder_path = Path(example_info["folder"])
         file_path = folder_path / example_info["filename"]
 
         # Create the folder if it does not exist
         folder_path.mkdir(parents=True, exist_ok=True)
 
         # Download the file if it does not exist
         if not file_path.exists():
             response = requests.get(example_info["url"])
             if response.status_code == 200:
                 with open(file_path, "wb") as f:
                     f.write(response.content)
 
 def clone_wav2lip_repo():
     """
     Clones the official Wav2Lip repository, hiding the progress behind tqdm.
     (Note: also relies on the global `pip_path` set in the __main__ block.)
     """
     repo_url = "https://github.com/Rudrabha/Wav2Lip"
     clone_path = "src/Wav2Lip"
 
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     # Check whether the repository already exists to avoid cloning it again
     if os.path.exists(clone_path):
         print(f"El repositorio '{clone_path}' ya existe.")
         return
 
     # Start the clone, using tqdm to mask the raw git output
     print("Clonando el repositorio de Wav2Lip...")
     with tqdm(total=100, desc="Clonación en progreso", ncols=100, bar_format="{l_bar}{bar}") as pbar:
         # Run the clone command
         exit_code = subprocess.call(["git", "clone", repo_url, clone_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
         if exit_code != 0:
             raise Exception("Error: La clonación del repositorio ha fallado.")
         else:
             pbar.update(100)
             print("Repositorio clonado exitosamente en 'Wav2Lip'.")
 
 
 if __name__ == "__main__":
     create_project_structure()
     create_virtual_environment()
     python_path, pip_path = activate_virtual_environment()
 
     download_openvino_utils(pip_path)
     download_wav2lip_helpers(pip_path)
     download_example_files()
     install_requirements(pip_path)
     clone_wav2lip_repo()
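Review note: setup.py repeats the same quiet pip-install try/except block ten times across its functions. A minimal sketch of a shared helper that would collapse them (the name pip_install is hypothetical; the behavior mirrors the existing blocks):

    import subprocess

    def pip_install(pip_path, package):
        # Quietly install `package` into the virtual environment, as the repeated blocks above do.
        try:
            subprocess.check_call([pip_path, "install", package],
                                  stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            print(f"Error al instalar {package}.")

Each repeated block then reduces to a call such as pip_install(pip_path, "tqdm").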
src/.gradio/certificate.pem DELETED
@@ -1,31 +0,0 @@
- (31-line PEM block: the ISRG Root X1 root CA certificate that Gradio caches automatically)
src/audio_recorder.py DELETED
@@ -1,48 +0,0 @@
- # audio_recorder.py
-
- import sounddevice as sd
- from scipy.io.wavfile import write
- import os
-
- # Path for saving the audio file in the `assets/audio/` directory
- AUDIO_PATH = os.path.join("..", "assets", "audio", "grabacion_8s.wav")
-
- def listar_dispositivos():
-     """
-     Lists all audio devices available on the system.
-     """
-     print("Dispositivos de audio disponibles:")
-     dispositivos = sd.query_devices()
-     for idx, dispositivo in enumerate(dispositivos):
-         print(f"{idx}: {dispositivo['name']} - {'Entrada' if dispositivo['max_input_channels'] > 0 else 'Salida'}")
-     print("\nSelecciona el índice del dispositivo de entrada que prefieras para grabar audio.")
-
- def record_audio(duration=8, sample_rate=44100, device_index=None):
-     """
-     Records audio from the microphone for a given duration and saves it as a WAV file.
-
-     Args:
-         duration (int): Recording duration in seconds.
-         sample_rate (int): Audio sample rate.
-         device_index (int): Index of the audio device to use.
-     """
-     print("Grabando...")
-
-     # Start a single-channel recording
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, device=device_index)
-     sd.wait()  # Wait for the recording to finish
-
-     # Save the audio file
-     write(AUDIO_PATH, sample_rate, audio_data)
-     print(f"Grabación completada. Archivo guardado en: {AUDIO_PATH}")
-
- if __name__ == "__main__":
-     # Step 1: list the audio devices
-     listar_dispositivos()
-
-     # Wait for the user to pick a device index
-     device_index = int(input("Introduce el índice del dispositivo de entrada que deseas utilizar: "))
-
-     # Step 2: record audio with the selected device
-     record_audio(device_index=device_index)
src/call_openai_api.py DELETED
@@ -1,80 +0,0 @@
- import os
-
- from dotenv import load_dotenv
- from langchain.chat_models import ChatOpenAI
- from langchain.prompts import PromptTemplate
- from langchain.chains import LLMChain
- from pathlib import Path
-
- # Load environment variables from the .env file kept under models/
- project_root = Path(__file__).resolve().parent.parent  # Go up to the project root
- env_path = project_root / "models" / ".env"            # Full path to the .env file
- load_dotenv(dotenv_path=env_path)
-
- # API key configuration
- api_key = os.getenv("OPENAI_API_KEY")
- if not api_key:
-     raise ValueError("No se encontro la clave de API")
-
- OPENAI_KEY_VAL = api_key
-
- llm = ChatOpenAI(
-     openai_api_key=OPENAI_KEY_VAL,
-     temperature=0.7,
-     model="gpt-4"
- )
-
- # Prompt template filled with the text read from the file
- template = """
- Eres un asistente de IA que orienta a los alumnos a ser mejores personas. Haz una haiku de 5 lineas sobre lo que te estan comentando. Da siempre la respuesta en Español
- Texto: {texto}
- Respuesta:
- """
- prompt = PromptTemplate(
-     input_variables=["texto"],
-     template=template
- )
-
- chain = LLMChain(
-     llm=llm,
-     prompt=prompt
- )
-
- def save_summary_to_file(summary_text, filename='C:/programacionEjercicios/miwav2lipv6/results/OpenAI_response.txt'):
-     try:
-         with open(filename, 'w', encoding='utf-8') as file:
-             file.write(summary_text)
-         print(f"El resumen se ha guardado exitosamente en {filename}")
-     except Exception as e:
-         print(f"Ocurrio un error al guardar el resumen {e}")
-
- def read_text_from_file(filename):
-     try:
-         with open(filename, 'r') as file:
-             return file.read()
-     except Exception as e:
-         print(f"Error al leer el archivo {filename}: {e}")
-         return ""
-
-
- def moni(archivo):
-     # Read the user text from the given file and run it through the chain
-     texto_usuario = read_text_from_file(archivo)
-     resultado = chain.run(texto=texto_usuario)
-
-     # Show the generated response
-     print("\nResumen generado:")
-     print(resultado)
-     save_summary_to_file(resultado)
-
-     return resultado
-
-
- if __name__ == "__main__":
-     # moni() requires an input file; default to the transcription used elsewhere in the project
-     moni("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
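For reference, the interface scripts consume this module through its moni entry point, as in the deleted app.py above:

    from call_openai_api import moni as rtff

    # app.py passes the transcription file and receives the generated haiku back.
    respuesta_openai = rtff("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")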
src/convert_models.py DELETED
@@ -1,16 +0,0 @@
- import sys
- from pathlib import Path
-
- # Add `src` to `sys.path` so Python can find the `utils` module
- sys.path.append(str(Path(__file__).resolve().parent))
-
- # Import the helper from utils/notebook_utils.py
- from utils.notebook_utils import download_file
- from ov_wav2lip_helper import download_and_convert_models
-
-
- OV_FACE_DETECTION_MODEL_PATH = Path("../miwav2lipv6/models/face_detection.xml")
- OV_WAV2LIP_MODEL_PATH = Path("../miwav2lipv6/models/wav2lip.xml")
-
-
- download_and_convert_models(OV_FACE_DETECTION_MODEL_PATH, OV_WAV2LIP_MODEL_PATH)
src/gradio_helper.py DELETED
@@ -1,26 -0,0 @@
- from typing import Callable
- import gradio as gr
- import numpy as np
-
-
- examples = [
-     [
-         #"data_video_sun_5s.mp4",
-         "data_video_sun.mp4",
-         "data_audio_sun_5s.wav",
-     ],
- ]
-
-
- def make_demo(fn: Callable):
-     demo = gr.Interface(
-         fn=fn,
-         inputs=[
-             gr.Video(label="Face video"),
-             gr.Audio(label="Audio", type="filepath"),
-         ],
-         outputs="video",
-         examples=examples,
-         allow_flagging="never",
-     )
-     return demo
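For context, make_demo only assembles the interface; the caller supplies the inference callable. A minimal usage sketch (run_wav2lip is a hypothetical stand-in; the real project wires in its Wav2Lip/OpenVINO inference function):

    from gradio_helper import make_demo

    def run_wav2lip(face_video, audio_path):
        # Hypothetical stand-in: run lip-sync inference and return the output video path.
        return "results/result_voice.mp4"

    demo = make_demo(run_wav2lip)
    demo.launch()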
src/interface.py DELETED
@@ -1,60 +0,0 @@
- # interface.py
-
- import gradio as gr
- import sounddevice as sd
- from scipy.io.wavfile import write
- import tempfile
- import shutil
- import os
-
- # Absolute video and audio paths to avoid access errors
- AUDIO_COPY_PATH = os.path.abspath(os.path.join("..", "miwav2lipv6", "assets", "audio", "grabacion_gradio.wav"))
- #VIDEO_PATH = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun_5s.mp4")
- VIDEO_PATH = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun.mp4")
-
- # Verify that the video exists
- if not os.path.exists(VIDEO_PATH):
-     print(f"Advertencia: El archivo de video no se encontró en la ruta {VIDEO_PATH}")
-
- # Function to record audio
- def grabar_audio(duration=8, sample_rate=44100):
-     print("Grabando...")
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
-     sd.wait()  # Wait for the recording to finish
-
-     # Save a temporary audio file
-     temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-     write(temp_audio.name, sample_rate, audio_data)
-     print("Grabación completada. Archivo temporal guardado en:", temp_audio.name)
-
-     # Create `assets/audio` if it does not exist
-     os.makedirs(os.path.dirname(AUDIO_COPY_PATH), exist_ok=True)
-
-     # Copy into `assets/audio`
-     shutil.copy(temp_audio.name, AUDIO_COPY_PATH)
-     print(f"Copia de la grabación guardada en: {AUDIO_COPY_PATH}")
-
-     return AUDIO_COPY_PATH
-
- # Main function for the Gradio interface
- def interfaz():
-     with gr.Blocks() as demo:
-         gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
-
-         # Create a record button
-         with gr.Row():
-             grabar_button = gr.Button("Iniciar Grabación")
-
-             # Show the recorded audio on the right
-             output_audio = gr.Audio(label="Grabación de Audio", type="filepath")
-
-         # Wire the function to the button
-         grabar_button.click(grabar_audio, outputs=output_audio)
-
-     return demo
-
- # Run the interface with the absolute path in allowed_paths
- if __name__ == "__main__":
-     demo = interfaz()
-     demo.launch(allowed_paths=[os.path.dirname(AUDIO_COPY_PATH)])
src/interfaceV2.py DELETED
@@ -1,183 +0,0 @@
- (contents identical, line for line, to the deleted app.py listed above)
src/ov_inference.py DELETED
@@ -1,637 +0,0 @@
- from glob import glob
- from enum import Enum
- import math
- import subprocess
-
- import cv2
- import numpy as np
- from tqdm import tqdm
- import torch
- import torch.nn.functional as F
-
- from Wav2Lip import audio
- import openvino as ov
-
-
- device = "cpu"
-
-
- def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh):
-     xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1
-     dx, dy = (xc - axc) / aww, (yc - ayc) / ahh
-     dw, dh = math.log(ww / aww), math.log(hh / ahh)
-     return dx, dy, dw, dh
-
-
- def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh):
-     xc, yc = dx * aww + axc, dy * ahh + ayc
-     ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh
-     x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2
-     return x1, y1, x2, y2
-
-
- def nms(dets, thresh):
-     if 0 == len(dets):
-         return []
-     x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
-     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
-     order = scores.argsort()[::-1]
-
-     keep = []
-     while order.size > 0:
-         i = order[0]
-         keep.append(i)
-         xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]])
-         xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]])
-
-         w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1)
-         ovr = w * h / (areas[i] + areas[order[1:]] - w * h)
-
-         inds = np.where(ovr <= thresh)[0]
-         order = order[inds + 1]
-
-     return keep
-
-
- def encode(matched, priors, variances):
-     """Encode the variances from the priorbox layers into the ground truth boxes
-     we have matched (based on jaccard overlap) with the prior boxes.
-     Args:
-         matched: (tensor) Coords of ground truth for each prior in point-form
-             Shape: [num_priors, 4].
-         priors: (tensor) Prior boxes in center-offset form
-             Shape: [num_priors, 4].
-         variances: (list[float]) Variances of priorboxes
-     Return:
-         encoded boxes (tensor), Shape: [num_priors, 4]
-     """
-
-     # dist b/t match center and prior's center
-     g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
-     # encode variance
-     g_cxcy /= variances[0] * priors[:, 2:]
-     # match wh / prior wh
-     g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
-     g_wh = torch.log(g_wh) / variances[1]
-     # return target for smooth_l1_loss
-     return torch.cat([g_cxcy, g_wh], 1)  # [num_priors, 4]
-
-
- def decode(loc, priors, variances):
-     """Decode locations from predictions using priors to undo
-     the encoding we did for offset regression at train time.
-     Args:
-         loc (tensor): location predictions for loc layers,
-             Shape: [num_priors, 4]
-         priors (tensor): Prior boxes in center-offset form.
-             Shape: [num_priors, 4].
-         variances: (list[float]) Variances of priorboxes
-     Return:
-         decoded bounding box predictions
-     """
-
-     boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
-     boxes[:, :2] -= boxes[:, 2:] / 2
-     boxes[:, 2:] += boxes[:, :2]
-     return boxes
-
-
- def batch_decode(loc, priors, variances):
-     """Batched variant of decode(); same arguments with a leading batch dimension."""
-
-     boxes = torch.cat((priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2)
-     boxes[:, :, :2] -= boxes[:, :, 2:] / 2
-     boxes[:, :, 2:] += boxes[:, :, :2]
-     return boxes
-
-
- def get_smoothened_boxes(boxes, T):
-     for i in range(len(boxes)):
-         if i + T > len(boxes):
-             window = boxes[len(boxes) - T:]
-         else:
-             window = boxes[i: i + T]
-         boxes[i] = np.mean(window, axis=0)
-     return boxes
-
-
- def detect(net, img, device):
-     img = img - np.array([104, 117, 123])
-     img = img.transpose(2, 0, 1)
-     img = img.reshape((1,) + img.shape)
-
-     img = torch.from_numpy(img).float().to(device)
-     BB, CC, HH, WW = img.size()
-
-     results = net({"x": img})
-     olist = [torch.Tensor(results[i]) for i in range(12)]
-
-     bboxlist = []
-     for i in range(len(olist) // 2):
-         olist[i * 2] = F.softmax(olist[i * 2], dim=1)
-     olist = [oelem.data.cpu() for oelem in olist]
-     for i in range(len(olist) // 2):
-         ocls, oreg = olist[i * 2], olist[i * 2 + 1]
-         FB, FC, FH, FW = ocls.size()  # feature map size
-         stride = 2 ** (i + 2)  # 4, 8, 16, 32, 64, 128
-         anchor = stride * 4
-         poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
-         for Iindex, hindex, windex in poss:
-             axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
-             score = ocls[0, 1, hindex, windex]
-             loc = oreg[0, :, hindex, windex].contiguous().view(1, 4)
-             priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]])
-             variances = [0.1, 0.2]
-             box = decode(loc, priors, variances)
-             x1, y1, x2, y2 = box[0] * 1.0
-             # cv2.rectangle(imgshow, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
-             bboxlist.append([x1, y1, x2, y2, score])
-     bboxlist = np.array(bboxlist)
-     if 0 == len(bboxlist):
-         bboxlist = np.zeros((1, 5))
-
-     return bboxlist
-
-
- def batch_detect(net, imgs, device):
-     imgs = imgs - np.array([104, 117, 123])
-     imgs = imgs.transpose(0, 3, 1, 2)
-
-     imgs = torch.from_numpy(imgs).float().to(device)
-     BB, CC, HH, WW = imgs.size()
-
-     results = net({"x": imgs.numpy()})
  (listing truncated here; the rest of this 637-line deletion is not shown in the source)
174
- olist = [torch.Tensor(results[i]) for i in range(12)]
175
-
176
- bboxlist = []
177
- for i in range(len(olist) // 2):
178
- olist[i * 2] = F.softmax(olist[i * 2], dim=1)
179
- # olist[i * 2] = (olist[i * 2], dim=1)
180
- olist = [oelem.data.cpu() for oelem in olist]
181
- for i in range(len(olist) // 2):
182
- ocls, oreg = olist[i * 2], olist[i * 2 + 1]
183
- FB, FC, FH, FW = ocls.size() # feature map size
184
- stride = 2 ** (i + 2) # 4,8,16,32,64,128
185
- anchor = stride * 4
186
- poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
187
- for Iindex, hindex, windex in poss:
188
- axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
189
- score = ocls[:, 1, hindex, windex]
190
- loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4)
191
- priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4)
192
- variances = [0.1, 0.2]
193
- box = batch_decode(loc, priors, variances)
194
- box = box[:, 0] * 1.0
195
- # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1)
196
- bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy())
197
- bboxlist = np.array(bboxlist)
198
- if 0 == len(bboxlist):
199
- bboxlist = np.zeros((1, BB, 5))
200
-
201
- return bboxlist
202
-
203
-
204
- def flip_detect(net, img, device):
205
- img = cv2.flip(img, 1)
206
- b = detect(net, img, device)
207
-
208
- bboxlist = np.zeros(b.shape)
209
- bboxlist[:, 0] = img.shape[1] - b[:, 2]
210
- bboxlist[:, 1] = b[:, 1]
211
- bboxlist[:, 2] = img.shape[1] - b[:, 0]
212
- bboxlist[:, 3] = b[:, 3]
213
- bboxlist[:, 4] = b[:, 4]
214
- return bboxlist
215
-
216
-
217
- def pts_to_bb(pts):
218
- min_x, min_y = np.min(pts, axis=0)
219
- max_x, max_y = np.max(pts, axis=0)
220
- return np.array([min_x, min_y, max_x, max_y])
221
-
222
-
223
- class OVFaceDetector(object):
224
- """An abstract class representing a face detector.
225
-
226
- Any other face detection implementation must subclass it. All subclasses
227
- must implement ``detect_from_image``, that return a list of detected
228
- bounding boxes. Optionally, for speed considerations detect from path is
229
- recommended.
230
- """
231
-
232
- def __init__(self, device, verbose):
233
- self.device = device
234
- self.verbose = verbose
235
-
236
- def detect_from_image(self, tensor_or_path):
237
- """Detects faces in a given image.
238
-
239
- This function detects the faces present in a provided (usually BGR)
240
- image. The input can be either the image itself or the path to it.
241
-
242
- Arguments:
243
- tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path
244
- to an image or the image itself.
245
-
246
- Example::
247
-
248
- >>> path_to_image = 'data/image_01.jpg'
249
- ... detected_faces = detect_from_image(path_to_image)
250
- [A list of bounding boxes (x1, y1, x2, y2)]
251
- >>> image = cv2.imread(path_to_image)
252
- ... detected_faces = detect_from_image(image)
253
- [A list of bounding boxes (x1, y1, x2, y2)]
254
-
255
- """
256
- raise NotImplementedError
257
-
258
- def detect_from_directory(self, path, extensions=[".jpg", ".png"], recursive=False, show_progress_bar=True):
259
- """Detects faces from all the images present in a given directory.
260
-
261
- Arguments:
262
- path {string} -- a string containing a path that points to the folder containing the images
263
-
264
- Keyword Arguments:
265
- extensions {list} -- list of string containing the extensions to be
266
- consider in the following format: ``.extension_name`` (default:
267
- {['.jpg', '.png']}) recursive {bool} -- option wherever to scan the
268
- folder recursively (default: {False}) show_progress_bar {bool} --
269
- display a progressbar (default: {True})
270
-
271
- Example:
272
- >>> directory = 'data'
273
- ... detected_faces = detect_from_directory(directory)
274
- {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]}
275
-
276
- """
277
- if self.verbose:
278
- logger = logging.getLogger(__name__)
279
-
280
- if len(extensions) == 0:
281
- if self.verbose:
282
- logger.error("Expected at list one extension, but none was received.")
283
- raise ValueError
284
-
285
- if self.verbose:
286
- logger.info("Constructing the list of images.")
287
- additional_pattern = "/**/*" if recursive else "/*"
288
- files = []
289
- for extension in extensions:
290
- files.extend(glob(path + additional_pattern + extension, recursive=recursive))
291
-
292
- if self.verbose:
293
- logger.info("Finished searching for images. %s images found", len(files))
294
- logger.info("Preparing to run the detection.")
295
-
296
- predictions = {}
297
- for image_path in tqdm(files, disable=not show_progress_bar):
298
- if self.verbose:
299
- logger.info("Running the face detector on image: %s", image_path)
300
- predictions[image_path] = self.detect_from_image(image_path)
301
-
302
- if self.verbose:
303
- logger.info("The detector was successfully run on all %s images", len(files))
304
-
305
- return predictions
306
-
307
- @property
308
- def reference_scale(self):
309
- raise NotImplementedError
310
-
311
- @property
312
- def reference_x_shift(self):
313
- raise NotImplementedError
314
-
315
- @property
316
- def reference_y_shift(self):
317
- raise NotImplementedError
318
-
319
- @staticmethod
320
- def tensor_or_path_to_ndarray(tensor_or_path, rgb=True):
321
- """Convert path (represented as a string) or torch.tensor to a numpy.ndarray
322
-
323
- Arguments:
324
- tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself
325
- """
326
- if isinstance(tensor_or_path, str):
327
- return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1]
328
- elif torch.is_tensor(tensor_or_path):
329
- # Call cpu in case its coming from cuda
330
- return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy()
331
- elif isinstance(tensor_or_path, np.ndarray):
332
- return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path
333
- else:
334
- raise TypeError
335
-
336
-
337
- class OVSFDDetector(OVFaceDetector):
338
- def __init__(self, device, path_to_detector="models/face_detection.xml", verbose=False):
339
- super(OVSFDDetector, self).__init__(device, verbose)
340
-
341
- core = ov.Core()
342
- self.face_detector = core.compile_model(path_to_detector, self.device)
343
-
344
- def detect_from_image(self, tensor_or_path):
345
- image = self.tensor_or_path_to_ndarray(tensor_or_path)
346
-
347
- bboxlist = detect(self.face_detector, image, device="cpu")
348
- keep = nms(bboxlist, 0.3)
349
- bboxlist = bboxlist[keep, :]
350
- bboxlist = [x for x in bboxlist if x[-1] > 0.5]
351
-
352
- return bboxlist
353
-
354
- def detect_from_batch(self, images):
355
- bboxlists = batch_detect(self.face_detector, images, device="cpu")
356
- keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])]
357
- bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)]
358
- bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists]
359
-
360
- return bboxlists
361
-
362
- @property
363
- def reference_scale(self):
364
- return 195
365
-
366
- @property
367
- def reference_x_shift(self):
368
- return 0
369
-
370
- @property
371
- def reference_y_shift(self):
372
- return 0
373
-
374
-
375
- class LandmarksType(Enum):
376
- """Enum class defining the type of landmarks to detect.
377
-
378
- ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
379
- ``_2halfD`` - these points represent the projection of the 3D points onto the image plane
380
- ``_3D`` - detect the points ``(x,y,z)`` in a 3D space
381
-
382
- """
383
-
384
- _2D = 1
385
- _2halfD = 2
386
- _3D = 3
387
-
388
-
389
- class NetworkSize(Enum):
390
- # TINY = 1
391
- # SMALL = 2
392
- # MEDIUM = 3
393
- LARGE = 4
394
-
395
- def __new__(cls, value):
396
- member = object.__new__(cls)
397
- member._value_ = value
398
- return member
399
-
400
- def __int__(self):
401
- return self.value
402
-
403
-
404
- class OVFaceAlignment:
405
- def __init__(
406
- self, landmarks_type, network_size=NetworkSize.LARGE, device="CPU", flip_input=False, verbose=False, path_to_detector="models/face_detection.xml"
407
- ):
408
- self.device = device
409
- self.flip_input = flip_input
410
- self.landmarks_type = landmarks_type
411
- self.verbose = verbose
412
-
413
- network_size = int(network_size)
414
-
415
- self.face_detector = OVSFDDetector(device=device, path_to_detector=path_to_detector, verbose=verbose)
416
-
417
- def get_detections_for_batch(self, images):
418
- images = images[..., ::-1]
419
- detected_faces = self.face_detector.detect_from_batch(images.copy())
420
- results = []
421
-
422
- for i, d in enumerate(detected_faces):
423
- if len(d) == 0:
424
- results.append(None)
425
- continue
426
- d = d[0]
427
- d = np.clip(d, 0, None)
428
-
429
- x1, y1, x2, y2 = map(int, d[:-1])
430
- results.append((x1, y1, x2, y2))
431
-
432
- return results
433
-
434
-
435
- def face_detect_ov(images, device, face_det_batch_size, pads, nosmooth, path_to_detector):
436
- detector = OVFaceAlignment(LandmarksType._2D, flip_input=False, device=device, path_to_detector=path_to_detector)
437
-
438
- batch_size = face_det_batch_size
439
-
440
- print("face_detect_ov images[0].shape: ", images[0].shape)
441
- while 1:
442
- predictions = []
443
- try:
444
- for i in tqdm(range(0, len(images), batch_size)):
445
- predictions.extend(detector.get_detections_for_batch(np.array(images[i : i + batch_size])))
446
- except RuntimeError:
447
- if batch_size == 1:
448
- raise RuntimeError("Image too big to run face detection on GPU. Please use the --resize_factor argument")
449
- batch_size //= 2
450
- print("Recovering from OOM error; New batch size: {}".format(batch_size))
451
- continue
452
- break
453
-
454
- results = []
455
- pady1, pady2, padx1, padx2 = pads
456
- for rect, image in zip(predictions, images):
457
- if rect is None:
458
- # check this frame where the face was not detected.
459
- cv2.imwrite("temp/faulty_frame.jpg", image)
460
- raise ValueError("Face not detected! Ensure the video contains a face in all the frames.")
461
-
462
- y1 = max(0, rect[1] - pady1)
463
- y2 = min(image.shape[0], rect[3] + pady2)
464
- x1 = max(0, rect[0] - padx1)
465
- x2 = min(image.shape[1], rect[2] + padx2)
466
-
467
- results.append([x1, y1, x2, y2])
468
-
469
- boxes = np.array(results)
470
- if not nosmooth:
471
- boxes = get_smoothened_boxes(boxes, T=5)
472
- results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)] for image, (x1, y1, x2, y2) in zip(images, boxes)]
473
-
474
- del detector
475
- return results
476
-
477
-
478
- def datagen(frames, mels, box, static, face_det_batch_size, pads, nosmooth, img_size, wav2lip_batch_size, path_to_detector):
479
- img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
480
-
481
- if box[0] == -1:
482
- if not static:
483
- # BGR2RGB for CNN face detection
484
- face_det_results = face_detect_ov(frames, "CPU", face_det_batch_size, pads, nosmooth, path_to_detector)
485
- else:
486
- face_det_results = face_detect_ov([frames[0]], "CPU", face_det_batch_size, pads, nosmooth, path_to_detector)
487
- else:
488
- print("Using the specified bounding box instead of face detection...")
489
- y1, y2, x1, x2 = box
490
- face_det_results = [[f[y1:y2, x1:x2], (y1, y2, x1, x2)] for f in frames]
491
-
492
- for i, m in enumerate(mels):
493
- idx = 0 if static else i % len(frames)
494
- frame_to_save = frames[idx].copy()
495
- face, coords = face_det_results[idx].copy()
496
-
497
- face = cv2.resize(face, (img_size, img_size))
498
-
499
- img_batch.append(face)
500
- mel_batch.append(m)
501
- frame_batch.append(frame_to_save)
502
- coords_batch.append(coords)
503
-
504
- if len(img_batch) >= wav2lip_batch_size:
505
- img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
506
-
507
- img_masked = img_batch.copy()
508
- img_masked[:, img_size // 2 :] = 0
509
-
510
- img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.0
511
- mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
512
-
513
- yield img_batch, mel_batch, frame_batch, coords_batch
514
- img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
515
-
516
- if len(img_batch) > 0:
517
- img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
518
-
519
- img_masked = img_batch.copy()
520
- img_masked[:, img_size // 2 :] = 0
521
-
522
- img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.0
523
- mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
524
-
525
- yield img_batch, mel_batch, frame_batch, coords_batch
526
-
527
-
528
- def ov_inference(
529
- face_path,
530
- audio_path,
531
- face_detection_path="models/face_detection.xml",
532
- wav2lip_path="models/wav2lip.xml",
533
- inference_device="CPU",
534
- wav2lip_batch_size=128,
535
- outfile="results/result_voice.mp4",
536
- resize_factor=1,
537
- rotate=False,
538
- crop=[0, -1, 0, -1],
539
- mel_step_size=16,
540
- box=[-1, -1, -1, -1],
541
- static=False,
542
- img_size=96,
543
- face_det_batch_size=16,
544
- pads=[0, 10, 0, 0],
545
- nosmooth=False,
546
- ):
547
- print("Reading video frames...")
548
-
549
- video_stream = cv2.VideoCapture(face_path)
550
- fps = video_stream.get(cv2.CAP_PROP_FPS)
551
-
552
- full_frames = []
553
- while 1:
554
- still_reading, frame = video_stream.read()
555
- if not still_reading:
556
- video_stream.release()
557
- break
558
- if resize_factor > 1:
559
- frame = cv2.resize(frame, (frame.shape[1] // resize_factor, frame.shape[0] // resize_factor))
560
-
561
- if rotate:
562
- frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
563
-
564
- y1, y2, x1, x2 = crop
565
- if x2 == -1:
566
- x2 = frame.shape[1]
567
- if y2 == -1:
568
- y2 = frame.shape[0]
569
-
570
- frame = frame[y1:y2, x1:x2]
571
-
572
- full_frames.append(frame)
573
-
574
- print("Number of frames available for inference: " + str(len(full_frames)))
575
-
576
- core = ov.Core()
577
-
578
- if not audio_path.endswith(".wav"):
579
- print("Extracting raw audio...")
580
- command = "ffmpeg -y -i {} -strict -2 {}".format(audio_path, "temp/temp.wav")
581
-
582
- subprocess.call(command, shell=True)
583
- audio_path = "temp/temp.wav"
584
-
585
- wav = audio.load_wav(audio_path, 16000)
586
- mel = audio.melspectrogram(wav)
587
- print(mel.shape)
588
-
589
- if np.isnan(mel.reshape(-1)).sum() > 0:
590
- raise ValueError("Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again")
591
-
592
- mel_chunks = []
593
- mel_idx_multiplier = 80.0 / fps
594
- i = 0
595
- while 1:
596
- start_idx = int(i * mel_idx_multiplier)
597
- if start_idx + mel_step_size > len(mel[0]):
598
- mel_chunks.append(mel[:, len(mel[0]) - mel_step_size :])
599
- break
600
- mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size])
601
- i += 1
602
-
603
- print("Length of mel chunks: {}".format(len(mel_chunks)))
604
-
605
- full_frames = full_frames[: len(mel_chunks)]
606
- batch_size = wav2lip_batch_size
607
- gen = datagen(full_frames.copy(), mel_chunks, box, static, face_det_batch_size, pads, nosmooth, img_size, wav2lip_batch_size, face_detection_path)
608
- for i, (img_batch, mel_batch, frames, coords) in enumerate(tqdm(gen, total=int(np.ceil(float(len(mel_chunks)) / batch_size)))):
609
- if i == 0:
610
- img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
611
- mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)
612
- compiled_wav2lip_model = core.compile_model(wav2lip_path, inference_device)
613
- print("Model loaded")
614
-
615
- frame_h, frame_w = full_frames[0].shape[:-1]
616
- out = cv2.VideoWriter("C:/programacionEjercicios/miwav2lipv6/src/Wav2Lip/temp/result.avi", cv2.VideoWriter_fourcc(*"DIVX"), fps, (frame_w, frame_h))
617
- pred_ov = compiled_wav2lip_model({"audio_sequences": mel_batch.numpy(), "face_sequences": img_batch.numpy()})[0]
618
- else:
619
- img_batch = np.transpose(img_batch, (0, 3, 1, 2))
620
- mel_batch = np.transpose(mel_batch, (0, 3, 1, 2))
621
- pred_ov = compiled_wav2lip_model({"audio_sequences": mel_batch, "face_sequences": img_batch})[0]
622
-
624
- pred_ov = pred_ov.transpose(0, 2, 3, 1) * 255.0
625
- for p, f, c in zip(pred_ov, frames, coords):
626
- y1, y2, x1, x2 = c
627
- p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
628
-
629
- f[y1:y2, x1:x2] = p
630
- out.write(f)
631
-
632
- out.release()
633
-
634
- command = "ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}".format(audio_path, "C:/programacionEjercicios/miwav2lipv6/src/Wav2Lip/temp/result.avi", outfile)
635
- subprocess.call(command, shell=True)
636
-
637
- return outfile
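
Note: the mel-chunking loop in `ov_inference` advances through the spectrogram at 80 mel frames per second of audio, i.e. 80 / fps mel frames per video frame, and takes a fixed window of `mel_step_size` frames each time. A worked sketch of the indexing (25 fps and the default mel_step_size of 16 are assumed for illustration):

    fps = 25.0
    mel_step_size = 16
    mel_idx_multiplier = 80.0 / fps  # 3.2 mel frames of advance per video frame

    for i in range(3):
        start_idx = int(i * mel_idx_multiplier)
        print(start_idx, start_idx + mel_step_size)  # 0 16, then 3 19, then 6 22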
 
src/ov_wav2lip_helper.py DELETED
@@ -1,68 +0,0 @@
1
- import numpy as np
2
- import sys
3
- import os
4
- import openvino as ov
5
- import torch
6
-
7
- from pathlib import Path
8
- # Add `src` to `sys.path` so Python can find `utils/notebook_utils.py`
9
- sys.path.append(str(Path(__file__).resolve().parent))
10
-
11
- # Import `download_file` from `notebook_utils`
12
- from utils.notebook_utils import download_file
13
- from huggingface_hub import hf_hub_download
14
- from Wav2Lip.face_detection.detection.sfd.net_s3fd import s3fd
15
- from Wav2Lip.models import Wav2Lip
16
-
17
-
18
-
19
- def _load(checkpoint_path):
20
- checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
21
- return checkpoint
22
-
23
-
24
- def load_model(path):
25
- model = Wav2Lip()
26
- print("Load checkpoint from: {}".format(path))
27
- checkpoint = _load(path)
28
- s = checkpoint["state_dict"]
29
- new_s = {}
30
- for k, v in s.items():
31
- new_s[k.replace("module.", "")] = v
32
- model.load_state_dict(new_s)
33
-
34
- return model.eval()
35
-
36
-
37
- def download_and_convert_models(ov_face_detection_model_path, ov_wav2lip_model_path):
38
- models_urls = {"s3fd": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"}
39
- path_to_detector = "checkpoints/face_detection.pth"
40
- # Convert Face Detection Model
41
- print("Convert Face Detection Model ...")
42
- if not os.path.isfile(path_to_detector):
43
- download_file(models_urls["s3fd"])
44
- if not os.path.exists("checkpoints"):
45
- os.mkdir("checkpoints")
46
- os.replace("s3fd-619a316812.pth", path_to_detector)
47
- model_weights = torch.load(path_to_detector)
48
-
49
- face_detector = s3fd()
50
- face_detector.load_state_dict(model_weights)
51
-
52
- if not ov_face_detection_model_path.exists():
53
- face_detection_dummy_inputs = torch.FloatTensor(np.random.rand(1, 3, 768, 576))
54
- face_detection_ov_model = ov.convert_model(face_detector, example_input=face_detection_dummy_inputs)
55
- ov.save_model(face_detection_ov_model, ov_face_detection_model_path)
56
- print("Converted face detection OpenVINO model: ", ov_face_detection_model_path)
57
-
58
- print("Convert Wav2Lip Model ...")
59
- path_to_wav2lip = hf_hub_download(repo_id="numz/wav2lip_studio", filename="Wav2lip/wav2lip.pth", local_dir="checkpoints")
60
- wav2lip = load_model(path_to_wav2lip)
61
- img_batch = torch.FloatTensor(np.random.rand(123, 6, 96, 96))
62
- mel_batch = torch.FloatTensor(np.random.rand(123, 1, 80, 16))
63
-
64
- if not ov_wav2lip_model_path.exists():
65
- example_inputs = {"audio_sequences": mel_batch, "face_sequences": img_batch}
66
- wav2lip_ov_model = ov.convert_model(wav2lip, example_input=example_inputs)
67
- ov.save_model(wav2lip_ov_model, ov_wav2lip_model_path)
68
- print("Converted face detection OpenVINO model: ", ov_wav2lip_model_path)
 
src/run_inference.py DELETED
@@ -1,67 +0,0 @@
1
- import os
2
- from ov_inference import ov_inference
3
- import soundfile as sf
4
- import cv2
5
-
6
- def verificar_archivos(video_path, audio_path):
7
- """
8
- Verifica que los archivos de video y audio existen y son legibles.
9
-
10
- Args:
11
- video_path (str): Ruta del archivo de video.
12
- audio_path (str): Ruta del archivo de audio.
13
-
14
- Returns:
15
- bool: True si ambos archivos son legibles, False en caso contrario.
16
- """
17
- # Verificar el archivo de video
18
- if not os.path.exists(video_path):
19
- print(f"Error: El archivo de video no existe en la ruta {video_path}")
20
- return False
21
- else:
22
- # Intentar abrir el video
23
- cap = cv2.VideoCapture(video_path)
24
- if not cap.isOpened():
25
- print(f"Error: No se puede abrir el archivo de video en {video_path}")
26
- return False
27
- else:
28
- print(f"Archivo de video {video_path} está accesible.")
29
- cap.release()
30
-
31
- # Verificar el archivo de audio
32
- if not os.path.exists(audio_path):
33
- print(f"Error: El archivo de audio no existe en la ruta {audio_path}")
34
- return False
35
- else:
36
- try:
37
- # Intentar abrir el archivo de audio
38
- with sf.SoundFile(audio_path) as audio_file:
39
- print(f"Archivo de audio {audio_path} está accesible.")
40
- except Exception as e:
41
- print(f"Error al leer el archivo de audio: {e}")
42
- return False
43
-
44
- return True
45
-
46
- # Rutas de archivos
47
- #video_path = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun_5s.mp4")
48
- video_path = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun.mp4")
49
- #audio_path = os.path.abspath("../miwav2lipv6/assets/audio/grabacion_gradio.wav")
50
- audio_path = os.path.abspath("../miwav2lipv6/assets/audio/audio.wav")
51
- face_detection_path = os.path.abspath("../miwav2lipv6/models/face_detection.xml")
52
- wav2lip_path = os.path.abspath("../miwav2lipv6/models/wav2lip.xml")
53
- outfile = os.path.abspath("../miwav2lipv6/results/result_voice.mp4")
54
-
55
- # Check the files before calling ov_inference
56
- if verificar_archivos(video_path, audio_path):
57
- ov_inference(
58
- video_path,
59
- audio_path,
60
- face_detection_path=face_detection_path,
61
- wav2lip_path=wav2lip_path,
62
- inference_device="CPU",
63
- outfile=outfile,
64
- resize_factor = 2,
65
- )
66
- else:
67
- print("No se pudo proceder con la inferencia debido a problemas con los archivos.")
 
src/text_to_speech.py DELETED
@@ -1,36 +0,0 @@
1
- # text_to_speech.py
2
-
3
- from gtts import gTTS
4
- import os
5
-
6
- # File paths
7
- #TRANSCRIPTION_TEXT_PATH = "C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt"
8
- TRANSCRIPTION_TEXT_PATH = "C:/programacionEjercicios/miwav2lipv6/results/OpenAI_response.txt"
9
- OUTPUT_AUDIO_PATH = "C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav"
10
-
11
- def generar_audio_desde_texto():
12
- """
13
- Converts the text in `transcripcion.txt` into a Spanish audio file (`audio.wav`).
14
- """
15
- try:
16
- # Check whether the transcription file exists
17
- if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
18
- print("Error: The transcription file was not found.")
19
- return
20
-
21
- # Read the contents of transcripcion.txt
22
- with open(TRANSCRIPTION_TEXT_PATH, "r", encoding="utf-8") as file:
23
- texto = file.read()
24
-
25
- # Generate the Spanish audio using gTTS
26
- tts = gTTS(text=texto, lang='es', slow=False)
27
- tts.save(OUTPUT_AUDIO_PATH)
28
-
29
- print(f"Audio generated successfully at: {OUTPUT_AUDIO_PATH}")
30
-
31
- except Exception as e:
32
- print(f"Error generating the audio: {e}")
33
-
34
- if __name__ == "__main__":
35
- generar_audio_desde_texto()
36
-
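
Note: gTTS always encodes its output as MP3 regardless of the extension passed to `save()`, so the `audio.wav` written above actually contains MP3 data. If a downstream consumer needs a genuine 16 kHz WAV (as Wav2Lip's audio loader does), one option is to transcode with ffmpeg; a sketch with hypothetical file names:

    from gtts import gTTS
    import subprocess

    tts = gTTS(text="hola", lang="es")
    tts.save("speech.mp3")  # gTTS output is always MP3
    # Re-encode into a genuine 16 kHz mono WAV
    subprocess.run(["ffmpeg", "-y", "-i", "speech.mp3", "-ar", "16000", "-ac", "1", "speech.wav"], check=True)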
 
src/utils/notebook_utils.py DELETED
@@ -1,708 +0,0 @@
1
- import os
2
- import platform
3
- import sys
4
- import threading
5
- import time
6
- import urllib.parse
7
-
8
- from os import PathLike
9
- from pathlib import Path
10
- from typing import List, NamedTuple, Optional, Tuple
11
- from tqdm import tqdm
12
-
13
- import numpy as np
14
- from openvino.runtime import Core, Type, get_version
15
- from IPython.display import HTML, Image, display
16
-
17
- import openvino as ov
18
- from openvino.runtime.passes import Manager, MatcherPass, WrapType, Matcher
19
- from openvino.runtime import opset10 as ops
20
-
21
-
22
- # ## Files
23
- #
24
- # Load an image, download a file, download an IR model, and create a progress bar to show download progress.
25
-
26
- def device_widget(default="AUTO", exclude=None, added=None):
27
- import openvino as ov
28
- import ipywidgets as widgets
29
-
30
- core = ov.Core()
31
-
32
- supported_devices = core.available_devices + ["AUTO"]
33
- exclude = exclude or []
34
- if exclude:
35
- for ex_device in exclude:
36
- if ex_device in supported_devices:
37
- supported_devices.remove(ex_device)
38
-
39
- added = added or []
40
- if added:
41
- for add_device in added:
42
- if add_device not in supported_devices:
43
- supported_devices.append(add_device)
44
-
45
- device = widgets.Dropdown(
46
- options=supported_devices,
47
- value=default,
48
- description="Device:",
49
- disabled=False,
50
- )
51
- return device
52
-
53
-
54
- def quantization_widget(default=True):
55
- import ipywidgets as widgets
56
-
57
- to_quantize = widgets.Checkbox(
58
- value=default,
59
- description="Quantization",
60
- disabled=False,
61
- )
62
-
63
- return to_quantize
64
-
65
-
66
- def pip_install(*args):
67
- import subprocess # nosec - disable B404:import-subprocess check
68
-
69
- cli_args = []
70
- for arg in args:
71
- cli_args.extend(str(arg).split(" "))
72
- subprocess.run([sys.executable, "-m", "pip", "install", *cli_args], shell=(platform.system() == "Windows"), check=True)
73
-
74
-
75
- def load_image(path: str) -> np.ndarray:
76
- """
77
- Loads an image from `path` and returns it as BGR numpy array. `path`
78
- should point to an image file, either a local filename or a url. The image is
79
- not stored to the filesystem. Use the `download_file` function to download and
80
- store an image.
81
-
82
- :param path: Local path name or URL to image.
83
- :return: image as BGR numpy array
84
- """
85
- import cv2
86
- import requests
87
-
88
- if path.startswith("http"):
89
- # Set User-Agent to Mozilla because some websites block
90
- # requests with User-Agent Python
91
- response = requests.get(path, headers={"User-Agent": "Mozilla/5.0"})
92
- array = np.asarray(bytearray(response.content), dtype="uint8")
93
- image = cv2.imdecode(array, -1) # Loads the image as BGR
94
- else:
95
- image = cv2.imread(path)
96
- return image
97
-
98
-
99
- def download_file(
100
- url: PathLike,
101
- filename: PathLike = None,
102
- directory: PathLike = None,
103
- show_progress: bool = True,
104
- silent: bool = False,
105
- timeout: int = 10,
106
- ) -> PathLike:
107
- """
108
- Download a file from a url and save it to the local filesystem. The file is saved to the
109
- current directory by default, or to `directory` if specified. If a filename is not given,
110
- the filename of the URL will be used.
111
-
112
- :param url: URL that points to the file to download
113
- :param filename: Name of the local file to save. Should point to the name of the file only,
114
- not the full path. If None the filename from the url will be used
115
- :param directory: Directory to save the file to. Will be created if it doesn't exist
116
- If None the file will be saved to the current working directory
117
- :param show_progress: If True, show a TQDM progress bar
118
- :param silent: If True, do not print a message if the file already exists
119
- :param timeout: Number of seconds before cancelling the connection attempt
120
- :return: path to downloaded file
121
- """
122
- from tqdm.notebook import tqdm_notebook
123
- import requests
124
-
125
- filename = filename or Path(urllib.parse.urlparse(url).path).name
126
- chunk_size = 16384 # make chunks bigger so that not too many updates are triggered for Jupyter front-end
127
-
128
- filename = Path(filename)
129
- if len(filename.parts) > 1:
130
- raise ValueError(
131
- "`filename` should refer to the name of the file, excluding the directory. "
132
- "Use the `directory` parameter to specify a target directory for the downloaded file."
133
- )
134
-
135
- # create the directory if it does not exist, and add the directory to the filename
136
- if directory is not None:
137
- directory = Path(directory)
138
- directory.mkdir(parents=True, exist_ok=True)
139
- filename = directory / Path(filename)
140
-
141
- try:
142
- response = requests.get(url=url, headers={"User-agent": "Mozilla/5.0"}, stream=True, timeout=timeout)
143
- response.raise_for_status()
144
- except (
145
- requests.exceptions.HTTPError
146
- ) as error: # For error associated with not-200 codes. Will output something like: "404 Client Error: Not Found for url: {url}"
147
- raise Exception(error) from None
148
- except requests.exceptions.Timeout:
149
- raise Exception(
150
- "Connection timed out. If you access the internet through a proxy server, please "
151
- "make sure the proxy is set in the shell from where you launched Jupyter."
152
- ) from None
153
- except requests.exceptions.RequestException as error:
154
- raise Exception(f"File downloading failed with error: {error}") from None
155
-
156
- # download the file if it does not exist, or if it exists with an incorrect file size
157
- filesize = int(response.headers.get("Content-length", 0))
158
- if not filename.exists() or (os.stat(filename).st_size != filesize):
159
- with tqdm(
160
- total=filesize,
161
- unit="B",
162
- unit_scale=True,
163
- unit_divisor=1024,
164
- desc=str(filename),
165
- disable=not show_progress,
166
- ) as progress_bar:
167
- with open(filename, "wb") as file_object:
168
- for chunk in response.iter_content(chunk_size):
169
- file_object.write(chunk)
170
- progress_bar.update(len(chunk))
171
- progress_bar.refresh()
172
- else:
173
- if not silent:
174
- print(f"'{filename}' already exists.")
175
-
176
- response.close()
177
-
178
- return filename.resolve()
179
-
180
-
181
- def download_ir_model(model_xml_url: str, destination_folder: PathLike = None) -> PathLike:
182
- """
183
- Download IR model from `model_xml_url`. Downloads model xml and bin file; the weights file is
184
- assumed to exist at the same location and name as model_xml_url with a ".bin" extension.
185
-
186
- :param model_xml_url: URL to model xml file to download
187
- :param destination_folder: Directory where downloaded model xml and bin are saved. If None, model
188
- files are saved to the current directory
189
- :return: path to downloaded xml model file
190
- """
191
- model_bin_url = model_xml_url[:-4] + ".bin"
192
- model_xml_path = download_file(model_xml_url, directory=destination_folder, show_progress=False)
193
- download_file(model_bin_url, directory=destination_folder)
194
- return model_xml_path
195
-
196
-
197
- # ## Images
198
-
199
- # ### Convert Pixel Data
200
- #
201
- # Normalize image pixel values between 0 and 1, and convert images to RGB and BGR.
202
-
203
- # In[ ]:
204
-
205
-
206
- def normalize_minmax(data):
207
- """
208
- Normalizes the values in `data` between 0 and 1
209
- """
210
- if data.max() == data.min():
211
- raise ValueError("Normalization is not possible because all elements of" f"`data` have the same value: {data.max()}.")
212
- return (data - data.min()) / (data.max() - data.min())
213
-
214
-
215
- def to_rgb(image_data: np.ndarray) -> np.ndarray:
216
- """
217
- Convert image_data from BGR to RGB
218
- """
219
- import cv2
220
-
221
- return cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
222
-
223
-
224
- def to_bgr(image_data: np.ndarray) -> np.ndarray:
225
- """
226
- Convert image_data from RGB to BGR
227
- """
228
- import cv2
229
-
230
- return cv2.cvtColor(image_data, cv2.COLOR_RGB2BGR)
231
-
232
-
233
- # ## Videos
234
-
235
- # ### Video Player
236
- #
237
- # Custom video player to fulfill FPS requirements. You can set target FPS and output size, flip the video horizontally or skip first N frames.
238
-
239
- # In[ ]:
240
-
241
-
242
- class VideoPlayer:
243
- """
244
- Custom video player to fulfill FPS requirements. You can set target FPS and output size,
245
- flip the video horizontally or skip first N frames.
246
-
247
- :param source: Video source. It could be either camera device or video file.
248
- :param size: Output frame size.
249
- :param flip: Flip source horizontally.
250
- :param fps: Target FPS.
251
- :param skip_first_frames: Skip first N frames.
252
- """
253
-
254
- def __init__(self, source, size=None, flip=False, fps=None, skip_first_frames=0, width=1280, height=720):
255
- import cv2
256
-
257
- self.cv2 = cv2 # This is done to access the package in class methods
258
- self.__cap = cv2.VideoCapture(source)
259
- # try HD by default to get better video quality
260
- self.__cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
261
- self.__cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
262
-
263
- if not self.__cap.isOpened():
264
- raise RuntimeError(f"Cannot open {'camera' if isinstance(source, int) else ''} {source}")
265
- # skip first N frames
266
- self.__cap.set(cv2.CAP_PROP_POS_FRAMES, skip_first_frames)
267
- # fps of input file
268
- self.__input_fps = self.__cap.get(cv2.CAP_PROP_FPS)
269
- if self.__input_fps <= 0:
270
- self.__input_fps = 60
271
- # target fps given by user
272
- self.__output_fps = fps if fps is not None else self.__input_fps
273
- self.__flip = flip
274
- self.__size = None
275
- self.__interpolation = None
276
- if size is not None:
277
- self.__size = size
278
- # AREA better for shrinking, LINEAR better for enlarging
279
- self.__interpolation = cv2.INTER_AREA if size[0] < self.__cap.get(cv2.CAP_PROP_FRAME_WIDTH) else cv2.INTER_LINEAR
280
- # first frame
281
- _, self.__frame = self.__cap.read()
282
- self.__lock = threading.Lock()
283
- self.__thread = None
284
- self.__stop = False
285
-
286
- """
287
- Start playing.
288
- """
289
-
290
- def start(self):
291
- self.__stop = False
292
- self.__thread = threading.Thread(target=self.__run, daemon=True)
293
- self.__thread.start()
294
-
295
- """
296
- Stop playing and release resources.
297
- """
298
-
299
- def stop(self):
300
- self.__stop = True
301
- if self.__thread is not None:
302
- self.__thread.join()
303
- self.__cap.release()
304
-
305
- def __run(self):
306
- prev_time = 0
307
- while not self.__stop:
308
- t1 = time.time()
309
- ret, frame = self.__cap.read()
310
- if not ret:
311
- break
312
-
313
- # fulfill target fps
314
- if 1 / self.__output_fps < time.time() - prev_time:
315
- prev_time = time.time()
316
- # replace by current frame
317
- with self.__lock:
318
- self.__frame = frame
319
-
320
- t2 = time.time()
321
- # time to wait [s] to fulfill input fps
322
- wait_time = 1 / self.__input_fps - (t2 - t1)
323
- # wait until it is time for the next frame
324
- time.sleep(max(0, wait_time))
325
-
326
- self.__frame = None
327
-
328
- """
329
- Get current frame.
330
- """
331
-
332
- def next(self):
333
- import cv2
334
-
335
- with self.__lock:
336
- if self.__frame is None:
337
- return None
338
- # need to copy frame, because can be cached and reused if fps is low
339
- frame = self.__frame.copy()
340
- if self.__size is not None:
341
- frame = self.cv2.resize(frame, self.__size, interpolation=self.__interpolation)
342
- if self.__flip:
343
- frame = self.cv2.flip(frame, 1)
344
- return frame
345
-
346
-
347
- # ## Visualization
348
-
349
- # ### Segmentation
350
- #
351
- # Define a SegmentationMap NamedTuple that keeps the labels and colormap for a segmentation project/dataset. Create CityScapesSegmentation and BinarySegmentation SegmentationMaps. Create a function to convert a segmentation map to an RGB image with a colormap, and to show the segmentation result as an overlay over the original image.
352
-
353
- # In[ ]:
354
-
355
-
356
- class Label(NamedTuple):
357
- index: int
358
- color: Tuple
359
- name: Optional[str] = None
360
-
361
-
362
- # In[ ]:
363
-
364
-
365
- class SegmentationMap(NamedTuple):
366
- labels: List
367
-
368
- def get_colormap(self):
369
- return np.array([label.color for label in self.labels])
370
-
371
- def get_labels(self):
372
- labelnames = [label.name for label in self.labels]
373
- if any(labelnames):
374
- return labelnames
375
- else:
376
- return None
377
-
378
-
379
- # In[ ]:
380
-
381
-
382
- cityscape_labels = [
383
- Label(index=0, color=(128, 64, 128), name="road"),
384
- Label(index=1, color=(244, 35, 232), name="sidewalk"),
385
- Label(index=2, color=(70, 70, 70), name="building"),
386
- Label(index=3, color=(102, 102, 156), name="wall"),
387
- Label(index=4, color=(190, 153, 153), name="fence"),
388
- Label(index=5, color=(153, 153, 153), name="pole"),
389
- Label(index=6, color=(250, 170, 30), name="traffic light"),
390
- Label(index=7, color=(220, 220, 0), name="traffic sign"),
391
- Label(index=8, color=(107, 142, 35), name="vegetation"),
392
- Label(index=9, color=(152, 251, 152), name="terrain"),
393
- Label(index=10, color=(70, 130, 180), name="sky"),
394
- Label(index=11, color=(220, 20, 60), name="person"),
395
- Label(index=12, color=(255, 0, 0), name="rider"),
396
- Label(index=13, color=(0, 0, 142), name="car"),
397
- Label(index=14, color=(0, 0, 70), name="truck"),
398
- Label(index=15, color=(0, 60, 100), name="bus"),
399
- Label(index=16, color=(0, 80, 100), name="train"),
400
- Label(index=17, color=(0, 0, 230), name="motorcycle"),
401
- Label(index=18, color=(119, 11, 32), name="bicycle"),
402
- Label(index=19, color=(255, 255, 255), name="background"),
403
- ]
404
-
405
- CityScapesSegmentation = SegmentationMap(cityscape_labels)
406
-
407
- binary_labels = [
408
- Label(index=0, color=(255, 255, 255), name="background"),
409
- Label(index=1, color=(0, 0, 0), name="foreground"),
410
- ]
411
-
412
- BinarySegmentation = SegmentationMap(binary_labels)
413
-
414
-
415
- # In[ ]:
416
-
417
-
418
- def segmentation_map_to_image(result: np.ndarray, colormap: np.ndarray, remove_holes: bool = False) -> np.ndarray:
419
- """
420
- Convert network result of floating point numbers to an RGB image with
421
- integer values from 0-255 by applying a colormap.
422
-
423
- :param result: A single network result after converting to pixel values in H,W or 1,H,W shape.
424
- :param colormap: A numpy array of shape (num_classes, 3) with an RGB value per class.
425
- :param remove_holes: If True, remove holes in the segmentation result.
426
- :return: An RGB image where each pixel is an int8 value according to colormap.
427
- """
428
- import cv2
429
-
430
- if len(result.shape) != 2 and result.shape[0] != 1:
431
- raise ValueError(f"Expected result with shape (H,W) or (1,H,W), got result with shape {result.shape}")
432
-
433
- if len(np.unique(result)) > colormap.shape[0]:
434
- raise ValueError(
435
- f"Expected max {colormap[0]} classes in result, got {len(np.unique(result))} "
436
- "different output values. Please make sure to convert the network output to "
437
- "pixel values before calling this function."
438
- )
439
- elif result.shape[0] == 1:
440
- result = result.squeeze(0)
441
-
442
- result = result.astype(np.uint8)
443
-
444
- contour_mode = cv2.RETR_EXTERNAL if remove_holes else cv2.RETR_TREE
445
- mask = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8)
446
- for label_index, color in enumerate(colormap):
447
- label_index_map = result == label_index
448
- label_index_map = label_index_map.astype(np.uint8) * 255
449
- contours, hierarchies = cv2.findContours(label_index_map, contour_mode, cv2.CHAIN_APPROX_SIMPLE)
450
- cv2.drawContours(
451
- mask,
452
- contours,
453
- contourIdx=-1,
454
- color=color.tolist(),
455
- thickness=cv2.FILLED,
456
- )
457
-
458
- return mask
459
-
460
-
461
- def segmentation_map_to_overlay(image, result, alpha, colormap, remove_holes=False) -> np.ndarray:
462
- """
463
- Returns a new image where a segmentation mask (created with colormap) is overlaid on
464
- the source image.
465
-
466
- :param image: Source image.
467
- :param result: A single network result after converting to pixel values in H,W or 1,H,W shape.
468
- :param alpha: Alpha transparency value for the overlay image.
469
- :param colormap: A numpy array of shape (num_classes, 3) with an RGB value per class.
470
- :param remove_holes: If True, remove holes in the segmentation result.
471
- :return: An RGB image with the segmentation mask overlaid on the source image.
472
- """
473
- import cv2
474
-
475
- if len(image.shape) == 2:
476
- image = np.repeat(np.expand_dims(image, -1), 3, 2)
477
- mask = segmentation_map_to_image(result, colormap, remove_holes)
478
- image_height, image_width = image.shape[:2]
479
- mask = cv2.resize(src=mask, dsize=(image_width, image_height))
480
- return cv2.addWeighted(mask, alpha, image, 1 - alpha, 0)
481
-
482
-
483
- # ### Network Results
484
- #
485
- # Show network result image, optionally together with the source image and a legend with labels.
486
-
487
- # In[ ]:
488
-
489
-
490
- def viz_result_image(
491
- result_image: np.ndarray,
492
- source_image: np.ndarray = None,
493
- source_title: str = None,
494
- result_title: str = None,
495
- labels: List[Label] = None,
496
- resize: bool = False,
497
- bgr_to_rgb: bool = False,
498
- hide_axes: bool = False,
499
- ):
500
- """
501
- Show result image, optionally together with source images, and a legend with labels.
502
-
503
- :param result_image: Numpy array of RGB result image.
504
- :param source_image: Numpy array of source image. If provided this image will be shown
505
- next to the result image. source_image is expected to be in RGB format.
506
- Set bgr_to_rgb to True if source_image is in BGR format.
507
- :param source_title: Title to display for the source image.
508
- :param result_title: Title to display for the result image.
509
- :param labels: List of labels. If provided, a legend will be shown with the given labels.
510
- :param resize: If true, resize the result image to the same shape as the source image.
511
- :param bgr_to_rgb: If true, convert the source image from BGR to RGB. Use this option if
512
- source_image is a BGR image.
513
- :param hide_axes: If true, do not show matplotlib axes.
514
- :return: Matplotlib figure with result image
515
- """
516
- import cv2
517
- import matplotlib.pyplot as plt
518
- from matplotlib.lines import Line2D
519
-
520
- if bgr_to_rgb:
521
- source_image = to_rgb(source_image)
522
- if resize:
523
- result_image = cv2.resize(result_image, (source_image.shape[1], source_image.shape[0]))
524
-
525
- num_images = 1 if source_image is None else 2
526
-
527
- fig, ax = plt.subplots(1, num_images, figsize=(16, 8), squeeze=False)
528
- if source_image is not None:
529
- ax[0, 0].imshow(source_image)
530
- ax[0, 0].set_title(source_title)
531
-
532
- ax[0, num_images - 1].imshow(result_image)
533
- ax[0, num_images - 1].set_title(result_title)
534
-
535
- if hide_axes:
536
- for a in ax.ravel():
537
- a.axis("off")
538
- if labels:
539
- colors = labels.get_colormap()
540
- lines = [
541
- Line2D(
542
- [0],
543
- [0],
544
- color=[item / 255 for item in c.tolist()],
545
- linewidth=3,
546
- linestyle="-",
547
- )
548
- for c in colors
549
- ]
550
- plt.legend(
551
- lines,
552
- labels.get_labels(),
553
- bbox_to_anchor=(1, 1),
554
- loc="upper left",
555
- prop={"size": 12},
556
- )
557
- plt.close(fig)
558
- return fig
559
-
560
-
561
- # ### Live Inference
562
-
563
- # In[ ]:
564
-
565
-
566
- def show_array(frame: np.ndarray, display_handle=None):
567
- """
568
- Display array `frame`. Replace information at `display_handle` with `frame`
569
- encoded as jpeg image. `frame` is expected to have data in BGR order.
570
-
571
- Create a display_handle with: `display_handle = display(display_id=True)`
572
- """
573
- import cv2
574
-
575
- _, frame = cv2.imencode(ext=".jpeg", img=frame)
576
- if display_handle is None:
577
- display_handle = display(Image(data=frame.tobytes()), display_id=True)
578
- else:
579
- display_handle.update(Image(data=frame.tobytes()))
580
- return display_handle
581
-
582
-
583
- # ## Checks and Alerts
584
- #
585
- # Create an alert class to show stylized info/error/warning messages and a `check_device` function that checks whether a given device is available.
586
-
587
- # In[ ]:
588
-
589
-
590
- class NotebookAlert(Exception):
591
- def __init__(self, message: str, alert_class: str):
592
- """
593
- Show an alert box with the given message.
594
-
595
- :param message: The message to display.
596
- :param alert_class: The class for styling the message. Options: info, warning, success, danger.
597
- """
598
- self.message = message
599
- self.alert_class = alert_class
600
- self.show_message()
601
-
602
- def show_message(self):
603
- display(HTML(f"""<div class="alert alert-{self.alert_class}">{self.message}"""))
604
-
605
-
606
- class DeviceNotFoundAlert(NotebookAlert):
607
- def __init__(self, device: str):
608
- """
609
- Show a warning message about an unavailable device. This class does not check whether or
610
- not the device is available, use the `check_device` function to check this. `check_device`
611
- also shows the warning if the device is not found.
612
-
613
- :param device: The unavailable device.
614
- :return: A formatted alert box with the message that `device` is not available, and a list
615
- of devices that are available.
616
- """
617
- ie = Core()
618
- supported_devices = ie.available_devices
619
- self.message = f"Running this cell requires a {device} device, " "which is not available on this system. "
620
- self.alert_class = "warning"
621
- if len(supported_devices) == 1:
622
- self.message += f"The following device is available: {ie.available_devices[0]}"
623
- else:
624
- self.message += "The following devices are available: " f"{', '.join(ie.available_devices)}"
625
- super().__init__(self.message, self.alert_class)
626
-
627
-
628
- def check_device(device: str) -> bool:
629
- """
630
- Check if the specified device is available on the system.
631
-
632
- :param device: Device to check. e.g. CPU, GPU
633
- :return: True if the device is available, False if not. If the device is not available,
634
- a DeviceNotFoundAlert will be shown.
635
- """
636
- ie = Core()
637
- if device not in ie.available_devices:
638
- DeviceNotFoundAlert(device)
639
- return False
640
- else:
641
- return True
642
-
643
-
644
- def check_openvino_version(version: str) -> bool:
645
- """
646
- Check if the specified OpenVINO version is installed.
647
-
648
- :param version: the OpenVINO version to check. Example: 2021.4
649
- :return: True if the version is installed, False if not. If the version is not installed,
650
- an alert message will be shown.
651
- """
652
- installed_version = get_version()
653
- if version not in installed_version:
654
- NotebookAlert(
655
- f"This notebook requires OpenVINO {version}. "
656
- f"The version on your system is: <i>{installed_version}</i>.<br>"
657
- "Please run <span style='font-family:monospace'>pip install --upgrade -r requirements.txt</span> "
658
- "in the openvino_env environment to install this version. "
659
- "See the <a href='https://github.com/openvinotoolkit/openvino_notebooks'>"
660
- "OpenVINO Notebooks README</a> for detailed instructions",
661
- alert_class="danger",
662
- )
663
- return False
664
- else:
665
- return True
666
-
667
-
668
- packed_layername_tensor_dict_list = [{"name": "aten::mul/Multiply"}]
669
-
670
-
671
- class ReplaceTensor(MatcherPass):
672
- def __init__(self, packed_layername_tensor_dict_list):
673
- MatcherPass.__init__(self)
674
- self.model_changed = False
675
-
676
- param = WrapType("opset10.Multiply")
677
-
678
- def callback(matcher: Matcher) -> bool:
679
- root = matcher.get_match_root()
680
- if root is None:
681
- return False
682
- for y in packed_layername_tensor_dict_list:
683
- root_name = root.get_friendly_name()
684
- if root_name.find(y["name"]) != -1:
685
- max_fp16 = np.array([[[[-np.finfo(np.float16).max]]]]).astype(np.float32)
686
- new_tensor = ops.constant(max_fp16, Type.f32, name="Constant_4431")
687
- root.set_arguments([root.input_value(0).node, new_tensor])
688
- packed_layername_tensor_dict_list.remove(y)
689
-
690
- return True
691
-
692
- self.register_matcher(Matcher(param, "ReplaceTensor"), callback)
693
-
694
-
695
- def optimize_bge_embedding(model_path, output_model_path):
696
- """
697
- optimize_bge_embedding used to optimize BGE model for NPU device
698
-
699
- Arguments:
700
- model_path {str} -- original BGE IR model path
701
- output_model_path {str} -- Converted BGE IR model path
702
- """
703
- core = Core()
704
- ov_model = core.read_model(model_path)
705
- manager = Manager()
706
- manager.register_pass(ReplaceTensor(packed_layername_tensor_dict_list))
707
- manager.run_passes(ov_model)
708
- ov.save_model(ov_model, output_model_path, compress_to_fp16=False)
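
Note: a minimal sketch of the `download_file` helper defined above, using the s3fd checkpoint URL that `ov_wav2lip_helper.py` already passes to it (assumes the script runs from `src/`):

    from utils.notebook_utils import download_file

    path = download_file(
        "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
        directory="checkpoints",
        show_progress=True,
    )
    print(path)  # resolved path to checkpoints/s3fd-619a316812.pth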
 
src/utils/pip_helper.py DELETED
@@ -1,10 +0,0 @@
1
- import sys
2
-
3
-
4
- def pip_install(*args):
5
- import subprocess # nosec - disable B404:import-subprocess check
6
-
7
- cli_args = []
8
- for arg in args:
9
- cli_args.extend(str(arg).split(" "))
10
- subprocess.run([sys.executable, "-m", "pip", "install", *cli_args], check=True)
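
Note: `pip_install` splits every argument on spaces before handing it to `python -m pip install`, so flags and requirement specifiers can be mixed freely. A usage sketch (the packages named are illustrative):

    from utils.pip_helper import pip_install

    # Runs: python -m pip install --upgrade openvino gTTS
    pip_install("--upgrade", "openvino gTTS")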
 
src/whisper_audio_extractor.py DELETED
@@ -1,47 +0,0 @@
- # whisper_audio_extractor.py
-
- import sounddevice as sd
- from scipy.io.wavfile import write
- import whisper
- import os
-
- # Path where the recorded audio file is temporarily stored
- AUDIO_PATH = os.path.join("..", "assets", "audio", "recorded_audio.wav")
-
- def record_audio(duration=5, sample_rate=44100):
-     """
-     Records microphone audio for the given duration and saves it as a WAV file.
-
-     Args:
-         duration (int): Recording duration in seconds.
-         sample_rate (int): Audio sample rate.
-     """
-     print("Recording...")
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=2)
-     sd.wait()  # Wait for the recording to finish
-     write(AUDIO_PATH, sample_rate, audio_data)  # Save the audio to the target path
-     print(f"Recording complete. File saved at {AUDIO_PATH}")
-
- def transcribe_audio():
-     """
-     Transcribes the recorded audio with the Whisper model and returns the text.
-
-     Returns:
-         str: Text transcribed from the audio.
-     """
-     # Load the Whisper model
-     model = whisper.load_model("base")
-
-     # Transcribe the audio
-     print("Transcribing the audio...")
-     result = model.transcribe(AUDIO_PATH)
-     print("Transcription completed.")
-     return result["text"]
-
- if __name__ == "__main__":
-     # Step 1: record the audio
-     record_audio()
-
-     # Step 2: transcribe the audio
-     texto = transcribe_audio()
-     print("Extracted text:", texto)
 
src/whisper_audio_transcriber.py DELETED
@@ -1,109 +0,0 @@
- # whisper_audio_transcriber.py
-
- import os
- from pathlib import Path
- import requests
- import librosa
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
- from transformers.utils import logging
- import soundfile as sf
-
- # Model definitions
- model_ids = {
-     "Multilingual models": [
-         "openai/whisper-large-v3-turbo",
-         "openai/whisper-large-v3",
-         "openai/whisper-large-v2",
-         "openai/whisper-large",
-         "openai/whisper-medium",
-         "openai/whisper-small",
-         "openai/whisper-base",
-         "openai/whisper-tiny",
-     ],
-     "English-only models": [
-         "distil-whisper/distil-large-v2",
-         "distil-whisper/distil-large-v3",
-         "distil-whisper/distil-medium.en",
-         "distil-whisper/distil-small.en",
-         "openai/whisper-medium.en",
-         "openai/whisper-small.en",
-         "openai/whisper-base.en",
-         "openai/whisper-tiny.en",
-     ],
- }
-
- def download_file(url, filename, directory="."):
-     """
-     Downloads a file from a URL and saves it in the given directory.
-     """
-     os.makedirs(directory, exist_ok=True)
-     filepath = Path(directory) / filename
-     response = requests.get(url)
-     response.raise_for_status()  # fail early on HTTP errors instead of writing an error page
-     filepath.write_bytes(response.content)
-     return filepath
-
- def transcribe_audio(file_path, model_name):
-     """
-     Transcribes audio using a Whisper model.
-
-     Args:
-         file_path (str): Path to the audio file.
-         model_name (str): Name of the Whisper model.
-
-     Returns:
-         str: Transcription of the audio.
-     """
-     processor = AutoProcessor.from_pretrained(model_name)
-     model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
-
-     # Build the transcription pipeline
-     pipe = pipeline(
-         "automatic-speech-recognition",
-         model=model,
-         tokenizer=processor.tokenizer,
-         feature_extractor=processor.feature_extractor,
-         device="cpu",  # switch to "cuda" if a GPU is available
-     )
-
-     # Load the audio file (Whisper expects 16 kHz input)
-     audio_data, samplerate = librosa.load(file_path, sr=16000)
-
-     # Transcribe the audio
-     result = pipe(audio_data)
-     return result["text"]
-
- def guardar_transcripcion(texto, filename="transcripcion.txt", directory="../results"):
-     """
-     Saves the transcribed text to a .txt file in the given directory.
-
-     Args:
-         texto (str): Transcribed text to save.
-         filename (str): Name of the .txt file.
-         directory (str): Directory where the file will be saved.
-     """
-     os.makedirs(directory, exist_ok=True)  # Create the directory if it does not exist
-     file_path = Path(directory) / filename
-     with open(file_path, "w", encoding="utf-8") as f:
-         f.write(texto)
-     print(f"Transcription saved at: {file_path}")
-
- def main():
-     # Log errors only
-     logging.set_verbosity_error()
-
-     # Path to the audio file
-     audio_path = os.path.abspath("../miwav2lipv6/assets/audio/grabacion_gradio.wav")
-
-     # Selected model
-     model_name = "openai/whisper-large"  # change this to the desired model
-
-     # Transcribe the audio
-     print(f"Transcribing the audio file: {audio_path}")
-     transcription = transcribe_audio(audio_path, model_name)
-     print(f"Transcription: {transcription}")
-
-     # Save the transcription to a .txt file
-     guardar_transcripcion(transcription)
-
- if __name__ == "__main__":
-     main()
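For context, the application entry point consumed this module roughly as follows. A sketch assuming the module is importable from the project root; the audio path and model name are illustrative (the model is one of the smaller entries in `model_ids`):

    from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion

    texto = transcribe_audio("assets/audio/grabacion_gradio.wav", "openai/whisper-base")
    guardar_transcripcion(texto, filename="transcripcion.txt", directory="results")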
 
tests/test_whisper_audio_extractor.py DELETED
@@ -1,29 +0,0 @@
- import os
- import pytest
- from src.whisper_audio_extractor import record_audio, transcribe_audio, AUDIO_PATH
-
- def test_record_audio():
-     """
-     Verifies that the recording function creates an audio file with a valid size.
-     """
-     # Run the recording with a short test duration
-     record_audio(duration=2)  # Record for 2 seconds for the test
-
-     # Check that the audio file exists
-     assert os.path.exists(AUDIO_PATH), "The audio file was not created."
-
-     # Check that the file is not empty
-     assert os.path.getsize(AUDIO_PATH) > 0, "The audio file is empty."
-
- def test_transcribe_audio():
-     """
-     Verifies that the transcription function returns text.
-     """
-     # Transcribe the recorded audio
-     # Note: relies on test_record_audio having created AUDio file at AUDIO_PATH first.
-     transcription = transcribe_audio()
-
-     # Make sure text was produced
-     assert isinstance(transcription, str) and len(transcription) > 0, "The transcription is empty or not text."
-
- if __name__ == "__main__":
-     pytest.main()
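These tests assume they are run from the project root so that `src` is importable, e.g.:

    pytest tests/test_whisper_audio_extractor.py -v

Note that test_record_audio requires a working microphone, so it would fail on a headless CI runner without an audio input device.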