.gitignore CHANGED
@@ -1,25 +1,24 @@
 # Ignore the virtual environment
 env/
 
 # Ignore build files and folders
 __pycache__/
 *.pyc
 *.pyo
 *.py[cod]
 .vscode/
 .DS_Store
 
 # Ignore log files and test output
 *.log
 *.out
 *.tmp
 
 
 # Ignore models and checkpoints
 models/
 checkpoints/
 src/Wav2Lip/
 assets/
 data/
 # Temporary and system files
-
app.py DELETED
@@ -1,183 +0,0 @@
- # interfaceV2.py
-
- import gradio as gr
- import sounddevice as sd
- from scipy.io.wavfile import write
- import tempfile
- import shutil
- import os
- import subprocess
- import sys
- from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
- from call_openai_api import moni as rtff  # call_openai_api.py must live in the same directory
-
-
- # Paths to files (adjusted to the project layout)
- AUDIO_RECORD_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/grabacion_gradio.wav")
- #VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun_5s.mp4")
- VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/video/data_video_sun.mp4")
- TRANSCRIPTION_TEXT_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
- RESULT_AUDIO_TEMP_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/audiov2.wav")
- RESULT_AUDIO_FINAL_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav")
- RESULT_VIDEO_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/results/result_voice.mp4")
- TEXT_TO_SPEECH_PATH = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/text_to_speech.py")
-
- # Function to record 8-second audio
- def grabar_audio(duration=8, sample_rate=44100):
-     print("Starting recording...")
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
-     print(f"Recording in progress for {duration} seconds...")
-     sd.wait()
-     print("Recording completed.")
-
-     temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-     write(temp_audio.name, sample_rate, audio_data)
-     print("Audio temporarily saved at:", temp_audio.name)
-     temp_audio.close()  # Close the handle before copying the file
-     os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
-     shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
-     print(f"Recording copied to: {AUDIO_RECORD_PATH}")
-
-     return AUDIO_RECORD_PATH, "Recording completed."
-
- # Function to transcribe audio with Whisper
- def transcribir_con_progreso(audio_path):
-     progreso = gr.Progress()
-     progreso(0, "Starting transcription...")
-     model_name = "openai/whisper-large"
-     progreso(25, "Loading Whisper model...")
-
-     transcripcion = transcribe_audio(audio_path, model_name)
-     progreso(75, "Saving transcription...")
-     guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
-     progreso(100, "Transcription completed.")
-     if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
-         raise FileNotFoundError(f"El archivo {TRANSCRIPTION_TEXT_PATH} no se generó.")
-
-     return transcripcion
-
- # Function to convert text to audio using text_to_speech.py
- def generar_audio_desde_texto():
-     print("Generating audio from text...")
-     result = subprocess.run(
-         [sys.executable, TEXT_TO_SPEECH_PATH],
-         capture_output=True,
-         text=True
-     )
-     if result.returncode != 0:
-         raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")
-     if result.stdout:
-         print("Output:", result.stdout)
-     if result.stderr:
-         print("Errors:", result.stderr)
-
-     if os.path.exists(RESULT_AUDIO_TEMP_PATH):
-         print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
-
-         os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
-         shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
-         print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
-
-         return RESULT_AUDIO_FINAL_PATH
-     else:
-         print(f"Error: Audio file was not generated at {RESULT_AUDIO_TEMP_PATH}")
-         return None
-
- # Function to process video and audio using run_inference.py with the generated audio file
- def procesar_video_audio():
-     print("Starting video and audio processing...")
-     run_inference_path = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
-
-     result = subprocess.run(
-         [sys.executable, run_inference_path, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
-         capture_output=True,
-         text=True
-     )
-
-     if result.stdout:
-         print("Output:", result.stdout)
-     if result.stderr:
-         print("Errors:", result.stderr)
-
-     if os.path.exists(RESULT_VIDEO_PATH):
-         print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
-         return RESULT_VIDEO_PATH
-     else:
-         print("Error: Video file was not generated at 'results/result_voice.mp4'")
-         return None
-
- # Gradio interface configuration
- def interfaz():
-     with gr.Blocks() as demo:
-         with gr.Row():
-             with gr.Column():
-                 gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
-                 grabar_button = gr.Button("Comenzando la grabacion de audio")
-                 estado_grabacion = gr.Textbox(label="Recording Status", interactive=False)
-
-             with gr.Column():
-                 output_audio = gr.Audio(AUDIO_RECORD_PATH, label="Audio Grabado", interactive=False)
-                 output_audio_speech = gr.Audio(RESULT_AUDIO_FINAL_PATH, label="Audio TTS", interactive=False)
-                 video_resultado = gr.Video(RESULT_VIDEO_PATH, label="Video procesado", interactive=False)
-                 texto_transcripcion = gr.Textbox(label="Texto transcrito")
-                 progreso_transcripcion = gr.Textbox(label="Transcription Status", interactive=False)
-
-         # Full flow: recording, transcription, OpenAI response, text-to-speech, and video processing
-         def flujo_completo():
-             try:
-                 print("Inicio del flujo completo...")
-                 # Record audio
-                 audio_path, mensaje_grabacion = grabar_audio()
-                 print("Audio grabado en:", audio_path)
-                 # Transcribe the audio
-                 transcripcion = transcribir_con_progreso(audio_path)
-                 print("Transcripción completada:", transcripcion)
-
-                 # Generate the OpenAI response from the saved transcription file
-                 respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
-                 print("Respuesta generada por OpenAI")
-
-                 # Generate audio from text
-                 audio_generado = generar_audio_desde_texto()
-                 print("Audio generado:", audio_generado)
-                 # Process video and audio
-                 video_path = procesar_video_audio()
-                 print("Video procesado en:", video_path)
-                 # Return the results if everything succeeded
-                 return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
-
-             except Exception as e:
-                 # Print the error to the terminal and return error messages to the interface
-                 print("Error detectado en flujo completo:", str(e))
-                 return (
-                     "Error durante el flujo completo",
-                     None,                # Recorded audio
-                     f"Error: {str(e)}",  # Transcription
-                     None,                # Generated audio
-                     None                 # Processed video
-                 )
-
-         grabar_button.click(
-             flujo_completo,
-             outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
-         )
-
-     return demo
-
- if __name__ == "__main__":
-     demo = interfaz()
-     demo.launch(allowed_paths=["C:/programacionEjercicios/miwav2lipv6/assets", "C:/programacionEjercicios/miwav2lipv6/results"])
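Review note: every path in app.py is hard-coded under C:/programacionEjercicios/miwav2lipv6, which ties the script to one machine. A minimal, hypothetical sketch of the same constants derived from the project root with pathlib (it assumes app.py sits in the repository root; nothing below is in the original file):

    from pathlib import Path

    # Resolve the project root from this file's location instead of hard-coding a Windows path.
    PROJECT_ROOT = Path(__file__).resolve().parent

    # The same constants app.py defines, now machine-independent.
    AUDIO_RECORD_PATH = str(PROJECT_ROOT / "assets" / "audio" / "grabacion_gradio.wav")
    VIDEO_PATH = str(PROJECT_ROOT / "assets" / "video" / "data_video_sun.mp4")
    TRANSCRIPTION_TEXT_PATH = str(PROJECT_ROOT / "results" / "transcripcion.txt")
    RESULT_VIDEO_PATH = str(PROJECT_ROOT / "results" / "result_voice.mp4")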
estructura_proyecto.txt CHANGED
@@ -1,34 +1,34 @@
(the old and new versions are textually identical, likely a line-ending or whitespace-only rewrite, so the content is shown once)
 proyecto_root/
 
 ├── assets/
 │   ├── video/
 │   │   └── data_video_sun_5s.mp4
 │   └── audio/
 │       └── data_audio_sun_5s.wav
 │       # Test data, audio, and video files, such as `data_audio_sun_5s.wav`
 
 ├── checkpoints/
 │   └── # Pretrained models and checkpoints, such as `wav2lip_gan.pth`
 
 ├── models/
 │   └── # Models converted to OpenVINO IR, such as `face_detection.xml` and `wav2lip.xml`
 
 ├── src/
 │   ├── utils/
 │   ├── Wav2Lip/
 │   ├── convert_models.py
 │   ├── gradio_helper.py
 │   ├── ov_inference.py
 │   ├── ov_wav2lip_helper.py
 │   └── run_inference.py
 
 ├── tests/
 │   └── # Test scripts that verify the project's functionality
 
 ├── results/
 │   └── result_voice.mp4
 
 ├── requirements.txt          # List of project dependencies
 ├── setup.py                  # Project setup script
 ├── estructura_proyecto.txt   # This project-structure overview
 └── README.md                 # Project documentation
requirements.txt CHANGED
@@ -1,30 +1,31 @@
 openvino>=2024.4.0
 huggingface_hub
 torch>=2.1
 gradio>=4.19
 librosa==0.9.2
 opencv-contrib-python
 opencv-python
 IPython
 tqdm
 numba
 numpy
 
 openai-whisper
 sounddevice
 scipy
 
 transformers>=4.35
 torchvision>=0.18.1
 onnx>=1.16.1
 optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
 openvino
 openvino-tokenizers
 openvino-genai
 datasets
 soundfile>=0.12
 python-ffmpeg<=1.0.16
 nncf>=2.13.0
 jiwer
 
 gtts
+
results/OpenAI_response.txt DELETED
@@ -1,5 +0,0 @@
- Hola, prueba en marcha,
- María con IA se realza,
- Nuevo modelo se lanza,
- Incorporación, esperanza,
- Ser mejor, nuestra balanza.
results/transcripcion.txt DELETED
@@ -1 +0,0 @@
- Hola, esta es una prueba para ver si podemos incorporar este modelo a María, María RB.
setup.py CHANGED
@@ -1,274 +1,287 @@
(the old and new versions are identical except for blank lines appended at the end of the file; the unchanged content is shown once as context)
 # 2024/03/11 setup.py
 
 import os
 import subprocess
 import sys
 import requests
 
 from pathlib import Path
 
 # Project folder layout
 PROJECT_DIRECTORIES = [
     "assets",
     "assets/audio",
     "assets/video",
     "checkpoints",
     "models",
     "src",
     "src/utils",
     "tests",
     "results"
 ]
 
 # URLs of the OpenVINO Notebooks utilities
 OPENVINO_UTILS = {
     "notebook_utils.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py",
     "pip_helper.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py"
 }
 
 # URLs of the Wav2Lip helper files
 WAV2LIP_HELPERS = {
     "gradio_helper.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/gradio_helper.py",
     "ov_inference.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/ov_inference.py",
     "ov_wav2lip_helper.py": "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/wav2lip/ov_wav2lip_helper.py"
 }
 
 WAV2LIP_HELPERS_DIR = Path("src")
 OPENVINO_UTILS_DIR = Path("src/utils")
 
 # URLs of the example input files
 EXAMPLE_FILES = {
     "audio_example": {
         "filename": "data_audio_sun_5s.wav",
         "url": "https://github.com/sammysun0711/openvino_aigc_samples/blob/main/Wav2Lip/data_audio_sun_5s.wav?raw=true",
         "folder": "assets/audio"
     },
     "video_example": {
         "filename": "data_video_sun_5s.mp4",
         "url": "https://github.com/sammysun0711/openvino_aigc_samples/blob/main/Wav2Lip/data_video_sun_5s.mp4?raw=true",
         "folder": "assets/video"
     }
 }
 
 def create_project_structure():
     """
     Creates the project folder structure.
     """
     for directory in PROJECT_DIRECTORIES:
         path = Path(directory)
         if not path.exists():
             path.mkdir(parents=True, exist_ok=True)
             print(f"Carpeta '{directory}' creada.")
         else:
             print(f"Carpeta '{directory}' ya existe.")
 
 def create_virtual_environment():
     """
     Creates the virtual environment if it does not exist.
     """
     env_path = Path("env")
     if not env_path.exists():
         print("Creando el entorno virtual...")
         subprocess.check_call([sys.executable, "-m", "venv", "env"])
         print(f"Entorno virtual creado en '{env_path}'.")
     else:
         print(f"El entorno virtual '{env_path}' ya existe.")
 
 def activate_virtual_environment():
     """
     Activates the virtual environment and returns the python and pip paths.
     """
     if os.name == 'nt':  # Windows
         python_path = str(Path("env") / "Scripts" / "python.exe")
         pip_path = str(Path("env") / "Scripts" / "pip.exe")
     else:  # Unix/macOS
         python_path = str(Path("env") / "bin" / "python")
         pip_path = str(Path("env") / "bin" / "pip")
 
     # Upgrade pip to the latest version inside the virtual environment using python -m pip
     try:
         subprocess.check_call([python_path, "-m", "pip", "install", "--upgrade", "pip"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         print("pip actualizado a la última versión.")
     except subprocess.CalledProcessError:
         print("Error al actualizar pip.")
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     return python_path, pip_path
 
 def install_requirements(pip_path):
     """
     Installs the dependencies from requirements.txt with a progress bar.
     """
     print("Instalando dependencias...")
     # Install tqdm in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     # Read requirements.txt and show a progress bar
     requirements_path = Path("requirements.txt")
     if not requirements_path.exists():
         print("Archivo requirements.txt no encontrado.")
         return
 
     with open(requirements_path, "r") as f:
         dependencies = f.read().splitlines()
 
     # Install each dependency with a progress bar
     for dependency in tqdm(dependencies, desc="Instalando dependencias", unit="paquete"):
         try:
             subprocess.check_call([pip_path, "install", dependency], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         except subprocess.CalledProcessError:
             print(f"\nError al instalar {dependency}.")
 
     print("Todas las dependencias fueron instaladas correctamente.")
 
 def download_openvino_utils(pip_path):
     """
     Downloads the OpenVINO Notebooks utility files into src/utils if they do not exist.
     """
     # Create the utilities folder if it does not exist
     OPENVINO_UTILS_DIR.mkdir(parents=True, exist_ok=True)
 
     # Install requests in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     # Install tqdm in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     for filename, url in tqdm(OPENVINO_UTILS.items(), desc="Descargando utilidades de OpenVINO", unit="archivo"):
         file_path = OPENVINO_UTILS_DIR / filename
         if not file_path.exists():
             response = requests.get(url)
             if response.status_code == 200:
                 with open(file_path, "wb") as f:
                     f.write(response.content)
             else:
                 print(f"Error al descargar {filename} desde {url}")
 
 def download_wav2lip_helpers(pip_path):
     """
     Downloads the Wav2Lip-specific helper files if they do not exist.
     """
     WAV2LIP_HELPERS_DIR.mkdir(parents=True, exist_ok=True)  # Creates `src` if it does not exist
 
     # Install requests in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
     for filename, url in tqdm(WAV2LIP_HELPERS.items(), desc="Descargando ayudas de Wav2Lip", unit="archivo"):
         file_path = WAV2LIP_HELPERS_DIR / filename
         if not file_path.exists():
             response = requests.get(url)
             if response.status_code == 200:
                 with open(file_path, "wb") as f:
                     f.write(response.content)
 
 def download_example_files():
     """
     Downloads the example input files (audio and video) into their corresponding folders.
     (Note: relies on the global `pip_path` set in the __main__ block.)
     """
     # Install requests in the virtual environment if it is not already installed
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     for example_name, example_info in tqdm(EXAMPLE_FILES.items(), desc="Descargando archivos de ejemplo", unit="archivo"):
         folder_path = Path(example_info["folder"])
         file_path = folder_path / example_info["filename"]
 
         # Create the folder if it does not exist
         folder_path.mkdir(parents=True, exist_ok=True)
 
         # Download the file if it does not exist
         if not file_path.exists():
             response = requests.get(example_info["url"])
             if response.status_code == 200:
                 with open(file_path, "wb") as f:
                     f.write(response.content)
 
 def clone_wav2lip_repo():
     """
     Clones the official Wav2Lip repository, hiding the progress behind tqdm.
     (Note: also relies on the global `pip_path` set in the __main__ block.)
     """
     repo_url = "https://github.com/Rudrabha/Wav2Lip"
     clone_path = "src/Wav2Lip"
 
     try:
         subprocess.check_call([pip_path, "install", "requests"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar requests.")
 
     try:
         subprocess.check_call([pip_path, "install", "tqdm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except subprocess.CalledProcessError:
         print("Error al instalar tqdm.")
 
     from tqdm import tqdm  # Import tqdm for the progress bar
 
     # Check whether the repository already exists to avoid cloning it again
     if os.path.exists(clone_path):
         print(f"El repositorio '{clone_path}' ya existe.")
         return
 
     # Start the clone, using tqdm to mask the raw git output
     print("Clonando el repositorio de Wav2Lip...")
     with tqdm(total=100, desc="Clonación en progreso", ncols=100, bar_format="{l_bar}{bar}") as pbar:
         # Run the clone command
         exit_code = subprocess.call(["git", "clone", repo_url, clone_path], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
         if exit_code != 0:
             raise Exception("Error: La clonación del repositorio ha fallado.")
         else:
             pbar.update(100)
             print("Repositorio clonado exitosamente en 'Wav2Lip'.")
 
 
 if __name__ == "__main__":
     create_project_structure()
     create_virtual_environment()
     python_path, pip_path = activate_virtual_environment()
 
     download_openvino_utils(pip_path)
     download_wav2lip_helpers(pip_path)
     download_example_files()
     install_requirements(pip_path)
     clone_wav2lip_repo()
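Review note: setup.py repeats the same quiet pip-install try/except block ten times across its functions. A minimal sketch of a shared helper that would collapse them (the name pip_install is hypothetical; the behavior mirrors the existing blocks):

    import subprocess

    def pip_install(pip_path, package):
        # Quietly install `package` into the virtual environment, as the repeated blocks above do.
        try:
            subprocess.check_call([pip_path, "install", package],
                                  stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            print(f"Error al instalar {package}.")

Each repeated block then reduces to a call such as pip_install(pip_path, "tqdm").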
src/.gradio/certificate.pem DELETED
@@ -1,31 +0,0 @@
- (31-line PEM block: the ISRG Root X1 root CA certificate that Gradio caches automatically)
src/audio_recorder.py DELETED
@@ -1,48 +0,0 @@
- # audio_recorder.py
-
- import sounddevice as sd
- from scipy.io.wavfile import write
- import os
-
- # Path for saving the audio file in the `assets/audio/` directory
- AUDIO_PATH = os.path.join("..", "assets", "audio", "grabacion_8s.wav")
-
- def listar_dispositivos():
-     """
-     Lists all audio devices available on the system.
-     """
-     print("Dispositivos de audio disponibles:")
-     dispositivos = sd.query_devices()
-     for idx, dispositivo in enumerate(dispositivos):
-         print(f"{idx}: {dispositivo['name']} - {'Entrada' if dispositivo['max_input_channels'] > 0 else 'Salida'}")
-     print("\nSelecciona el índice del dispositivo de entrada que prefieras para grabar audio.")
-
- def record_audio(duration=8, sample_rate=44100, device_index=None):
-     """
-     Records audio from the microphone for a given duration and saves it as a WAV file.
-
-     Args:
-         duration (int): Recording duration in seconds.
-         sample_rate (int): Audio sample rate.
-         device_index (int): Index of the audio device to use.
-     """
-     print("Grabando...")
-
-     # Start a single-channel recording
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, device=device_index)
-     sd.wait()  # Wait for the recording to finish
-
-     # Save the audio file
-     write(AUDIO_PATH, sample_rate, audio_data)
-     print(f"Grabación completada. Archivo guardado en: {AUDIO_PATH}")
-
- if __name__ == "__main__":
-     # Step 1: list the audio devices
-     listar_dispositivos()
-
-     # Wait for the user to pick a device index
-     device_index = int(input("Introduce el índice del dispositivo de entrada que deseas utilizar: "))
-
-     # Step 2: record audio with the selected device
-     record_audio(device_index=device_index)
src/call_openai_api.py DELETED
@@ -1,80 +0,0 @@
- import os
-
- from dotenv import load_dotenv
- from langchain.chat_models import ChatOpenAI
- from langchain.prompts import PromptTemplate
- from langchain.chains import LLMChain
- from pathlib import Path
-
- # Load environment variables from the .env file kept under models/
- project_root = Path(__file__).resolve().parent.parent  # Go up to the project root
- env_path = project_root / "models" / ".env"            # Full path to the .env file
- load_dotenv(dotenv_path=env_path)
-
- # API key configuration
- api_key = os.getenv("OPENAI_API_KEY")
- if not api_key:
-     raise ValueError("No se encontro la clave de API")
-
- OPENAI_KEY_VAL = api_key
-
- llm = ChatOpenAI(
-     openai_api_key=OPENAI_KEY_VAL,
-     temperature=0.7,
-     model="gpt-4"
- )
-
- # Prompt template filled with the text read from the file
- template = """
- Eres un asistente de IA que orienta a los alumnos a ser mejores personas. Haz una haiku de 5 lineas sobre lo que te estan comentando. Da siempre la respuesta en Español
- Texto: {texto}
- Respuesta:
- """
- prompt = PromptTemplate(
-     input_variables=["texto"],
-     template=template
- )
-
- chain = LLMChain(
-     llm=llm,
-     prompt=prompt
- )
-
- def save_summary_to_file(summary_text, filename='C:/programacionEjercicios/miwav2lipv6/results/OpenAI_response.txt'):
-     try:
-         with open(filename, 'w', encoding='utf-8') as file:
-             file.write(summary_text)
-         print(f"El resumen se ha guardado exitosamente en {filename}")
-     except Exception as e:
-         print(f"Ocurrio un error al guardar el resumen {e}")
-
- def read_text_from_file(filename):
-     try:
-         with open(filename, 'r') as file:
-             return file.read()
-     except Exception as e:
-         print(f"Error al leer el archivo {filename}: {e}")
-         return ""
-
-
- def moni(archivo):
-     # Read the user text from the given file and run it through the chain
-     texto_usuario = read_text_from_file(archivo)
-     resultado = chain.run(texto=texto_usuario)
-
-     # Show the generated response
-     print("\nResumen generado:")
-     print(resultado)
-     save_summary_to_file(resultado)
-
-     return resultado
-
-
- if __name__ == "__main__":
-     # moni() requires an input file; default to the transcription used elsewhere in the project
-     moni("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")
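For reference, the interface scripts consume this module through its moni entry point, as in the deleted app.py above:

    from call_openai_api import moni as rtff

    # app.py passes the transcription file and receives the generated haiku back.
    respuesta_openai = rtff("C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt")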
src/convert_models.py DELETED
@@ -1,16 +0,0 @@
- import sys
- from pathlib import Path
-
- # Add `src` to `sys.path` so Python can find the `utils` module
- sys.path.append(str(Path(__file__).resolve().parent))
-
- # Import the helper from utils/notebook_utils.py
- from utils.notebook_utils import download_file
- from ov_wav2lip_helper import download_and_convert_models
-
-
- OV_FACE_DETECTION_MODEL_PATH = Path("../miwav2lipv6/models/face_detection.xml")
- OV_WAV2LIP_MODEL_PATH = Path("../miwav2lipv6/models/wav2lip.xml")
-
-
- download_and_convert_models(OV_FACE_DETECTION_MODEL_PATH, OV_WAV2LIP_MODEL_PATH)
src/gradio_helper.py DELETED
@@ -1,26 -0,0 @@
- from typing import Callable
- import gradio as gr
- import numpy as np
-
-
- examples = [
-     [
-         #"data_video_sun_5s.mp4",
-         "data_video_sun.mp4",
-         "data_audio_sun_5s.wav",
-     ],
- ]
-
-
- def make_demo(fn: Callable):
-     demo = gr.Interface(
-         fn=fn,
-         inputs=[
-             gr.Video(label="Face video"),
-             gr.Audio(label="Audio", type="filepath"),
-         ],
-         outputs="video",
-         examples=examples,
-         allow_flagging="never",
-     )
-     return demo
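For context, make_demo only assembles the interface; the caller supplies the inference callable. A minimal usage sketch (run_wav2lip is a hypothetical stand-in; the real project wires in its Wav2Lip/OpenVINO inference function):

    from gradio_helper import make_demo

    def run_wav2lip(face_video, audio_path):
        # Hypothetical stand-in: run lip-sync inference and return the output video path.
        return "results/result_voice.mp4"

    demo = make_demo(run_wav2lip)
    demo.launch()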
src/interface.py DELETED
@@ -1,60 +0,0 @@
- # interface.py
-
- import gradio as gr
- import sounddevice as sd
- from scipy.io.wavfile import write
- import tempfile
- import shutil
- import os
-
- # Absolute video and audio paths to avoid access errors
- AUDIO_COPY_PATH = os.path.abspath(os.path.join("..", "miwav2lipv6", "assets", "audio", "grabacion_gradio.wav"))
- #VIDEO_PATH = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun_5s.mp4")
- VIDEO_PATH = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun.mp4")
-
- # Verify that the video exists
- if not os.path.exists(VIDEO_PATH):
-     print(f"Advertencia: El archivo de video no se encontró en la ruta {VIDEO_PATH}")
-
- # Function to record audio
- def grabar_audio(duration=8, sample_rate=44100):
-     print("Grabando...")
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
-     sd.wait()  # Wait for the recording to finish
-
-     # Save a temporary audio file
-     temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
-     write(temp_audio.name, sample_rate, audio_data)
-     print("Grabación completada. Archivo temporal guardado en:", temp_audio.name)
-
-     # Create `assets/audio` if it does not exist
-     os.makedirs(os.path.dirname(AUDIO_COPY_PATH), exist_ok=True)
-
-     # Copy into `assets/audio`
-     shutil.copy(temp_audio.name, AUDIO_COPY_PATH)
-     print(f"Copia de la grabación guardada en: {AUDIO_COPY_PATH}")
-
-     return AUDIO_COPY_PATH
-
- # Main function for the Gradio interface
- def interfaz():
-     with gr.Blocks() as demo:
-         gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
-
-         # Create a record button
-         with gr.Row():
-             grabar_button = gr.Button("Iniciar Grabación")
-
-             # Show the recorded audio on the right
-             output_audio = gr.Audio(label="Grabación de Audio", type="filepath")
-
-         # Wire the function to the button
-         grabar_button.click(grabar_audio, outputs=output_audio)
-
-     return demo
-
- # Run the interface with the absolute path in allowed_paths
- if __name__ == "__main__":
-     demo = interfaz()
-     demo.launch(allowed_paths=[os.path.dirname(AUDIO_COPY_PATH)])
src/interfaceV2.py DELETED
@@ -1,183 +0,0 @@
- (contents identical, line for line, to the deleted app.py listed above)
src/ov_inference.py DELETED
@@ -1,637 +0,0 @@
- from glob import glob
- from enum import Enum
- import math
- import subprocess
-
- import cv2
- import numpy as np
- from tqdm import tqdm
- import torch
- import torch.nn.functional as F
-
- from Wav2Lip import audio
- import openvino as ov
-
-
- device = "cpu"
-
-
- def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh):
-     xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1
-     dx, dy = (xc - axc) / aww, (yc - ayc) / ahh
-     dw, dh = math.log(ww / aww), math.log(hh / ahh)
-     return dx, dy, dw, dh
-
-
- def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh):
-     xc, yc = dx * aww + axc, dy * ahh + ayc
-     ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh
-     x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2
-     return x1, y1, x2, y2
-
-
- def nms(dets, thresh):
-     if 0 == len(dets):
-         return []
-     x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
-     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
-     order = scores.argsort()[::-1]
-
-     keep = []
-     while order.size > 0:
-         i = order[0]
-         keep.append(i)
-         xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]])
-         xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]])
-
-         w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1)
-         ovr = w * h / (areas[i] + areas[order[1:]] - w * h)
-
-         inds = np.where(ovr <= thresh)[0]
-         order = order[inds + 1]
-
-     return keep
-
-
- def encode(matched, priors, variances):
-     """Encode the variances from the priorbox layers into the ground truth boxes
-     we have matched (based on jaccard overlap) with the prior boxes.
-     Args:
-         matched: (tensor) Coords of ground truth for each prior in point-form
-             Shape: [num_priors, 4].
-         priors: (tensor) Prior boxes in center-offset form
-             Shape: [num_priors, 4].
-         variances: (list[float]) Variances of priorboxes
-     Return:
-         encoded boxes (tensor), Shape: [num_priors, 4]
-     """
-
-     # dist b/t match center and prior's center
-     g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
-     # encode variance
-     g_cxcy /= variances[0] * priors[:, 2:]
-     # match wh / prior wh
-     g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
-     g_wh = torch.log(g_wh) / variances[1]
-     # return target for smooth_l1_loss
-     return torch.cat([g_cxcy, g_wh], 1)  # [num_priors, 4]
-
-
- def decode(loc, priors, variances):
-     """Decode locations from predictions using priors to undo
-     the encoding we did for offset regression at train time.
-     Args:
-         loc (tensor): location predictions for loc layers,
-             Shape: [num_priors, 4]
-         priors (tensor): Prior boxes in center-offset form.
-             Shape: [num_priors, 4].
-         variances: (list[float]) Variances of priorboxes
-     Return:
-         decoded bounding box predictions
-     """
-
-     boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
-     boxes[:, :2] -= boxes[:, 2:] / 2
-     boxes[:, 2:] += boxes[:, :2]
-     return boxes
-
-
- def batch_decode(loc, priors, variances):
-     """Batched variant of decode(); same arguments with a leading batch dimension."""
-
-     boxes = torch.cat((priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2)
-     boxes[:, :, :2] -= boxes[:, :, 2:] / 2
-     boxes[:, :, 2:] += boxes[:, :, :2]
-     return boxes
-
-
- def get_smoothened_boxes(boxes, T):
-     for i in range(len(boxes)):
-         if i + T > len(boxes):
-             window = boxes[len(boxes) - T:]
-         else:
-             window = boxes[i: i + T]
-         boxes[i] = np.mean(window, axis=0)
-     return boxes
-
-
- def detect(net, img, device):
-     img = img - np.array([104, 117, 123])
-     img = img.transpose(2, 0, 1)
-     img = img.reshape((1,) + img.shape)
-
-     img = torch.from_numpy(img).float().to(device)
-     BB, CC, HH, WW = img.size()
-
-     results = net({"x": img})
-     olist = [torch.Tensor(results[i]) for i in range(12)]
-
-     bboxlist = []
-     for i in range(len(olist) // 2):
-         olist[i * 2] = F.softmax(olist[i * 2], dim=1)
-     olist = [oelem.data.cpu() for oelem in olist]
-     for i in range(len(olist) // 2):
-         ocls, oreg = olist[i * 2], olist[i * 2 + 1]
-         FB, FC, FH, FW = ocls.size()  # feature map size
-         stride = 2 ** (i + 2)  # 4, 8, 16, 32, 64, 128
-         anchor = stride * 4
-         poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
-         for Iindex, hindex, windex in poss:
-             axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
-             score = ocls[0, 1, hindex, windex]
-             loc = oreg[0, :, hindex, windex].contiguous().view(1, 4)
-             priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]])
-             variances = [0.1, 0.2]
-             box = decode(loc, priors, variances)
-             x1, y1, x2, y2 = box[0] * 1.0
-             # cv2.rectangle(imgshow, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
-             bboxlist.append([x1, y1, x2, y2, score])
-     bboxlist = np.array(bboxlist)
-     if 0 == len(bboxlist):
-         bboxlist = np.zeros((1, 5))
-
-     return bboxlist
-
-
- def batch_detect(net, imgs, device):
-     imgs = imgs - np.array([104, 117, 123])
-     imgs = imgs.transpose(0, 3, 1, 2)
-
-     imgs = torch.from_numpy(imgs).float().to(device)
-     BB, CC, HH, WW = imgs.size()
-
-     results = net({"x": imgs.numpy()})
  (listing truncated here; the rest of this 637-line deletion is not shown in the source)
174
- olist = [torch.Tensor(results[i]) for i in range(12)]
175
-
176
- bboxlist = []
177
- for i in range(len(olist) // 2):
178
- olist[i * 2] = F.softmax(olist[i * 2], dim=1)
179
- # olist[i * 2] = (olist[i * 2], dim=1)
180
- olist = [oelem.data.cpu() for oelem in olist]
181
- for i in range(len(olist) // 2):
182
- ocls, oreg = olist[i * 2], olist[i * 2 + 1]
183
- FB, FC, FH, FW = ocls.size() # feature map size
184
- stride = 2 ** (i + 2) # 4,8,16,32,64,128
185
- anchor = stride * 4
186
- poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
187
- for Iindex, hindex, windex in poss:
188
- axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
189
- score = ocls[:, 1, hindex, windex]
190
- loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4)
191
- priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4)
192
- variances = [0.1, 0.2]
193
- box = batch_decode(loc, priors, variances)
194
- box = box[:, 0] * 1.0
195
- # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1)
196
- bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy())
197
- bboxlist = np.array(bboxlist)
198
- if 0 == len(bboxlist):
199
- bboxlist = np.zeros((1, BB, 5))
200
-
201
- return bboxlist
202
-
203
-
204
- def flip_detect(net, img, device):
205
- img = cv2.flip(img, 1)
206
- b = detect(net, img, device)
207
-
208
- bboxlist = np.zeros(b.shape)
209
- bboxlist[:, 0] = img.shape[1] - b[:, 2]
210
- bboxlist[:, 1] = b[:, 1]
211
- bboxlist[:, 2] = img.shape[1] - b[:, 0]
212
- bboxlist[:, 3] = b[:, 3]
213
- bboxlist[:, 4] = b[:, 4]
214
- return bboxlist
215
-
216
-
217
- def pts_to_bb(pts):
218
- min_x, min_y = np.min(pts, axis=0)
219
- max_x, max_y = np.max(pts, axis=0)
220
- return np.array([min_x, min_y, max_x, max_y])
221
-
222
-
223
- class OVFaceDetector(object):
224
- """An abstract class representing a face detector.
225
-
226
- Any other face detection implementation must subclass it. All subclasses
227
- must implement ``detect_from_image``, that return a list of detected
228
- bounding boxes. Optionally, for speed considerations detect from path is
229
- recommended.
230
- """
231
-
232
- def __init__(self, device, verbose):
233
- self.device = device
234
- self.verbose = verbose
235
-
236
- def detect_from_image(self, tensor_or_path):
237
- """Detects faces in a given image.
238
-
239
- This function detects the faces present in a provided (usually BGR)
240
- image. The input can be either the image itself or the path to it.
241
-
242
- Arguments:
243
- tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path
244
- to an image or the image itself.
245
-
246
- Example::
247
-
248
- >>> path_to_image = 'data/image_01.jpg'
249
- ... detected_faces = detect_from_image(path_to_image)
250
- [A list of bounding boxes (x1, y1, x2, y2)]
251
- >>> image = cv2.imread(path_to_image)
252
- ... detected_faces = detect_from_image(image)
253
- [A list of bounding boxes (x1, y1, x2, y2)]
254
-
255
- """
256
- raise NotImplementedError
257
-
258
- def detect_from_directory(self, path, extensions=[".jpg", ".png"], recursive=False, show_progress_bar=True):
259
- """Detects faces from all the images present in a given directory.
260
-
261
- Arguments:
262
- path {string} -- a string containing a path that points to the folder containing the images
263
-
264
- Keyword Arguments:
265
- extensions {list} -- list of string containing the extensions to be
266
- consider in the following format: ``.extension_name`` (default:
267
- {['.jpg', '.png']}) recursive {bool} -- option wherever to scan the
268
- folder recursively (default: {False}) show_progress_bar {bool} --
269
- display a progressbar (default: {True})
270
-
271
- Example:
272
- >>> directory = 'data'
273
- ... detected_faces = detect_from_directory(directory)
274
- {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]}
275
-
276
- """
277
- if self.verbose:
278
- logger = logging.getLogger(__name__)
279
-
280
- if len(extensions) == 0:
281
- if self.verbose:
282
- logger.error("Expected at list one extension, but none was received.")
283
- raise ValueError
284
-
285
- if self.verbose:
286
- logger.info("Constructing the list of images.")
287
- additional_pattern = "/**/*" if recursive else "/*"
288
- files = []
289
- for extension in extensions:
290
- files.extend(glob(path + additional_pattern + extension, recursive=recursive))
291
-
292
- if self.verbose:
293
- logger.info("Finished searching for images. %s images found", len(files))
294
- logger.info("Preparing to run the detection.")
295
-
296
- predictions = {}
297
- for image_path in tqdm(files, disable=not show_progress_bar):
298
- if self.verbose:
299
- logger.info("Running the face detector on image: %s", image_path)
300
- predictions[image_path] = self.detect_from_image(image_path)
301
-
302
- if self.verbose:
303
- logger.info("The detector was successfully run on all %s images", len(files))
304
-
305
- return predictions
306
-
307
- @property
308
- def reference_scale(self):
309
- raise NotImplementedError
310
-
311
- @property
312
- def reference_x_shift(self):
313
- raise NotImplementedError
314
-
315
- @property
316
- def reference_y_shift(self):
317
- raise NotImplementedError
318
-
319
- @staticmethod
320
- def tensor_or_path_to_ndarray(tensor_or_path, rgb=True):
321
- """Convert path (represented as a string) or torch.tensor to a numpy.ndarray
322
-
323
- Arguments:
324
- tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself
325
- """
326
- if isinstance(tensor_or_path, str):
327
- return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1]
328
- elif torch.is_tensor(tensor_or_path):
329
- # Call cpu in case its coming from cuda
330
- return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy()
331
- elif isinstance(tensor_or_path, np.ndarray):
332
- return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path
333
- else:
334
- raise TypeError
335
-
336
-
337
- class OVSFDDetector(OVFaceDetector):
338
- def __init__(self, device, path_to_detector="models/face_detection.xml", verbose=False):
339
- super(OVSFDDetector, self).__init__(device, verbose)
340
-
341
- core = ov.Core()
342
- self.face_detector = core.compile_model(path_to_detector, self.device)
343
-
344
- def detect_from_image(self, tensor_or_path):
345
- image = self.tensor_or_path_to_ndarray(tensor_or_path)
346
-
347
- bboxlist = detect(self.face_detector, image, device="cpu")
348
- keep = nms(bboxlist, 0.3)
349
- bboxlist = bboxlist[keep, :]
350
- bboxlist = [x for x in bboxlist if x[-1] > 0.5]
351
-
352
- return bboxlist
353
-
354
- def detect_from_batch(self, images):
355
- bboxlists = batch_detect(self.face_detector, images, device="cpu")
356
- keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])]
357
- bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)]
358
- bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists]
359
-
360
- return bboxlists
361
-
362
- @property
363
- def reference_scale(self):
364
- return 195
365
-
366
- @property
367
- def reference_x_shift(self):
368
- return 0
369
-
370
- @property
371
- def reference_y_shift(self):
372
- return 0
373
-
374
-
375
- class LandmarksType(Enum):
376
- """Enum class defining the type of landmarks to detect.
377
-
378
- ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
379
- ``_2halfD`` - these points represent the projection of the 3D points onto the image plane
380
- ``_3D`` - detect the points ``(x,y,z)`` in a 3D space
381
-
382
- """
383
-
384
- _2D = 1
385
- _2halfD = 2
386
- _3D = 3
387
-
388
-
389
- class NetworkSize(Enum):
390
- # TINY = 1
391
- # SMALL = 2
392
- # MEDIUM = 3
393
- LARGE = 4
394
-
395
- def __new__(cls, value):
396
- member = object.__new__(cls)
397
- member._value_ = value
398
- return member
399
-
400
- def __int__(self):
401
- return self.value
402
-
403
-
404
- class OVFaceAlignment:
405
- def __init__(
406
- self, landmarks_type, network_size=NetworkSize.LARGE, device="CPU", flip_input=False, verbose=False, path_to_detector="models/face_detection.xml"
407
- ):
408
- self.device = device
409
- self.flip_input = flip_input
410
- self.landmarks_type = landmarks_type
411
- self.verbose = verbose
412
-
413
- network_size = int(network_size)
414
-
415
- self.face_detector = OVSFDDetector(device=device, path_to_detector=path_to_detector, verbose=verbose)
416
-
417
- def get_detections_for_batch(self, images):
418
- images = images[..., ::-1]
419
- detected_faces = self.face_detector.detect_from_batch(images.copy())
420
- results = []
421
-
422
- for i, d in enumerate(detected_faces):
423
- if len(d) == 0:
424
- results.append(None)
425
- continue
426
- d = d[0]
427
- d = np.clip(d, 0, None)
428
-
429
- x1, y1, x2, y2 = map(int, d[:-1])
430
- results.append((x1, y1, x2, y2))
431
-
432
- return results
433
-
434
-
435
- def face_detect_ov(images, device, face_det_batch_size, pads, nosmooth, path_to_detector):
436
- detector = OVFaceAlignment(LandmarksType._2D, flip_input=False, device=device, path_to_detector=path_to_detector)
437
-
438
- batch_size = face_det_batch_size
439
-
440
- print("face_detect_ov images[0].shape: ", images[0].shape)
441
- while 1:
442
- predictions = []
443
- try:
444
- for i in tqdm(range(0, len(images), batch_size)):
445
- predictions.extend(detector.get_detections_for_batch(np.array(images[i : i + batch_size])))
446
- except RuntimeError:
447
- if batch_size == 1:
448
- raise RuntimeError("Image too big to run face detection on GPU. Please use the --resize_factor argument")
449
- batch_size //= 2
450
- print("Recovering from OOM error; New batch size: {}".format(batch_size))
451
- continue
452
- break
453
-
454
- results = []
455
- pady1, pady2, padx1, padx2 = pads
456
- for rect, image in zip(predictions, images):
457
- if rect is None:
458
- # check this frame where the face was not detected.
459
- cv2.imwrite("temp/faulty_frame.jpg", image)
460
- raise ValueError("Face not detected! Ensure the video contains a face in all the frames.")
461
-
462
- y1 = max(0, rect[1] - pady1)
463
- y2 = min(image.shape[0], rect[3] + pady2)
464
- x1 = max(0, rect[0] - padx1)
465
- x2 = min(image.shape[1], rect[2] + padx2)
466
-
467
- results.append([x1, y1, x2, y2])
468
-
469
- boxes = np.array(results)
470
- if not nosmooth:
471
- boxes = get_smoothened_boxes(boxes, T=5)
472
- results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)] for image, (x1, y1, x2, y2) in zip(images, boxes)]
473
-
474
- del detector
475
- return results
476
-
477
-
478
- def datagen(frames, mels, box, static, face_det_batch_size, pads, nosmooth, img_size, wav2lip_batch_size, path_to_detector):
479
- img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
480
-
481
- if box[0] == -1:
482
- if not static:
483
- # BGR2RGB for CNN face detection
484
- face_det_results = face_detect_ov(frames, "CPU", face_det_batch_size, pads, nosmooth, path_to_detector)
485
- else:
486
- face_det_results = face_detect_ov([frames[0]], "CPU", face_det_batch_size, pads, nosmooth, path_to_detector)
487
- else:
488
- print("Using the specified bounding box instead of face detection...")
489
- y1, y2, x1, x2 = box
490
- face_det_results = [[f[y1:y2, x1:x2], (y1, y2, x1, x2)] for f in frames]
491
-
492
- for i, m in enumerate(mels):
493
- idx = 0 if static else i % len(frames)
494
- frame_to_save = frames[idx].copy()
495
- face, coords = face_det_results[idx].copy()
496
-
497
- face = cv2.resize(face, (img_size, img_size))
498
-
499
- img_batch.append(face)
500
- mel_batch.append(m)
501
- frame_batch.append(frame_to_save)
502
- coords_batch.append(coords)
503
-
504
- if len(img_batch) >= wav2lip_batch_size:
505
- img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
506
-
507
- img_masked = img_batch.copy()
508
- img_masked[:, img_size // 2 :] = 0
509
-
510
- img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.0
511
- mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
512
-
513
- yield img_batch, mel_batch, frame_batch, coords_batch
514
- img_batch, mel_batch, frame_batch, coords_batch = [], [], [], []
515
-
516
- if len(img_batch) > 0:
517
- img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch)
518
-
519
- img_masked = img_batch.copy()
520
- img_masked[:, img_size // 2 :] = 0
521
-
522
- img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255.0
523
- mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
524
-
525
- yield img_batch, mel_batch, frame_batch, coords_batch
526
-
527
-
528
- def ov_inference(
529
- face_path,
530
- audio_path,
531
- face_detection_path="models/face_detection.xml",
532
- wav2lip_path="models/wav2lip.xml",
533
- inference_device="CPU",
534
- wav2lip_batch_size=128,
535
- outfile="results/result_voice.mp4",
536
- resize_factor=1,
537
- rotate=False,
538
- crop=[0, -1, 0, -1],
539
- mel_step_size=16,
540
- box=[-1, -1, -1, -1],
541
- static=False,
542
- img_size=96,
543
- face_det_batch_size=16,
544
- pads=[0, 10, 0, 0],
545
- nosmooth=False,
546
- ):
547
- print("Reading video frames...")
548
-
549
- video_stream = cv2.VideoCapture(face_path)
550
- fps = video_stream.get(cv2.CAP_PROP_FPS)
551
-
552
- full_frames = []
553
- while 1:
554
- still_reading, frame = video_stream.read()
555
- if not still_reading:
556
- video_stream.release()
557
- break
558
- if resize_factor > 1:
559
- frame = cv2.resize(frame, (frame.shape[1] // resize_factor, frame.shape[0] // resize_factor))
560
-
561
- if rotate:
562
- frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
563
-
564
- y1, y2, x1, x2 = crop
565
- if x2 == -1:
566
- x2 = frame.shape[1]
567
- if y2 == -1:
568
- y2 = frame.shape[0]
569
-
570
- frame = frame[y1:y2, x1:x2]
571
-
572
- full_frames.append(frame)
573
-
574
- print("Number of frames available for inference: " + str(len(full_frames)))
575
-
576
- core = ov.Core()
577
-
578
- if not audio_path.endswith(".wav"):
579
- print("Extracting raw audio...")
580
- command = "ffmpeg -y -i {} -strict -2 {}".format(audio_path, "temp/temp.wav")
581
-
582
- subprocess.call(command, shell=True)
583
- audio_path = "temp/temp.wav"
584
-
585
- wav = audio.load_wav(audio_path, 16000)
586
- mel = audio.melspectrogram(wav)
587
- print(mel.shape)
588
-
589
- if np.isnan(mel.reshape(-1)).sum() > 0:
590
- raise ValueError("Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again")
591
-
592
- mel_chunks = []
593
- mel_idx_multiplier = 80.0 / fps
594
- i = 0
595
- while 1:
596
- start_idx = int(i * mel_idx_multiplier)
597
- if start_idx + mel_step_size > len(mel[0]):
598
- mel_chunks.append(mel[:, len(mel[0]) - mel_step_size :])
599
- break
600
- mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size])
601
- i += 1
602
-
603
- print("Length of mel chunks: {}".format(len(mel_chunks)))
604
-
605
- full_frames = full_frames[: len(mel_chunks)]
606
- batch_size = wav2lip_batch_size
607
- gen = datagen(full_frames.copy(), mel_chunks, box, static, face_det_batch_size, pads, nosmooth, img_size, wav2lip_batch_size, face_detection_path)
608
- for i, (img_batch, mel_batch, frames, coords) in enumerate(tqdm(gen, total=int(np.ceil(float(len(mel_chunks)) / batch_size)))):
609
- if i == 0:
610
- img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
611
- mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)
612
- compiled_wav2lip_model = core.compile_model(wav2lip_path, inference_device)
613
- print("Model loaded")
614
-
615
- frame_h, frame_w = full_frames[0].shape[:-1]
616
- out = cv2.VideoWriter("C:/programacionEjercicios/miwav2lipv6/src/Wav2Lip/temp/result.avi", cv2.VideoWriter_fourcc(*"DIVX"), fps, (frame_w, frame_h))
617
- pred_ov = compiled_wav2lip_model({"audio_sequences": mel_batch.numpy(), "face_sequences": img_batch.numpy()})[0]
618
- else:
619
- img_batch = np.transpose(img_batch, (0, 3, 1, 2))
620
- mel_batch = np.transpose(mel_batch, (0, 3, 1, 2))
621
- pred_ov = compiled_wav2lip_model({"audio_sequences": mel_batch, "face_sequences": img_batch})[0]
622
-
624
- pred_ov = pred_ov.transpose(0, 2, 3, 1) * 255.0
625
- for p, f, c in zip(pred_ov, frames, coords):
626
- y1, y2, x1, x2 = c
627
- p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
628
-
629
- f[y1:y2, x1:x2] = p
630
- out.write(f)
631
-
632
- out.release()
633
-
634
- command = "ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}".format(audio_path, "C:/programacionEjercicios/miwav2lipv6/src/Wav2Lip/temp/result.avi", outfile)
635
- subprocess.call(command, shell=True)
636
-
637
- return outfile
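
Note: the mel-chunking loop in `ov_inference` advances through the spectrogram at 80 mel frames per second of audio, i.e. 80 / fps mel frames per video frame, and takes a fixed window of `mel_step_size` frames each time. A worked sketch of the indexing (25 fps and the default mel_step_size of 16 are assumed for illustration):

    fps = 25.0
    mel_step_size = 16
    mel_idx_multiplier = 80.0 / fps  # 3.2 mel frames of advance per video frame

    for i in range(3):
        start_idx = int(i * mel_idx_multiplier)
        print(start_idx, start_idx + mel_step_size)  # 0 16, then 3 19, then 6 22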
 
src/ov_wav2lip_helper.py DELETED
@@ -1,68 +0,0 @@
1
- import numpy as np
2
- import sys
3
- import os
4
- import openvino as ov
5
- import torch
6
-
7
- from pathlib import Path
8
- # Add `src` to `sys.path` so Python can find `utils/notebook_utils.py`
9
- sys.path.append(str(Path(__file__).resolve().parent))
10
-
11
- # Import `download_file` from `notebook_utils`
12
- from utils.notebook_utils import download_file
13
- from huggingface_hub import hf_hub_download
14
- from Wav2Lip.face_detection.detection.sfd.net_s3fd import s3fd
15
- from Wav2Lip.models import Wav2Lip
16
-
17
-
18
-
19
- def _load(checkpoint_path):
20
- checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
21
- return checkpoint
22
-
23
-
24
- def load_model(path):
25
- model = Wav2Lip()
26
- print("Load checkpoint from: {}".format(path))
27
- checkpoint = _load(path)
28
- s = checkpoint["state_dict"]
29
- new_s = {}
30
- for k, v in s.items():
31
- new_s[k.replace("module.", "")] = v
32
- model.load_state_dict(new_s)
33
-
34
- return model.eval()
35
-
36
-
37
- def download_and_convert_models(ov_face_detection_model_path, ov_wav2lip_model_path):
38
- models_urls = {"s3fd": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"}
39
- path_to_detector = "checkpoints/face_detection.pth"
40
- # Convert Face Detection Model
41
- print("Convert Face Detection Model ...")
42
- if not os.path.isfile(path_to_detector):
43
- download_file(models_urls["s3fd"])
44
- if not os.path.exists("checkpoints"):
45
- os.mkdir("checkpoints")
46
- os.replace("s3fd-619a316812.pth", path_to_detector)
47
- model_weights = torch.load(path_to_detector)
48
-
49
- face_detector = s3fd()
50
- face_detector.load_state_dict(model_weights)
51
-
52
- if not ov_face_detection_model_path.exists():
53
- face_detection_dummy_inputs = torch.FloatTensor(np.random.rand(1, 3, 768, 576))
54
- face_detection_ov_model = ov.convert_model(face_detector, example_input=face_detection_dummy_inputs)
55
- ov.save_model(face_detection_ov_model, ov_face_detection_model_path)
56
- print("Converted face detection OpenVINO model: ", ov_face_detection_model_path)
57
-
58
- print("Convert Wav2Lip Model ...")
59
- path_to_wav2lip = hf_hub_download(repo_id="numz/wav2lip_studio", filename="Wav2lip/wav2lip.pth", local_dir="checkpoints")
60
- wav2lip = load_model(path_to_wav2lip)
61
- img_batch = torch.FloatTensor(np.random.rand(123, 6, 96, 96))
62
- mel_batch = torch.FloatTensor(np.random.rand(123, 1, 80, 16))
63
-
64
- if not ov_wav2lip_model_path.exists():
65
- example_inputs = {"audio_sequences": mel_batch, "face_sequences": img_batch}
66
- wav2lip_ov_model = ov.convert_model(wav2lip, example_input=example_inputs)
67
- ov.save_model(wav2lip_ov_model, ov_wav2lip_model_path)
68
- print("Converted face detection OpenVINO model: ", ov_wav2lip_model_path)
 
src/run_inference.py DELETED
@@ -1,67 +0,0 @@
1
- import os
2
- from ov_inference import ov_inference
3
- import soundfile as sf
4
- import cv2
5
-
6
- def verificar_archivos(video_path, audio_path):
7
- """
8
- Verifica que los archivos de video y audio existen y son legibles.
9
-
10
- Args:
11
- video_path (str): Ruta del archivo de video.
12
- audio_path (str): Ruta del archivo de audio.
13
-
14
- Returns:
15
- bool: True si ambos archivos son legibles, False en caso contrario.
16
- """
17
- # Verificar el archivo de video
18
- if not os.path.exists(video_path):
19
- print(f"Error: El archivo de video no existe en la ruta {video_path}")
20
- return False
21
- else:
22
- # Intentar abrir el video
23
- cap = cv2.VideoCapture(video_path)
24
- if not cap.isOpened():
25
- print(f"Error: No se puede abrir el archivo de video en {video_path}")
26
- return False
27
- else:
28
- print(f"Archivo de video {video_path} está accesible.")
29
- cap.release()
30
-
31
- # Verificar el archivo de audio
32
- if not os.path.exists(audio_path):
33
- print(f"Error: El archivo de audio no existe en la ruta {audio_path}")
34
- return False
35
- else:
36
- try:
37
- # Intentar abrir el archivo de audio
38
- with sf.SoundFile(audio_path) as audio_file:
39
- print(f"Archivo de audio {audio_path} está accesible.")
40
- except Exception as e:
41
- print(f"Error al leer el archivo de audio: {e}")
42
- return False
43
-
44
- return True
45
-
46
- # Rutas de archivos
47
- #video_path = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun_5s.mp4")
48
- video_path = os.path.abspath("../miwav2lipv6/assets/video/data_video_sun.mp4")
49
- #audio_path = os.path.abspath("../miwav2lipv6/assets/audio/grabacion_gradio.wav")
50
- audio_path = os.path.abspath("../miwav2lipv6/assets/audio/audio.wav")
51
- face_detection_path = os.path.abspath("../miwav2lipv6/models/face_detection.xml")
52
- wav2lip_path = os.path.abspath("../miwav2lipv6/models/wav2lip.xml")
53
- outfile = os.path.abspath("../miwav2lipv6/results/result_voice.mp4")
54
-
55
- # Check the files before calling ov_inference
56
- if verificar_archivos(video_path, audio_path):
57
- ov_inference(
58
- video_path,
59
- audio_path,
60
- face_detection_path=face_detection_path,
61
- wav2lip_path=wav2lip_path,
62
- inference_device="CPU",
63
- outfile=outfile,
64
- resize_factor = 2,
65
- )
66
- else:
67
- print("No se pudo proceder con la inferencia debido a problemas con los archivos.")
 
src/text_to_speech.py DELETED
@@ -1,36 +0,0 @@
1
- # text_to_speech.py
2
-
3
- from gtts import gTTS
4
- import os
5
-
6
- # File paths
7
- #TRANSCRIPTION_TEXT_PATH = "C:/programacionEjercicios/miwav2lipv6/results/transcripcion.txt"
8
- TRANSCRIPTION_TEXT_PATH = "C:/programacionEjercicios/miwav2lipv6/results/OpenAI_response.txt"
9
- OUTPUT_AUDIO_PATH = "C:/programacionEjercicios/miwav2lipv6/assets/audio/audio.wav"
10
-
11
- def generar_audio_desde_texto():
12
- """
13
- Converts the text in `transcripcion.txt` into a Spanish audio file (`audio.wav`).
14
- """
15
- try:
16
- # Check whether the transcription file exists
17
- if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
18
- print("Error: The transcription file was not found.")
19
- return
20
-
21
- # Read the contents of transcripcion.txt
22
- with open(TRANSCRIPTION_TEXT_PATH, "r", encoding="utf-8") as file:
23
- texto = file.read()
24
-
25
- # Generate the Spanish audio using gTTS
26
- tts = gTTS(text=texto, lang='es', slow=False)
27
- tts.save(OUTPUT_AUDIO_PATH)
28
-
29
- print(f"Audio generated successfully at: {OUTPUT_AUDIO_PATH}")
30
-
31
- except Exception as e:
32
- print(f"Error generating the audio: {e}")
33
-
34
- if __name__ == "__main__":
35
- generar_audio_desde_texto()
36
-
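
Note: gTTS always encodes its output as MP3 regardless of the extension passed to `save()`, so the `audio.wav` written above actually contains MP3 data. If a downstream consumer needs a genuine 16 kHz WAV (as Wav2Lip's audio loader does), one option is to transcode with ffmpeg; a sketch with hypothetical file names:

    from gtts import gTTS
    import subprocess

    tts = gTTS(text="hola", lang="es")
    tts.save("speech.mp3")  # gTTS output is always MP3
    # Re-encode into a genuine 16 kHz mono WAV
    subprocess.run(["ffmpeg", "-y", "-i", "speech.mp3", "-ar", "16000", "-ac", "1", "speech.wav"], check=True)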
 
src/utils/notebook_utils.py DELETED
@@ -1,708 +0,0 @@
1
- import os
2
- import platform
3
- import sys
4
- import threading
5
- import time
6
- import urllib.parse
7
-
8
- from os import PathLike
9
- from pathlib import Path
10
- from typing import List, NamedTuple, Optional, Tuple
11
- from tqdm import tqdm
12
-
13
- import numpy as np
14
- from openvino.runtime import Core, Type, get_version
15
- from IPython.display import HTML, Image, display
16
-
17
- import openvino as ov
18
- from openvino.runtime.passes import Manager, MatcherPass, WrapType, Matcher
19
- from openvino.runtime import opset10 as ops
20
-
21
-
22
- # ## Files
23
- #
24
- # Load an image, download a file, download an IR model, and create a progress bar to show download progress.
25
-
26
- def device_widget(default="AUTO", exclude=None, added=None):
27
- import openvino as ov
28
- import ipywidgets as widgets
29
-
30
- core = ov.Core()
31
-
32
- supported_devices = core.available_devices + ["AUTO"]
33
- exclude = exclude or []
34
- if exclude:
35
- for ex_device in exclude:
36
- if ex_device in supported_devices:
37
- supported_devices.remove(ex_device)
38
-
39
- added = added or []
40
- if added:
41
- for add_device in added:
42
- if add_device not in supported_devices:
43
- supported_devices.append(add_device)
44
-
45
- device = widgets.Dropdown(
46
- options=supported_devices,
47
- value=default,
48
- description="Device:",
49
- disabled=False,
50
- )
51
- return device
52
-
53
-
54
- def quantization_widget(default=True):
55
- import ipywidgets as widgets
56
-
57
- to_quantize = widgets.Checkbox(
58
- value=default,
59
- description="Quantization",
60
- disabled=False,
61
- )
62
-
63
- return to_quantize
64
-
65
-
66
- def pip_install(*args):
67
- import subprocess # nosec - disable B404:import-subprocess check
68
-
69
- cli_args = []
70
- for arg in args:
71
- cli_args.extend(str(arg).split(" "))
72
- subprocess.run([sys.executable, "-m", "pip", "install", *cli_args], shell=(platform.system() == "Windows"), check=True)
73
-
74
-
75
- def load_image(path: str) -> np.ndarray:
76
- """
77
- Loads an image from `path` and returns it as BGR numpy array. `path`
78
- should point to an image file, either a local filename or a url. The image is
79
- not stored to the filesystem. Use the `download_file` function to download and
80
- store an image.
81
-
82
- :param path: Local path name or URL to image.
83
- :return: image as BGR numpy array
84
- """
85
- import cv2
86
- import requests
87
-
88
- if path.startswith("http"):
89
- # Set User-Agent to Mozilla because some websites block
90
- # requests with User-Agent Python
91
- response = requests.get(path, headers={"User-Agent": "Mozilla/5.0"})
92
- array = np.asarray(bytearray(response.content), dtype="uint8")
93
- image = cv2.imdecode(array, -1) # Loads the image as BGR
94
- else:
95
- image = cv2.imread(path)
96
- return image
97
-
98
-
99
- def download_file(
100
- url: PathLike,
101
- filename: PathLike = None,
102
- directory: PathLike = None,
103
- show_progress: bool = True,
104
- silent: bool = False,
105
- timeout: int = 10,
106
- ) -> PathLike:
107
- """
108
- Download a file from a url and save it to the local filesystem. The file is saved to the
109
- current directory by default, or to `directory` if specified. If a filename is not given,
110
- the filename of the URL will be used.
111
-
112
- :param url: URL that points to the file to download
113
- :param filename: Name of the local file to save. Should point to the name of the file only,
114
- not the full path. If None the filename from the url will be used
115
- :param directory: Directory to save the file to. Will be created if it doesn't exist
116
- If None the file will be saved to the current working directory
117
- :param show_progress: If True, show a TQDM progress bar
118
- :param silent: If True, do not print a message if the file already exists
119
- :param timeout: Number of seconds before cancelling the connection attempt
120
- :return: path to downloaded file
121
- """
122
- from tqdm.notebook import tqdm_notebook
123
- import requests
124
-
125
- filename = filename or Path(urllib.parse.urlparse(url).path).name
126
- chunk_size = 16384 # make chunks bigger so that not too many updates are triggered for Jupyter front-end
127
-
128
- filename = Path(filename)
129
- if len(filename.parts) > 1:
130
- raise ValueError(
131
- "`filename` should refer to the name of the file, excluding the directory. "
132
- "Use the `directory` parameter to specify a target directory for the downloaded file."
133
- )
134
-
135
- # create the directory if it does not exist, and add the directory to the filename
136
- if directory is not None:
137
- directory = Path(directory)
138
- directory.mkdir(parents=True, exist_ok=True)
139
- filename = directory / Path(filename)
140
-
141
- try:
142
- response = requests.get(url=url, headers={"User-agent": "Mozilla/5.0"}, stream=True, timeout=timeout)
143
- response.raise_for_status()
144
- except (
145
- requests.exceptions.HTTPError
146
- ) as error: # For error associated with not-200 codes. Will output something like: "404 Client Error: Not Found for url: {url}"
147
- raise Exception(error) from None
148
- except requests.exceptions.Timeout:
149
- raise Exception(
150
- "Connection timed out. If you access the internet through a proxy server, please "
151
- "make sure the proxy is set in the shell from where you launched Jupyter."
152
- ) from None
153
- except requests.exceptions.RequestException as error:
154
- raise Exception(f"File downloading failed with error: {error}") from None
155
-
156
- # download the file if it does not exist, or if it exists with an incorrect file size
157
- filesize = int(response.headers.get("Content-length", 0))
158
- if not filename.exists() or (os.stat(filename).st_size != filesize):
159
- with tqdm(
160
- total=filesize,
161
- unit="B",
162
- unit_scale=True,
163
- unit_divisor=1024,
164
- desc=str(filename),
165
- disable=not show_progress,
166
- ) as progress_bar:
167
- with open(filename, "wb") as file_object:
168
- for chunk in response.iter_content(chunk_size):
169
- file_object.write(chunk)
170
- progress_bar.update(len(chunk))
171
- progress_bar.refresh()
172
- else:
173
- if not silent:
174
- print(f"'{filename}' already exists.")
175
-
176
- response.close()
177
-
178
- return filename.resolve()
179
-
180
-
181
- def download_ir_model(model_xml_url: str, destination_folder: PathLike = None) -> PathLike:
182
- """
183
- Download IR model from `model_xml_url`. Downloads model xml and bin file; the weights file is
184
- assumed to exist at the same location and name as model_xml_url with a ".bin" extension.
185
-
186
- :param model_xml_url: URL to model xml file to download
187
- :param destination_folder: Directory where downloaded model xml and bin are saved. If None, model
188
- files are saved to the current directory
189
- :return: path to downloaded xml model file
190
- """
191
- model_bin_url = model_xml_url[:-4] + ".bin"
192
- model_xml_path = download_file(model_xml_url, directory=destination_folder, show_progress=False)
193
- download_file(model_bin_url, directory=destination_folder)
194
- return model_xml_path
195
-
196
-
197
- # ## Images
198
-
199
- # ### Convert Pixel Data
200
- #
201
- # Normalize image pixel values between 0 and 1, and convert images to RGB and BGR.
202
-
203
- # In[ ]:
204
-
205
-
206
- def normalize_minmax(data):
207
- """
208
- Normalizes the values in `data` between 0 and 1
209
- """
210
- if data.max() == data.min():
211
- raise ValueError("Normalization is not possible because all elements of" f"`data` have the same value: {data.max()}.")
212
- return (data - data.min()) / (data.max() - data.min())
213
-
214
-
215
- def to_rgb(image_data: np.ndarray) -> np.ndarray:
216
- """
217
- Convert image_data from BGR to RGB
218
- """
219
- import cv2
220
-
221
- return cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
222
-
223
-
224
- def to_bgr(image_data: np.ndarray) -> np.ndarray:
225
- """
226
- Convert image_data from RGB to BGR
227
- """
228
- import cv2
229
-
230
- return cv2.cvtColor(image_data, cv2.COLOR_RGB2BGR)
231
-
232
-
233
- # ## Videos
234
-
235
- # ### Video Player
236
- #
237
- # Custom video player to fulfill FPS requirements. You can set target FPS and output size, flip the video horizontally or skip first N frames.
238
-
239
- # In[ ]:
240
-
241
-
242
- class VideoPlayer:
243
- """
244
- Custom video player to fulfill FPS requirements. You can set target FPS and output size,
245
- flip the video horizontally or skip first N frames.
246
-
247
- :param source: Video source. It could be either camera device or video file.
248
- :param size: Output frame size.
249
- :param flip: Flip source horizontally.
250
- :param fps: Target FPS.
251
- :param skip_first_frames: Skip first N frames.
252
- """
253
-
254
- def __init__(self, source, size=None, flip=False, fps=None, skip_first_frames=0, width=1280, height=720):
255
- import cv2
256
-
257
- self.cv2 = cv2 # This is done to access the package in class methods
258
- self.__cap = cv2.VideoCapture(source)
259
- # try HD by default to get better video quality
260
- self.__cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
261
- self.__cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
262
-
263
- if not self.__cap.isOpened():
264
- raise RuntimeError(f"Cannot open {'camera' if isinstance(source, int) else ''} {source}")
265
- # skip first N frames
266
- self.__cap.set(cv2.CAP_PROP_POS_FRAMES, skip_first_frames)
267
- # fps of input file
268
- self.__input_fps = self.__cap.get(cv2.CAP_PROP_FPS)
269
- if self.__input_fps <= 0:
270
- self.__input_fps = 60
271
- # target fps given by user
272
- self.__output_fps = fps if fps is not None else self.__input_fps
273
- self.__flip = flip
274
- self.__size = None
275
- self.__interpolation = None
276
- if size is not None:
277
- self.__size = size
278
- # AREA better for shrinking, LINEAR better for enlarging
279
- self.__interpolation = cv2.INTER_AREA if size[0] < self.__cap.get(cv2.CAP_PROP_FRAME_WIDTH) else cv2.INTER_LINEAR
280
- # first frame
281
- _, self.__frame = self.__cap.read()
282
- self.__lock = threading.Lock()
283
- self.__thread = None
284
- self.__stop = False
285
-
286
- """
287
- Start playing.
288
- """
289
-
290
- def start(self):
291
- self.__stop = False
292
- self.__thread = threading.Thread(target=self.__run, daemon=True)
293
- self.__thread.start()
294
-
295
- """
296
- Stop playing and release resources.
297
- """
298
-
299
- def stop(self):
300
- self.__stop = True
301
- if self.__thread is not None:
302
- self.__thread.join()
303
- self.__cap.release()
304
-
305
- def __run(self):
306
- prev_time = 0
307
- while not self.__stop:
308
- t1 = time.time()
309
- ret, frame = self.__cap.read()
310
- if not ret:
311
- break
312
-
313
- # fulfill target fps
314
- if 1 / self.__output_fps < time.time() - prev_time:
315
- prev_time = time.time()
316
- # replace by current frame
317
- with self.__lock:
318
- self.__frame = frame
319
-
320
- t2 = time.time()
321
- # time to wait [s] to fulfill input fps
322
- wait_time = 1 / self.__input_fps - (t2 - t1)
323
- # wait until it is time for the next frame
324
- time.sleep(max(0, wait_time))
325
-
326
- self.__frame = None
327
-
328
- """
329
- Get current frame.
330
- """
331
-
332
- def next(self):
333
- import cv2
334
-
335
- with self.__lock:
336
- if self.__frame is None:
337
- return None
338
- # need to copy frame, because can be cached and reused if fps is low
339
- frame = self.__frame.copy()
340
- if self.__size is not None:
341
- frame = self.cv2.resize(frame, self.__size, interpolation=self.__interpolation)
342
- if self.__flip:
343
- frame = self.cv2.flip(frame, 1)
344
- return frame
345
-
346
-
347
- # ## Visualization
348
-
349
- # ### Segmentation
350
- #
351
- # Define a SegmentationMap NamedTuple that keeps the labels and colormap for a segmentation project/dataset. Create CityScapesSegmentation and BinarySegmentation SegmentationMaps. Create a function to convert a segmentation map to an RGB image with a colormap, and to show the segmentation result as an overlay over the original image.
352
-
353
- # In[ ]:
354
-
355
-
356
- class Label(NamedTuple):
357
- index: int
358
- color: Tuple
359
- name: Optional[str] = None
360
-
361
-
362
- # In[ ]:
363
-
364
-
365
- class SegmentationMap(NamedTuple):
366
- labels: List
367
-
368
- def get_colormap(self):
369
- return np.array([label.color for label in self.labels])
370
-
371
- def get_labels(self):
372
- labelnames = [label.name for label in self.labels]
373
- if any(labelnames):
374
- return labelnames
375
- else:
376
- return None
377
-
378
-
379
- # In[ ]:
380
-
381
-
382
- cityscape_labels = [
383
- Label(index=0, color=(128, 64, 128), name="road"),
384
- Label(index=1, color=(244, 35, 232), name="sidewalk"),
385
- Label(index=2, color=(70, 70, 70), name="building"),
386
- Label(index=3, color=(102, 102, 156), name="wall"),
387
- Label(index=4, color=(190, 153, 153), name="fence"),
388
- Label(index=5, color=(153, 153, 153), name="pole"),
389
- Label(index=6, color=(250, 170, 30), name="traffic light"),
390
- Label(index=7, color=(220, 220, 0), name="traffic sign"),
391
- Label(index=8, color=(107, 142, 35), name="vegetation"),
392
- Label(index=9, color=(152, 251, 152), name="terrain"),
393
- Label(index=10, color=(70, 130, 180), name="sky"),
394
- Label(index=11, color=(220, 20, 60), name="person"),
395
- Label(index=12, color=(255, 0, 0), name="rider"),
396
- Label(index=13, color=(0, 0, 142), name="car"),
397
- Label(index=14, color=(0, 0, 70), name="truck"),
398
- Label(index=15, color=(0, 60, 100), name="bus"),
399
- Label(index=16, color=(0, 80, 100), name="train"),
400
- Label(index=17, color=(0, 0, 230), name="motorcycle"),
401
- Label(index=18, color=(119, 11, 32), name="bicycle"),
402
- Label(index=19, color=(255, 255, 255), name="background"),
403
- ]
404
-
405
- CityScapesSegmentation = SegmentationMap(cityscape_labels)
406
-
407
- binary_labels = [
408
- Label(index=0, color=(255, 255, 255), name="background"),
409
- Label(index=1, color=(0, 0, 0), name="foreground"),
410
- ]
411
-
412
- BinarySegmentation = SegmentationMap(binary_labels)
413
-
414
-
415
- # In[ ]:
416
-
417
-
418
- def segmentation_map_to_image(result: np.ndarray, colormap: np.ndarray, remove_holes: bool = False) -> np.ndarray:
419
- """
420
- Convert network result of floating point numbers to an RGB image with
421
- integer values from 0-255 by applying a colormap.
422
-
423
- :param result: A single network result after converting to pixel values in H,W or 1,H,W shape.
424
- :param colormap: A numpy array of shape (num_classes, 3) with an RGB value per class.
425
- :param remove_holes: If True, remove holes in the segmentation result.
426
- :return: An RGB image where each pixel is an int8 value according to colormap.
427
- """
428
- import cv2
429
-
430
- if len(result.shape) != 2 and result.shape[0] != 1:
431
- raise ValueError(f"Expected result with shape (H,W) or (1,H,W), got result with shape {result.shape}")
432
-
433
- if len(np.unique(result)) > colormap.shape[0]:
434
- raise ValueError(
435
- f"Expected max {colormap[0]} classes in result, got {len(np.unique(result))} "
436
- "different output values. Please make sure to convert the network output to "
437
- "pixel values before calling this function."
438
- )
439
- elif result.shape[0] == 1:
440
- result = result.squeeze(0)
441
-
442
- result = result.astype(np.uint8)
443
-
444
- contour_mode = cv2.RETR_EXTERNAL if remove_holes else cv2.RETR_TREE
445
- mask = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8)
446
- for label_index, color in enumerate(colormap):
447
- label_index_map = result == label_index
448
- label_index_map = label_index_map.astype(np.uint8) * 255
449
- contours, hierarchies = cv2.findContours(label_index_map, contour_mode, cv2.CHAIN_APPROX_SIMPLE)
450
- cv2.drawContours(
451
- mask,
452
- contours,
453
- contourIdx=-1,
454
- color=color.tolist(),
455
- thickness=cv2.FILLED,
456
- )
457
-
458
- return mask
459
-
460
-
461
- def segmentation_map_to_overlay(image, result, alpha, colormap, remove_holes=False) -> np.ndarray:
462
- """
463
- Returns a new image where a segmentation mask (created with colormap) is overlaid on
464
- the source image.
465
-
466
- :param image: Source image.
467
- :param result: A single network result after converting to pixel values in H,W or 1,H,W shape.
468
- :param alpha: Alpha transparency value for the overlay image.
469
- :param colormap: A numpy array of shape (num_classes, 3) with an RGB value per class.
470
- :param remove_holes: If True, remove holes in the segmentation result.
471
- :return: An RGB image with the segmentation mask overlaid on the source image.
472
- """
473
- import cv2
474
-
475
- if len(image.shape) == 2:
476
- image = np.repeat(np.expand_dims(image, -1), 3, 2)
477
- mask = segmentation_map_to_image(result, colormap, remove_holes)
478
- image_height, image_width = image.shape[:2]
479
- mask = cv2.resize(src=mask, dsize=(image_width, image_height))
480
- return cv2.addWeighted(mask, alpha, image, 1 - alpha, 0)
481
-
482
-
483
- # ### Network Results
484
- #
485
- # Show network result image, optionally together with the source image and a legend with labels.
486
-
487
- # In[ ]:
488
-
489
-
490
- def viz_result_image(
491
- result_image: np.ndarray,
492
- source_image: np.ndarray = None,
493
- source_title: str = None,
494
- result_title: str = None,
495
- labels: List[Label] = None,
496
- resize: bool = False,
497
- bgr_to_rgb: bool = False,
498
- hide_axes: bool = False,
499
- ):
500
- """
501
- Show result image, optionally together with source images, and a legend with labels.
502
-
503
- :param result_image: Numpy array of RGB result image.
504
- :param source_image: Numpy array of source image. If provided this image will be shown
505
- next to the result image. source_image is expected to be in RGB format.
506
- Set bgr_to_rgb to True if source_image is in BGR format.
507
- :param source_title: Title to display for the source image.
508
- :param result_title: Title to display for the result image.
509
- :param labels: List of labels. If provided, a legend will be shown with the given labels.
510
- :param resize: If true, resize the result image to the same shape as the source image.
511
- :param bgr_to_rgb: If true, convert the source image from BGR to RGB. Use this option if
512
- source_image is a BGR image.
513
- :param hide_axes: If true, do not show matplotlib axes.
514
- :return: Matplotlib figure with result image
515
- """
516
- import cv2
517
- import matplotlib.pyplot as plt
518
- from matplotlib.lines import Line2D
519
-
520
- if bgr_to_rgb:
521
- source_image = to_rgb(source_image)
522
- if resize:
523
- result_image = cv2.resize(result_image, (source_image.shape[1], source_image.shape[0]))
524
-
525
- num_images = 1 if source_image is None else 2
526
-
527
- fig, ax = plt.subplots(1, num_images, figsize=(16, 8), squeeze=False)
528
- if source_image is not None:
529
- ax[0, 0].imshow(source_image)
530
- ax[0, 0].set_title(source_title)
531
-
532
- ax[0, num_images - 1].imshow(result_image)
533
- ax[0, num_images - 1].set_title(result_title)
534
-
535
- if hide_axes:
536
- for a in ax.ravel():
537
- a.axis("off")
538
- if labels:
539
- colors = labels.get_colormap()
540
- lines = [
541
- Line2D(
542
- [0],
543
- [0],
544
- color=[item / 255 for item in c.tolist()],
545
- linewidth=3,
546
- linestyle="-",
547
- )
548
- for c in colors
549
- ]
550
- plt.legend(
551
- lines,
552
- labels.get_labels(),
553
- bbox_to_anchor=(1, 1),
554
- loc="upper left",
555
- prop={"size": 12},
556
- )
557
- plt.close(fig)
558
- return fig
559
-
560
-
561
- # ### Live Inference
562
-
563
- # In[ ]:
564
-
565
-
566
- def show_array(frame: np.ndarray, display_handle=None):
567
- """
568
- Display array `frame`. Replace information at `display_handle` with `frame`
569
- encoded as jpeg image. `frame` is expected to have data in BGR order.
570
-
571
- Create a display_handle with: `display_handle = display(display_id=True)`
572
- """
573
- import cv2
574
-
575
- _, frame = cv2.imencode(ext=".jpeg", img=frame)
576
- if display_handle is None:
577
- display_handle = display(Image(data=frame.tobytes()), display_id=True)
578
- else:
579
- display_handle.update(Image(data=frame.tobytes()))
580
- return display_handle
581
-
582
-
583
- # ## Checks and Alerts
584
- #
585
- # Create an alert class to show stylized info/error/warning messages and a `check_device` function that checks whether a given device is available.
586
-
587
- # In[ ]:
588
-
589
-
590
- class NotebookAlert(Exception):
591
- def __init__(self, message: str, alert_class: str):
592
- """
593
- Show an alert box with the given message.
594
-
595
- :param message: The message to display.
596
- :param alert_class: The class for styling the message. Options: info, warning, success, danger.
597
- """
598
- self.message = message
599
- self.alert_class = alert_class
600
- self.show_message()
601
-
602
- def show_message(self):
603
- display(HTML(f"""<div class="alert alert-{self.alert_class}">{self.message}"""))
604
-
605
-
606
- class DeviceNotFoundAlert(NotebookAlert):
607
- def __init__(self, device: str):
608
- """
609
- Show a warning message about an unavailable device. This class does not check whether or
610
- not the device is available, use the `check_device` function to check this. `check_device`
611
- also shows the warning if the device is not found.
612
-
613
- :param device: The unavailable device.
614
- :return: A formatted alert box with the message that `device` is not available, and a list
615
- of devices that are available.
616
- """
617
- ie = Core()
618
- supported_devices = ie.available_devices
619
- self.message = f"Running this cell requires a {device} device, " "which is not available on this system. "
620
- self.alert_class = "warning"
621
- if len(supported_devices) == 1:
622
- self.message += f"The following device is available: {ie.available_devices[0]}"
623
- else:
624
- self.message += "The following devices are available: " f"{', '.join(ie.available_devices)}"
625
- super().__init__(self.message, self.alert_class)
626
-
627
-
628
- def check_device(device: str) -> bool:
629
- """
630
- Check if the specified device is available on the system.
631
-
632
- :param device: Device to check. e.g. CPU, GPU
633
- :return: True if the device is available, False if not. If the device is not available,
634
- a DeviceNotFoundAlert will be shown.
635
- """
636
- ie = Core()
637
- if device not in ie.available_devices:
638
- DeviceNotFoundAlert(device)
639
- return False
640
- else:
641
- return True
642
-
643
-
644
- def check_openvino_version(version: str) -> bool:
645
- """
646
- Check if the specified OpenVINO version is installed.
647
-
648
- :param version: the OpenVINO version to check. Example: 2021.4
649
- :return: True if the version is installed, False if not. If the version is not installed,
650
- an alert message will be shown.
651
- """
652
- installed_version = get_version()
653
- if version not in installed_version:
654
- NotebookAlert(
655
- f"This notebook requires OpenVINO {version}. "
656
- f"The version on your system is: <i>{installed_version}</i>.<br>"
657
- "Please run <span style='font-family:monospace'>pip install --upgrade -r requirements.txt</span> "
658
- "in the openvino_env environment to install this version. "
659
- "See the <a href='https://github.com/openvinotoolkit/openvino_notebooks'>"
660
- "OpenVINO Notebooks README</a> for detailed instructions",
661
- alert_class="danger",
662
- )
663
- return False
664
- else:
665
- return True
666
-
667
-
668
- packed_layername_tensor_dict_list = [{"name": "aten::mul/Multiply"}]
669
-
670
-
671
- class ReplaceTensor(MatcherPass):
672
- def __init__(self, packed_layername_tensor_dict_list):
673
- MatcherPass.__init__(self)
674
- self.model_changed = False
675
-
676
- param = WrapType("opset10.Multiply")
677
-
678
- def callback(matcher: Matcher) -> bool:
679
- root = matcher.get_match_root()
680
- if root is None:
681
- return False
682
- for y in packed_layername_tensor_dict_list:
683
- root_name = root.get_friendly_name()
684
- if root_name.find(y["name"]) != -1:
685
- max_fp16 = np.array([[[[-np.finfo(np.float16).max]]]]).astype(np.float32)
686
- new_tensor = ops.constant(max_fp16, Type.f32, name="Constant_4431")
687
- root.set_arguments([root.input_value(0).node, new_tensor])
688
- packed_layername_tensor_dict_list.remove(y)
689
-
690
- return True
691
-
692
- self.register_matcher(Matcher(param, "ReplaceTensor"), callback)
693
-
694
-
695
- def optimize_bge_embedding(model_path, output_model_path):
696
- """
697
- optimize_bge_embedding used to optimize BGE model for NPU device
698
-
699
- Arguments:
700
- model_path {str} -- original BGE IR model path
701
- output_model_path {str} -- Converted BGE IR model path
702
- """
703
- core = Core()
704
- ov_model = core.read_model(model_path)
705
- manager = Manager()
706
- manager.register_pass(ReplaceTensor(packed_layername_tensor_dict_list))
707
- manager.run_passes(ov_model)
708
- ov.save_model(ov_model, output_model_path, compress_to_fp16=False)
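
Note: a minimal sketch of the `download_file` helper defined above, using the s3fd checkpoint URL that `ov_wav2lip_helper.py` already passes to it (assumes the script runs from `src/`):

    from utils.notebook_utils import download_file

    path = download_file(
        "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
        directory="checkpoints",
        show_progress=True,
    )
    print(path)  # resolved path to checkpoints/s3fd-619a316812.pth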
 
src/utils/pip_helper.py DELETED
@@ -1,10 +0,0 @@
1
- import sys
2
-
3
-
4
- def pip_install(*args):
5
- import subprocess # nosec - disable B404:import-subprocess check
6
-
7
- cli_args = []
8
- for arg in args:
9
- cli_args.extend(str(arg).split(" "))
10
- subprocess.run([sys.executable, "-m", "pip", "install", *cli_args], check=True)
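
Note: `pip_install` splits every argument on spaces before handing it to `python -m pip install`, so flags and requirement specifiers can be mixed freely. A usage sketch (the packages named are illustrative):

    from utils.pip_helper import pip_install

    # Runs: python -m pip install --upgrade openvino gTTS
    pip_install("--upgrade", "openvino gTTS")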
 
src/whisper_audio_extractor.py DELETED
@@ -1,47 +0,0 @@
- # whisper_audio_extractor.py
-
- import sounddevice as sd
- from scipy.io.wavfile import write
- import whisper
- import os
-
- # Path where the recorded audio file is temporarily stored
- AUDIO_PATH = os.path.join("..", "assets", "audio", "recorded_audio.wav")
-
- def record_audio(duration=5, sample_rate=44100):
-     """
-     Records microphone audio for the given duration and saves it as a WAV file.
-
-     Args:
-         duration (int): Recording duration in seconds.
-         sample_rate (int): Audio sample rate.
-     """
-     print("Recording...")
-     audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=2)
-     sd.wait()  # Wait for the recording to finish
-     write(AUDIO_PATH, sample_rate, audio_data)  # Save the audio to the target path
-     print(f"Recording complete. File saved at {AUDIO_PATH}")
-
- def transcribe_audio():
-     """
-     Transcribes the recorded audio with the Whisper model and returns the text.
-
-     Returns:
-         str: Text transcribed from the audio.
-     """
-     # Load the Whisper model
-     model = whisper.load_model("base")
-
-     # Transcribe the audio
-     print("Transcribing the audio...")
-     result = model.transcribe(AUDIO_PATH)
-     print("Transcription completed.")
-     return result["text"]
-
- if __name__ == "__main__":
-     # Step 1: record the audio
-     record_audio()
-
-     # Step 2: transcribe the audio
-     texto = transcribe_audio()
-     print("Extracted text:", texto)
 
src/whisper_audio_transcriber.py DELETED
@@ -1,109 +0,0 @@
- # whisper_audio_transcriber.py
-
- import os
- from pathlib import Path
- import requests
- import librosa
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, pipeline
- from transformers.utils import logging
- import soundfile as sf
-
- # Model definitions
- model_ids = {
-     "Multilingual models": [
-         "openai/whisper-large-v3-turbo",
-         "openai/whisper-large-v3",
-         "openai/whisper-large-v2",
-         "openai/whisper-large",
-         "openai/whisper-medium",
-         "openai/whisper-small",
-         "openai/whisper-base",
-         "openai/whisper-tiny",
-     ],
-     "English-only models": [
-         "distil-whisper/distil-large-v2",
-         "distil-whisper/distil-large-v3",
-         "distil-whisper/distil-medium.en",
-         "distil-whisper/distil-small.en",
-         "openai/whisper-medium.en",
-         "openai/whisper-small.en",
-         "openai/whisper-base.en",
-         "openai/whisper-tiny.en",
-     ],
- }
-
- def download_file(url, filename, directory="."):
-     """
-     Downloads a file from a URL and saves it in the given directory.
-     """
-     os.makedirs(directory, exist_ok=True)
-     filepath = Path(directory) / filename
-     response = requests.get(url)
-     response.raise_for_status()  # fail early on HTTP errors instead of writing an error page
-     filepath.write_bytes(response.content)
-     return filepath
-
- def transcribe_audio(file_path, model_name):
-     """
-     Transcribes audio using a Whisper model.
-
-     Args:
-         file_path (str): Path to the audio file.
-         model_name (str): Name of the Whisper model.
-
-     Returns:
-         str: Transcription of the audio.
-     """
-     processor = AutoProcessor.from_pretrained(model_name)
-     model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
-
-     # Build the transcription pipeline
-     pipe = pipeline(
-         "automatic-speech-recognition",
-         model=model,
-         tokenizer=processor.tokenizer,
-         feature_extractor=processor.feature_extractor,
-         device="cpu",  # switch to "cuda" if a GPU is available
-     )
-
-     # Load the audio file (Whisper expects 16 kHz input)
-     audio_data, samplerate = librosa.load(file_path, sr=16000)
-
-     # Transcribe the audio
-     result = pipe(audio_data)
-     return result["text"]
-
- def guardar_transcripcion(texto, filename="transcripcion.txt", directory="../results"):
-     """
-     Saves the transcribed text to a .txt file in the given directory.
-
-     Args:
-         texto (str): Transcribed text to save.
-         filename (str): Name of the .txt file.
-         directory (str): Directory where the file will be saved.
-     """
-     os.makedirs(directory, exist_ok=True)  # Create the directory if it does not exist
-     file_path = Path(directory) / filename
-     with open(file_path, "w", encoding="utf-8") as f:
-         f.write(texto)
-     print(f"Transcription saved at: {file_path}")
-
- def main():
-     # Log errors only
-     logging.set_verbosity_error()
-
-     # Path to the audio file
-     audio_path = os.path.abspath("../miwav2lipv6/assets/audio/grabacion_gradio.wav")
-
-     # Selected model
-     model_name = "openai/whisper-large"  # change this to the desired model
-
-     # Transcribe the audio
-     print(f"Transcribing the audio file: {audio_path}")
-     transcription = transcribe_audio(audio_path, model_name)
-     print(f"Transcription: {transcription}")
-
-     # Save the transcription to a .txt file
-     guardar_transcripcion(transcription)
-
- if __name__ == "__main__":
-     main()
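For context, the application entry point consumed this module roughly as follows. A sketch assuming the module is importable from the project root; the audio path and model name are illustrative (the model is one of the smaller entries in `model_ids`):

    from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion

    texto = transcribe_audio("assets/audio/grabacion_gradio.wav", "openai/whisper-base")
    guardar_transcripcion(texto, filename="transcripcion.txt", directory="results")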
 
tests/test_whisper_audio_extractor.py DELETED
@@ -1,29 +0,0 @@
- import os
- import pytest
- from src.whisper_audio_extractor import record_audio, transcribe_audio, AUDIO_PATH
-
- def test_record_audio():
-     """
-     Verifies that the recording function creates an audio file with a valid size.
-     """
-     # Run the recording with a short test duration
-     record_audio(duration=2)  # Record for 2 seconds for the test
-
-     # Check that the audio file exists
-     assert os.path.exists(AUDIO_PATH), "The audio file was not created."
-
-     # Check that the file is not empty
-     assert os.path.getsize(AUDIO_PATH) > 0, "The audio file is empty."
-
- def test_transcribe_audio():
-     """
-     Verifies that the transcription function returns text.
-     """
-     # Transcribe the recorded audio
-     # Note: relies on test_record_audio having created AUDio file at AUDIO_PATH first.
-     transcription = transcribe_audio()
-
-     # Make sure text was produced
-     assert isinstance(transcription, str) and len(transcription) > 0, "The transcription is empty or not text."
-
- if __name__ == "__main__":
-     pytest.main()
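These tests assume they are run from the project root so that `src` is importable, e.g.:

    pytest tests/test_whisper_audio_extractor.py -v

Note that test_record_audio requires a working microphone, so it would fail on a headless CI runner without an audio input device.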