Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- .gitignore +10 -0
- .gradio/certificate.pem +31 -0
- .gradio/flagged/dataset1.csv +2 -0
- LICENSE +0 -0
- README.md +24 -7
- To +0 -0
- configurar_entorno.sh +43 -0
- dataset.csv +3 -0
- drtd_model/.gitattributes +35 -0
- drtd_model/README.md +1 -0
- drtd_model/config.json +1 -0
- examples/test_script.py +9 -0
- generar_csv.py +38 -0
- grabacion1.wav +3 -0
- grabar.sh +35 -0
- guion.mp3 +0 -0
- guion.txt +3 -0
- instalar_dependencias.sh +7 -0
- kuchi_voice.wav +3 -0
- leer_texto.py +25 -0
- mi_voz.wav +3 -0
- output.mp3 +0 -0
- output.wav +0 -0
- prueba.py +2 -0
- reproducir_con_mi_voz.py +50 -0
- reproducir_web.py +47 -0
- requirements.txt +8 -0
- src/__init__.py +0 -0
- src/app_kuchiyuya.py +41 -0
- src/infer_drt.py +32 -0
- src/inference.py +30 -0
- src/train.py +19 -0
- src/train.py.save +152 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
grabacion1.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
kuchi_voice.wav filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
mi_voz.wav filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignora entorno virtual
|
| 2 |
+
venv/
|
| 3 |
+
|
| 4 |
+
# Ignora audios locales
|
| 5 |
+
voces/*.wav
|
| 6 |
+
|
| 7 |
+
# Python cache
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.pyc
|
| 10 |
+
.gitmodules
|
.gradio/certificate.pem
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN CERTIFICATE-----
|
| 2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
| 3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
| 4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
| 5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
| 6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
| 7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
| 8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
| 9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
| 10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
| 11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
| 12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
| 13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
| 14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
| 15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
| 16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
| 17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
| 18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
| 19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
| 20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
| 21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
| 22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
| 23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
| 24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
| 25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
| 26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
| 27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
| 28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
| 29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
| 30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
| 31 |
+
-----END CERTIFICATE-----
|
.gradio/flagged/dataset1.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"Texto (opcional, se ignora)",π Frase aleatoria,timestamp
|
| 2 |
+
,,2025-05-23 05:31:28.515590
|
LICENSE
ADDED
|
File without changes
|
README.md
CHANGED
|
@@ -1,12 +1,29 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: gray
|
| 5 |
-
colorTo: yellow
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.31.0
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
|
|
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: voice
|
| 3 |
+
app_file: src/app_kuchiyuya.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 5.31.0
|
|
|
|
|
|
|
| 6 |
---
|
| 7 |
+
# π½ Kuchiyuya Voice Engine
|
| 8 |
|
| 9 |
+
**Bienvenido al motor de voz del universo Ariplanteater.**
|
| 10 |
+
Este repositorio contiene el proyecto inicial para entrenar, simular y desplegar modelos de voz inspirados en los personajes de la saga Kuchiyuya.
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## π ΒΏQuΓ© contiene?
|
| 15 |
+
|
| 16 |
+
- π§ `src/train.py` β Entrenamiento ficticio del modelo `KuchiyuyaNet`.
|
| 17 |
+
- ποΈ `src/inference.py` β Interfaz Gradio para convertir texto en sonido (simulado).
|
| 18 |
+
- π§ͺ `examples/test_script.py` β Script de prueba del sistema.
|
| 19 |
+
- π¦ `requirements.txt` β Lista de dependencias mΓnimas.
|
| 20 |
+
- π `models/` β Carpeta donde se guardarΓ‘ el modelo final (simulado).
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## π§° Requisitos
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
python -m venv venv
|
| 28 |
+
source venv/bin/activate
|
| 29 |
+
pip install -r requirements.txt
```
|
To
ADDED
|
File without changes
|
configurar_entorno.sh
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
echo "π Iniciando configuraciΓ³n Kuchiyuya Pro..."
|
| 4 |
+
|
| 5 |
+
# 1. Verificar carpeta del entorno virtual
|
| 6 |
+
if [ ! -d "venv" ]; then
|
| 7 |
+
echo "π οΈ Creando entorno virtual..."
|
| 8 |
+
python3 -m venv venv
|
| 9 |
+
fi
|
| 10 |
+
|
| 11 |
+
# 2. Activar entorno
|
| 12 |
+
source venv/bin/activate
|
| 13 |
+
echo "β
Entorno virtual activado"
|
| 14 |
+
|
| 15 |
+
# 3. Verificar requirements.txt
|
| 16 |
+
if [ ! -f "requirements.txt" ]; then
|
| 17 |
+
echo "β No se encontrΓ³ requirements.txt. Abortando."
|
| 18 |
+
exit 1
|
| 19 |
+
fi
|
| 20 |
+
|
| 21 |
+
# 4. Instalar dependencias
|
| 22 |
+
echo "π¦ Instalando dependencias..."
|
| 23 |
+
pip install --upgrade pip
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
|
| 26 |
+
# 5. ValidaciΓ³n de rutas
|
| 27 |
+
if [ ! -d "src" ]; then
|
| 28 |
+
echo "β Carpeta 'src/' no encontrada. Crea src/inference.py"
|
| 29 |
+
exit 1
|
| 30 |
+
fi
|
| 31 |
+
|
| 32 |
+
if [ ! -d "voces" ]; then
|
| 33 |
+
echo "π Creando carpeta de voces..."
|
| 34 |
+
mkdir -p voces
|
| 35 |
+
fi
|
| 36 |
+
|
| 37 |
+
# 6. Preparar modelo DRTD si fuera necesario (placeholder para kaggle/huggingface)
|
| 38 |
+
echo "π Verifica que tu modelo DRTD estΓ© en src/model_drt/"
|
| 39 |
+
mkdir -p src/model_drt
|
| 40 |
+
|
| 41 |
+
echo "β
Todo listo para correr tu demo:"
|
| 42 |
+
echo "π Ejecuta: python src/inference.py"
|
| 43 |
+
|
dataset.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
path,text
|
| 2 |
+
voces/kuchiyuya1.wav,"Kuchiyuyas"
|
| 3 |
+
|
drtd_model/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
drtd_model/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Modelo Kuchiyuya TTS inicial
|
drtd_model/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{ "architectures": ["DummyTTS"] }
|
examples/test_script.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# AΓ±ade la carpeta src al path
|
| 5 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
|
| 6 |
+
|
| 7 |
+
from train import train
|
| 8 |
+
|
| 9 |
+
train()
|
generar_csv.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Interactively annotate the .wav clips in ``voces/`` into ``dataset.csv``.

Each clip is played with ``aplay`` and the user is asked to type what it says.
Clips with an empty answer are skipped; Ctrl+C or Ctrl+D stops the session and
keeps the rows written so far.
"""
import csv
import os
import subprocess
import sys

# Folder that holds the .wav recordings
ruta = "voces"

# Collect the .wav files in a stable (sorted) order
archivos = sorted(f for f in os.listdir(ruta) if f.endswith(".wav"))

# Nothing to annotate: stop before an existing dataset.csv gets truncated
if not archivos:
    print("β No se encontraron archivos .wav en la carpeta 'voces'.")
    sys.exit()

# Create dataset.csv (overwrites any previous file)
with open("dataset.csv", "w", newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(["path", "text"])

    print("π§ Comenzando anotaciΓ³n de dataset...\n")

    for nombre in archivos:
        ruta_relativa = os.path.join(ruta, nombre)
        print(f"\nπ§ Reproduciendo: {ruta_relativa}")
        # Pass an argument list instead of a shell string so file names with
        # quotes, spaces or shell metacharacters are handled safely.
        try:
            subprocess.run(["aplay", ruta_relativa], check=False)
        except OSError as err:
            # aplay is not installed or not executable: keep annotating, as the
            # previous os.system call did when the command failed.
            print(f"No se pudo ejecutar 'aplay': {err}")

        try:
            texto = input(f"π ΒΏQuΓ© dice exactamente '{nombre}'? ").strip()
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C or Ctrl+D both end the session cleanly.
            print("\nβ Proceso interrumpido por el usuario.")
            break

        if texto:
            writer.writerow([ruta_relativa, texto])
            # This literal was split across two physical lines in the source;
            # it is rejoined here so the script parses.
            print("β Guardado.")
        else:
            print("β οΈ Entrada vacΓa. Archivo omitido.")

print("\nπ¦ Dataset final guardado como 'dataset.csv'")
|
grabacion1.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb6b351f3f7d35e6ad90c5814e4936502ba40d1d4c528dee27cb18b0beabe4bb
|
| 3 |
+
size 1058444
|
grabar.sh
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
echo "ποΈ Grabador Kuchiyuya Pro"
|
| 4 |
+
|
| 5 |
+
mkdir -p voces
|
| 6 |
+
|
| 7 |
+
read -p "π Nombre del archivo (sin .wav): " filename
|
| 8 |
+
read -p "β±οΈ DuraciΓ³n de la grabaciΓ³n (segundos): " duracion
|
| 9 |
+
read -p "π Frase (texto asociado a la grabaciΓ³n): " texto
|
| 10 |
+
|
| 11 |
+
archivo="voces/${filename}.wav"
|
| 12 |
+
|
| 13 |
+
echo "π€ Grabando durante $duracion segundos..."
|
| 14 |
+
arecord -d "$duracion" -f cd -r 16000 -c 1 "$archivo"
|
| 15 |
+
|
| 16 |
+
echo "β
GrabaciΓ³n guardada como $archivo"
|
| 17 |
+
|
| 18 |
+
# ReproducciΓ³n
|
| 19 |
+
aplay "$archivo"
|
| 20 |
+
|
| 21 |
+
# Visualizar forma de onda si tienes sox
|
| 22 |
+
if command -v play >/dev/null 2>&1; then
|
| 23 |
+
echo "π Visualizando forma de onda con sox..."
|
| 24 |
+
play "$archivo" norm | sox "$archivo" -n stat
|
| 25 |
+
else
|
| 26 |
+
echo "β οΈ 'sox' no estΓ‘ instalado. Sin visualizaciΓ³n de onda."
|
| 27 |
+
fi
|
| 28 |
+
|
| 29 |
+
# AΓ±adir al dataset.csv
|
| 30 |
+
csv="dataset.csv"
|
| 31 |
+
if [ ! -f "$csv" ]; then
|
| 32 |
+
echo "path,text,speaker" > "$csv"
|
| 33 |
+
fi
|
| 34 |
+
echo "$archivo,\"$texto\",\"$filename\"" >> "$csv"
|
| 35 |
+
echo "ποΈ AΓ±adido al $csv: \"$texto\""
|
guion.mp3
ADDED
|
Binary file (90.8 kB). View file
|
|
|
guion.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Hola, soy DRDELATV y esta es mi voz definitiva para dominar el metaverso Kuchiyuya.
|
| 2 |
+
[Enter + Ctrl+D]
|
| 3 |
+
|
instalar_dependencias.sh
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
echo "π§ Instalando dependencias: sox, aplay, arecord, gradio, gtts..."
|
| 3 |
+
|
| 4 |
+
sudo apt update
|
| 5 |
+
sudo apt install -y sox alsa-utils python3-pip
|
| 6 |
+
pip install gradio gtts
|
| 7 |
+
echo "β
Todo listo para grabar y reproducir voz Kuchiyuya."
|
kuchi_voice.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8e087b27899dec2f9246407208c519e674dd7a299add2b0ede372c7d94ee831
|
| 3 |
+
size 7938044
|
leer_texto.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from gtts import gTTS
|
| 3 |
+
|
| 4 |
+
print("π Escribe o pega tu texto de presentaciΓ³n. Termina con Enter + Ctrl+D (en Linux):\n")
|
| 5 |
+
|
| 6 |
+
# Leer texto completo desde stdin
|
| 7 |
+
try:
|
| 8 |
+
texto = ""
|
| 9 |
+
while True:
|
| 10 |
+
linea = input()
|
| 11 |
+
texto += linea + "\n"
|
| 12 |
+
except EOFError:
|
| 13 |
+
pass
|
| 14 |
+
|
| 15 |
+
# Guardar texto
|
| 16 |
+
with open("guion.txt", "w", encoding="utf-8") as f:
|
| 17 |
+
f.write(texto)
|
| 18 |
+
|
| 19 |
+
# Convertir a voz
|
| 20 |
+
tts = gTTS(text=texto, lang="es")
|
| 21 |
+
tts.save("guion.mp3")
|
| 22 |
+
|
| 23 |
+
# Reproducir
|
| 24 |
+
print("π Reproduciendo tu presentaciΓ³n con voz generada...")
|
| 25 |
+
os.system("mpg123 guion.mp3 || ffplay -nodisp -autoexit guion.mp3")
|
mi_voz.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbee525a2252f611f1ab306d8f0de537b019903981572fb67d985c336be62f5c
|
| 3 |
+
size 1764044
|
output.mp3
ADDED
|
Binary file (10.6 kB). View file
|
|
|
output.wav
ADDED
|
Binary file (96 kB). View file
|
|
|
prueba.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
print("π· PyTorch estΓ‘ funcionando:", torch.cuda.is_available())
|
reproducir_con_mi_voz.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import csv
|
| 3 |
+
import difflib
|
| 4 |
+
from pydub import AudioSegment
|
| 5 |
+
from pydub.playback import play
|
| 6 |
+
import unicodedata
|
| 7 |
+
|
| 8 |
+
def limpiar_texto(texto):
    """Normalize text for fuzzy comparison.

    Lower-cases and trims ``texto``, decomposes it with NFKD and drops every
    character that has no ASCII representation (accents, emoji, etc.).

    Args:
        texto: The string to normalize.

    Returns:
        The ASCII-only, lower-case, stripped version of ``texto``.
    """
    recortado = texto.lower().strip()
    descompuesto = unicodedata.normalize("NFKD", recortado)
    # Combining marks produced by NFKD are non-ASCII, so "ignore" removes them.
    solo_ascii = descompuesto.encode("ascii", "ignore")
    return solo_ascii.decode("utf-8")
|
| 12 |
+
|
| 13 |
+
# Cargar dataset
|
| 14 |
+
dataset = []
|
| 15 |
+
with open("dataset.csv", newline='', encoding='utf-8') as f:
|
| 16 |
+
reader = csv.DictReader(f)
|
| 17 |
+
for row in reader:
|
| 18 |
+
dataset.append({
|
| 19 |
+
"path": row["path"],
|
| 20 |
+
"text": limpiar_texto(row["text"])
|
| 21 |
+
})
|
| 22 |
+
|
| 23 |
+
# Entrada del usuario
|
| 24 |
+
print("π Escribe tu texto y buscarΓ© el audio mΓ‘s parecido con tu voz grabada:")
|
| 25 |
+
texto_input = input("π ").strip()
|
| 26 |
+
texto_clean = limpiar_texto(texto_input)
|
| 27 |
+
|
| 28 |
+
# Buscar coincidencias con difflib
|
| 29 |
+
frases = [d["text"] for d in dataset]
|
| 30 |
+
matches = difflib.get_close_matches(texto_clean, frases, n=3, cutoff=0.5)
|
| 31 |
+
|
| 32 |
+
if not matches:
|
| 33 |
+
print("β No encontrΓ© coincidencias suficientes. Intenta con otra frase.")
|
| 34 |
+
exit()
|
| 35 |
+
|
| 36 |
+
# Mostrar opciones
|
| 37 |
+
print("\nπ― Frase(s) encontrada(s):")
|
| 38 |
+
for i, frase in enumerate(matches):
|
| 39 |
+
print(f"{i+1}. {frase}")
|
| 40 |
+
|
| 41 |
+
# Usar la primera como predeterminada
|
| 42 |
+
match_texto = matches[0]
|
| 43 |
+
|
| 44 |
+
# Buscar el path
|
| 45 |
+
ruta_audio = next(d["path"] for d in dataset if d["text"] == match_texto)
|
| 46 |
+
|
| 47 |
+
# Reproducir
|
| 48 |
+
print(f"\nπ Reproduciendo audio: {ruta_audio}")
|
| 49 |
+
audio = AudioSegment.from_wav(ruta_audio)
|
| 50 |
+
play(audio)
|
reproducir_web.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import csv
|
| 3 |
+
import os
|
| 4 |
+
import difflib
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
from pydub.playback import play
|
| 7 |
+
|
| 8 |
+
DATASET = "dataset.csv"
|
| 9 |
+
AUDIO_DIR = "voces"
|
| 10 |
+
|
| 11 |
+
# Carga el dataset
|
| 12 |
+
def cargar_dataset(ruta=None):
    """Load the clip index from a CSV file with ``path`` and ``text`` columns.

    Args:
        ruta: CSV file to read. When omitted, the module-level ``DATASET``
            path is used, which is the original behavior.

    Returns:
        A list of ``{"path": ..., "text": ...}`` dicts, one per CSV row, with
        ``text`` lower-cased and stripped so it can be compared against a
        normalized query.
    """
    origen = DATASET if ruta is None else ruta
    with open(origen, newline='', encoding='utf-8') as f:
        filas = csv.DictReader(f)
        # DictReader fills columns missing from a short row with None; treat
        # that as empty text instead of crashing on None.lower().
        return [
            {"path": fila["path"], "text": (fila["text"] or "").lower().strip()}
            for fila in filas
        ]
|
| 16 |
+
|
| 17 |
+
dataset = cargar_dataset()
|
| 18 |
+
|
| 19 |
+
def buscar_audio(texto):
    """Find the recorded clip whose transcript is closest to ``texto``.

    The query is lower-cased and stripped, then compared against every
    transcript in the module-level ``dataset`` with
    ``difflib.get_close_matches`` (best single match, cutoff 0.6).

    Args:
        texto: Phrase typed by the user.

    Returns:
        A ``(path, message)`` tuple. ``path`` is ``None`` when nothing is
        close enough.
    """
    consulta = texto.lower().strip()
    frases = [entrada["text"] for entrada in dataset]
    coincidencias = difflib.get_close_matches(consulta, frases, n=1, cutoff=0.6)
    if not coincidencias:
        return None, "β No encontrΓ© coincidencias."

    mejor = coincidencias[0]
    for entrada in dataset:
        if entrada["text"] != mejor:
            continue
        ruta = entrada["path"]
        return ruta, f"π Reproduciendo: {ruta}"
    # Defensive fallback: the match always comes from ``frases``.
    return None, "β Algo fallΓ³."
|
| 30 |
+
|
| 31 |
+
def demo_func(texto):
    """Gradio callback: look up the closest clip for ``texto``.

    Returns:
        ``(path_or_None, message)``, matching the interface's two outputs
        (audio component, result textbox). Any falsy path is reported as
        ``None``.
    """
    ruta, mensaje = buscar_audio(texto)
    return (ruta or None), mensaje
|
| 37 |
+
|
| 38 |
+
demo = gr.Interface(
|
| 39 |
+
fn=demo_func,
|
| 40 |
+
inputs=gr.Textbox(label="Texto para buscar en tu voz real"),
|
| 41 |
+
outputs=[gr.Audio(label="Audio encontrado"), gr.Textbox(label="Resultado")],
|
| 42 |
+
title="π½ Voz Kuchiyuya - BΓΊsqueda de frase grabada",
|
| 43 |
+
description="Introduce una frase parecida a una grabada. El sistema reproducirΓ‘ el clip mΓ‘s cercano."
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
if __name__ == "__main__":
|
| 47 |
+
demo.launch(share=True)
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==1.26.4
torch
torchaudio
scipy
librosa
soundfile
transformers
gradio
# Imported by src/app_kuchiyuya.py (the Space app_file), reproducir_web.py
# and reproducir_con_mi_voz.py
pydub
# Imported by leer_texto.py
gtts
|
src/__init__.py
ADDED
|
File without changes
|
src/app_kuchiyuya.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import csv
|
| 3 |
+
import os
|
| 4 |
+
import difflib
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
from pydub.playback import play
|
| 7 |
+
|
| 8 |
+
DATASET = "dataset.csv"
|
| 9 |
+
AUDIO_DIR = "voces"
|
| 10 |
+
|
| 11 |
+
def cargar_dataset(ruta=None):
    """Read the recorded-clip index (CSV with ``path`` and ``text`` columns).

    Args:
        ruta: CSV file to read; defaults to the module-level ``DATASET`` so
            existing zero-argument calls keep working.

    Returns:
        One ``{"path": ..., "text": ...}`` dict per CSV row. ``text`` is
        lower-cased and stripped for matching.
    """
    if ruta is None:
        ruta = DATASET
    registros = []
    with open(ruta, newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            # A row shorter than the header yields None for the missing
            # column; fall back to "" so .lower() cannot fail.
            texto = row["text"] or ""
            registros.append({"path": row["path"], "text": texto.lower().strip()})
    return registros
|
| 15 |
+
|
| 16 |
+
dataset = cargar_dataset()
|
| 17 |
+
|
| 18 |
+
def buscar_audio(texto):
    """Return the clip whose transcript best matches ``texto``.

    Matching uses ``difflib.get_close_matches`` over the transcripts in the
    module-level ``dataset`` (one result, cutoff 0.6) after lower-casing and
    stripping the query.

    Args:
        texto: Phrase typed by the user.

    Returns:
        ``(path, message)``; ``path`` is ``None`` when there is no match.
    """
    consulta = texto.lower().strip()
    candidatos = [clip["text"] for clip in dataset]
    coincidencias = difflib.get_close_matches(consulta, candidatos, n=1, cutoff=0.6)
    if not coincidencias:
        return None, "β No encontrΓ© coincidencias en tus clips grabados."

    ganador = coincidencias[0]
    for clip in dataset:
        if clip["text"] != ganador:
            continue
        ruta = clip["path"]
        return ruta, f"π§ Clip encontrado: {ruta}"
    # Defensive fallback: the match always comes from ``candidatos``.
    return None, "β οΈ Algo fallΓ³ en la bΓΊsqueda."
|
| 27 |
+
|
| 28 |
+
def demo_func(texto):
    """Gradio callback wrapping ``buscar_audio``.

    Returns:
        ``(path_or_None, message)`` for the audio and textbox outputs; a
        falsy path is normalized to ``None``.
    """
    ruta, mensaje = buscar_audio(texto)
    if not ruta:
        ruta = None
    return ruta, mensaje
|
| 31 |
+
|
| 32 |
+
demo = gr.Interface(
|
| 33 |
+
fn=demo_func,
|
| 34 |
+
inputs=gr.Textbox(label="Escribe una frase similar a una grabada"),
|
| 35 |
+
outputs=[gr.Audio(label="Clip de voz"), gr.Textbox(label="Resultado")],
|
| 36 |
+
title="π½ Voz Kuchiyuya Real",
|
| 37 |
+
description="Este demo busca y reproduce tu grabaciΓ³n mΓ‘s parecida al texto ingresado."
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
if __name__ == "__main__":
|
| 41 |
+
demo.launch(share=True)
|
src/infer_drt.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
| 5 |
+
|
| 6 |
+
# Modelo en Hugging Face
|
| 7 |
+
MODEL_ID = "DRDELATV/drtd"
|
| 8 |
+
TOKEN = os.getenv("HF_TOKEN") # β Token cargado desde .env o variable de entorno
|
| 9 |
+
|
| 10 |
+
print("π Cargando modelo DRTD desde Hugging Face...")
|
| 11 |
+
processor = AutoProcessor.from_pretrained(MODEL_ID, token=TOKEN)
|
| 12 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_ID, token=TOKEN)
|
| 13 |
+
model.eval()
|
| 14 |
+
|
| 15 |
+
def inferir_audio(texto):
    """Run ``texto`` through the loaded processor/model and return the decoded result.

    The text is encoded with the module-level ``processor``, passed to
    ``model.generate`` with gradients disabled, and the first entry of
    ``processor.batch_decode`` is returned.

    NOTE(review): despite the name, the returned value is whatever
    ``batch_decode`` yields, and the interface displays it in a ``Textbox`` --
    confirm whether real audio output was intended.

    Args:
        texto: Input text from the UI.

    Returns:
        The first decoded sequence.
    """
    entradas = processor(text=texto, return_tensors="pt")
    with torch.no_grad():  # inference only
        generado = model.generate(**entradas)
    decodificados = processor.batch_decode(generado, skip_special_tokens=True)
    resultado = decodificados[0]
    print(f"π€ Texto sintetizado: {texto}")
    return resultado
|
| 22 |
+
|
| 23 |
+
demo = gr.Interface(
|
| 24 |
+
fn=inferir_audio,
|
| 25 |
+
inputs=gr.Textbox(label="π Escribe algo..."),
|
| 26 |
+
outputs=gr.Textbox(label="π Resultado generado"),
|
| 27 |
+
title="π½ TTS DRTD Kuchiyuya",
|
| 28 |
+
description="Convierte texto en voz usando el modelo personalizado entrenado en HuggingFace"
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
if __name__ == "__main__":
|
| 32 |
+
demo.launch(share=True)
|
src/inference.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
AUDIO_DIR = "voces"


def reproducir_random(texto):
    """Pick a random ``.wav`` clip from ``AUDIO_DIR``.

    The function feeds a single ``gr.Audio`` output, so it always returns one
    value. Previously the error branches returned a ``(None, message)`` tuple,
    which does not match a single-output interface; the message is now logged
    and ``None`` is returned.

    Args:
        texto: Ignored; present only because the interface passes the textbox
            value.

    Returns:
        The path of the chosen clip (``AUDIO_DIR`` joined with the file name),
        or ``None`` when the folder is missing or holds no ``.wav`` files.
    """
    # isdir (not exists) so a regular file named "voces" cannot reach listdir.
    if not os.path.isdir(AUDIO_DIR):
        print("β Carpeta 'voces/' no encontrada.")
        return None

    archivos = [f for f in os.listdir(AUDIO_DIR) if f.endswith(".wav")]
    if not archivos:
        print("β οΈ No hay archivos .wav en la carpeta 'voces/'.")
        return None

    ruta = os.path.join(AUDIO_DIR, random.choice(archivos))
    print(f"π€ Reproduciendo: {ruta}")
    return ruta
|
| 20 |
+
|
| 21 |
+
demo = gr.Interface(
|
| 22 |
+
fn=reproducir_random,
|
| 23 |
+
inputs=gr.Textbox(label="Texto (opcional, se ignora)"),
|
| 24 |
+
outputs=gr.Audio(label="π Frase aleatoria"),
|
| 25 |
+
title="π½ Demo de Voz Kuchiyuya",
|
| 26 |
+
description="Reproduce una frase real grabada al azar desde la carpeta 'voces'. Ideal para demos, IA y entrenamiento de clones vocales."
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
if __name__ == "__main__":
|
| 30 |
+
demo.launch(share=True)
|
src/train.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
def train(iteraciones=100, pausa=0.05):
    """Run the simulated KuchiyuyaNet training loop, printing fake progress.

    No model is trained or saved; each iteration sleeps, draws a random
    "loss" that shrinks with the iteration number, and prints a status line.

    Args:
        iteraciones: Number of simulated iterations. Defaults to 100, the
            previously hard-coded value.
        pausa: Seconds to sleep per iteration. Defaults to 0.05; pass 0 to
            run instantly (e.g. from a test script).

    Returns:
        None.
    """
    print("π· Iniciando entrenamiento de KuchiyuyaNet v1.0...")
    print("π§ Preparando barro, afilando colmillos, calentando tokens...\n")

    for i in range(1, iteraciones + 1):
        time.sleep(pausa)  # Simulates training time
        loss = round(random.uniform(0.1, 1.0) / i, 4)
        status = "π₯" if loss > 0.01 else "π"
        print(f"{status} IteraciΓ³n {i}/{iteraciones} - pΓ©rdida simulada: {loss}")

    print("\nπ ENTRENAMIENTO COMPLETADO")
    # This literal was split across two physical lines in the source; it is
    # rejoined here so the function parses.
    print("β KuchiyuyaNet estΓ‘ listo para predecir traiciones, susurrar secretos y dominar el metaverso.")
    print("π Guardando modelo en /models/kuchiyuya_final.pt (simulado)\n")
|
| 17 |
+
|
| 18 |
+
if __name__ == "__main__":
|
| 19 |
+
train()
|
src/train.py.save
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# π Estructura del proyecto generado para 'voice'
|
| 2 |
+
|
| 3 |
+
# π¦ voice/
|
| 4 |
+
# βββ README.md
|
| 5 |
+
# βββ requirements.txt
|
| 6 |
+
# βββ src/
|
| 7 |
+
# β βββ model.py
|
| 8 |
+
# β βββ train.py
|
| 9 |
+
# β βββ inference.py
|
| 10 |
+
# βββ data/
|
| 11 |
+
# β βββ samples/
|
| 12 |
+
# βββ app_gradio.py
|
| 13 |
+
|
| 14 |
+
# ========================================
|
| 15 |
+
# β
README.md (inicial)
|
| 16 |
+
|
| 17 |
+
readme = '''
|
| 18 |
+
# π½ Kuchiuya Voice Synthesizer
|
| 19 |
+
|
| 20 |
+
Este proyecto permite entrenar e inferir voces sintΓ©ticas al estilo Kuchiuya.
|
| 21 |
+
|
| 22 |
+
## Requisitos
|
| 23 |
+
```bash
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## Entrenamiento
|
| 28 |
+
```bash
|
| 29 |
+
python src/train.py
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
## Inferencia
|
| 33 |
+
```bash
|
| 34 |
+
python src/inference.py --text "El barro no miente."
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## Demo Web
|
| 38 |
+
```bash
|
| 39 |
+
python app_gradio.py
|
| 40 |
+
```
|
| 41 |
+
'''
|
| 42 |
+
|
| 43 |
+
# ========================================
|
| 44 |
+
# β
requirements.txt
|
| 45 |
+
|
| 46 |
+
requirements = '''
|
| 47 |
+
torch
|
| 48 |
+
torchaudio
|
| 49 |
+
numpy
|
| 50 |
+
scipy
|
| 51 |
+
librosa
|
| 52 |
+
soundfile
|
| 53 |
+
gradio
|
| 54 |
+
pyttsx3
|
| 55 |
+
'''
|
| 56 |
+
|
| 57 |
+
# ========================================
|
| 58 |
+
# β
model.py
|
| 59 |
+
|
| 60 |
+
model_py = '''
|
| 61 |
+
import torch.nn as nn
|
| 62 |
+
|
| 63 |
+
class SimpleTTSModel(nn.Module):
|
| 64 |
+
def __init__(self, input_dim=256, hidden_dim=512, output_dim=80):
|
| 65 |
+
super(SimpleTTSModel, self).__init__()
|
| 66 |
+
self.model = nn.Sequential(
|
| 67 |
+
nn.Linear(input_dim, hidden_dim),
|
| 68 |
+
nn.ReLU(),
|
| 69 |
+
nn.Linear(hidden_dim, output_dim)
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
def forward(self, x):
|
| 73 |
+
return self.model(x)
|
| 74 |
+
'''
|
| 75 |
+
|
| 76 |
+
# ========================================
|
| 77 |
+
# β
train.py
|
| 78 |
+
|
| 79 |
+
train_py = '''
|
| 80 |
+
import torch
|
| 81 |
+
import torch.nn as nn
|
| 82 |
+
import torch.optim as optim
|
| 83 |
+
from src.model import SimpleTTSModel
|
| 84 |
+
import numpy as np
|
| 85 |
+
|
| 86 |
+
print("π· Entrenando modelo Kuchiuya...")
|
| 87 |
+
|
| 88 |
+
# Datos simulados (para demostrar el flujo)
|
| 89 |
+
x = torch.randn((100, 256))
|
| 90 |
+
y = torch.randn((100, 80))
|
| 91 |
+
|
| 92 |
+
model = SimpleTTSModel()
|
| 93 |
+
criterion = nn.MSELoss()
|
| 94 |
+
optimizer = optim.Adam(model.parameters(), lr=0.001)
|
| 95 |
+
|
| 96 |
+
for epoch in range(10):
|
| 97 |
+
optimizer.zero_grad()
|
| 98 |
+
outputs = model(x)
|
| 99 |
+
loss = criterion(outputs, y)
|
| 100 |
+
loss.backward()
|
| 101 |
+
optimizer.step()
|
| 102 |
+
print(f"Epoch {epoch+1}/10, PΓ©rdida: {loss.item():.4f}")
|
| 103 |
+
|
| 104 |
+
# Guardar modelo
|
| 105 |
+
torch.save(model.state_dict(), "tts_model.pth")
|
| 106 |
+
print("β
Modelo guardado como 'tts_model.pth'")
|
| 107 |
+
'''
|
| 108 |
+
|
| 109 |
+
# ========================================
|
| 110 |
+
# β
inference.py
|
| 111 |
+
|
| 112 |
+
inference_py = '''
|
| 113 |
+
import argparse
|
| 114 |
+
import pyttsx3
|
| 115 |
+
|
| 116 |
+
parser = argparse.ArgumentParser()
|
| 117 |
+
parser.add_argument('--text', type=str, required=True, help='Texto a sintetizar')
|
| 118 |
+
args = parser.parse_args()
|
| 119 |
+
|
| 120 |
+
engine = pyttsx3.init()
|
| 121 |
+
engine.setProperty('rate', 150)
|
| 122 |
+
engine.save_to_file(args.text, 'output.wav')
|
| 123 |
+
engine.runAndWait()
|
| 124 |
+
|
| 125 |
+
print(f"π Texto sintetizado: '{args.text}' β guardado como 'output.wav'")
|
| 126 |
+
'''
|
| 127 |
+
|
| 128 |
+
# ========================================
|
| 129 |
+
# β
app_gradio.py
|
| 130 |
+
|
| 131 |
+
app_gradio_py = '''
|
| 132 |
+
import gradio as gr
|
| 133 |
+
import pyttsx3
|
| 134 |
+
import os
|
| 135 |
+
|
| 136 |
+
def sintetizar(texto):
|
| 137 |
+
engine = pyttsx3.init()
|
| 138 |
+
engine.setProperty('rate', 150)
|
| 139 |
+
output_path = "voz_kuchiuya.wav"
|
| 140 |
+
engine.save_to_file(texto, output_path)
|
| 141 |
+
engine.runAndWait()
|
| 142 |
+
return output_path
|
| 143 |
+
|
| 144 |
+
demo = gr.Interface(
|
| 145 |
+
fn=sintetizar,
|
| 146 |
+
inputs=gr.Textbox(lines=2, placeholder="Escribe tu frase Kuchiuya aquΓ..."),
|
| 147 |
+
outputs=gr.Audio(type="filepath"),
|
| 148 |
+
title="Voz Kuchiuya",
|
| 149 |
+
description="Convierte texto en una voz ritual cyberpunk del universo Ariplanteater."
|
| 150 |
+
)
|
| 151 |
+
demo.launch()
|
| 152 |
+
'''
|