Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,13 +6,12 @@ from sklearn.cluster import KMeans
|
|
| 6 |
import torch
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 8 |
|
| 9 |
-
# ----
|
| 10 |
-
MODEL_NAME = "gpt2"
|
| 11 |
-
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 13 |
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
|
| 14 |
|
| 15 |
-
# ----
|
| 16 |
class DigitalToneDecoder:
|
| 17 |
def __init__(self, frame_ms=40, min_freq=300, max_freq=4000, peak_threshold=0.2, symbols=16):
|
| 18 |
self.frame_ms = frame_ms
|
|
@@ -40,22 +39,17 @@ class DigitalToneDecoder:
|
|
| 40 |
def detectar_tonos(self, stft, freqs):
|
| 41 |
tonos = []
|
| 42 |
media_global = np.mean(stft)
|
| 43 |
-
|
| 44 |
for frame in stft.T:
|
| 45 |
if np.mean(frame) < media_global * 0.5:
|
| 46 |
continue
|
| 47 |
-
|
| 48 |
frame_norm = frame / np.max(frame)
|
| 49 |
peaks, _ = find_peaks(frame_norm, height=self.peak_threshold)
|
| 50 |
if len(peaks) == 0:
|
| 51 |
continue
|
| 52 |
-
|
| 53 |
peak = peaks[np.argmax(frame_norm[peaks])]
|
| 54 |
freq = freqs[peak]
|
| 55 |
-
|
| 56 |
if self.min_freq <= freq <= self.max_freq:
|
| 57 |
tonos.append(freq)
|
| 58 |
-
|
| 59 |
return np.array(tonos)
|
| 60 |
|
| 61 |
def crear_simbolos(self, tonos):
|
|
@@ -79,13 +73,25 @@ class DigitalToneDecoder:
|
|
| 79 |
out += letras[idx % len(letras)]
|
| 80 |
return out
|
| 81 |
|
| 82 |
-
# ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
def generar_texto_ia(letras):
|
|
|
|
| 84 |
prompt = (
|
| 85 |
f"Secuencia de letras: {letras}\n"
|
| 86 |
-
"
|
|
|
|
|
|
|
| 87 |
)
|
| 88 |
-
|
| 89 |
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
|
| 90 |
out = model.generate(
|
| 91 |
input_ids,
|
|
@@ -98,7 +104,7 @@ def generar_texto_ia(letras):
|
|
| 98 |
texto = tokenizer.decode(out[0], skip_special_tokens=True)
|
| 99 |
return texto
|
| 100 |
|
| 101 |
-
# ----
|
| 102 |
def analizar(audio_path, progress=gr.Progress()):
|
| 103 |
if audio_path is None:
|
| 104 |
return "Sin audio"
|
|
@@ -113,18 +119,18 @@ def analizar(audio_path, progress=gr.Progress()):
|
|
| 113 |
progress(0.8)
|
| 114 |
centros = decoder.crear_simbolos(tonos)
|
| 115 |
seq = decoder.decodificar(tonos, centros)
|
| 116 |
-
|
| 117 |
-
ia = generar_texto_ia(seq)
|
| 118 |
|
| 119 |
reporte = ""
|
| 120 |
reporte += "SECUENCIA DETECTADA\n"
|
| 121 |
reporte += seq + "\n\n"
|
| 122 |
reporte += "TEXTO GENERADO POR IA\n"
|
| 123 |
-
reporte +=
|
| 124 |
return reporte
|
| 125 |
|
|
|
|
| 126 |
with gr.Blocks() as demo:
|
| 127 |
-
gr.Markdown("# Decodificador + Generador de Palabras/Frases con
|
| 128 |
audio = gr.Audio(type="filepath", sources=["upload","microphone"])
|
| 129 |
boton = gr.Button("Procesar")
|
| 130 |
salida = gr.Textbox(lines=15)
|
|
|
|
| 6 |
import torch
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 8 |
|
| 9 |
+
# ---- Modelo GPT-2 ----
|
| 10 |
+
MODEL_NAME = "gpt2"
|
|
|
|
| 11 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 12 |
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
|
| 13 |
|
| 14 |
+
# ---- Decodificador de Tonos ----
|
| 15 |
class DigitalToneDecoder:
|
| 16 |
def __init__(self, frame_ms=40, min_freq=300, max_freq=4000, peak_threshold=0.2, symbols=16):
|
| 17 |
self.frame_ms = frame_ms
|
|
|
|
| 39 |
def detectar_tonos(self, stft, freqs):
    """Pick the dominant in-band frequency of every sufficiently loud frame.

    Args:
        stft: 2-D array; its columns are visited one at a time as frames
            (``stft.T``). Assumed to hold non-negative magnitudes laid out
            as bins x frames -- TODO confirm against the caller.
        freqs: Sequence indexed by row position of ``stft`` giving the
            frequency associated with each bin.

    Returns:
        1-D ``np.ndarray`` with one frequency per accepted frame, in frame
        order. Frames that are too quiet, have no peak above
        ``self.peak_threshold`` or whose strongest peak lies outside
        ``[self.min_freq, self.max_freq]`` contribute nothing.
    """
    tonos = []
    media_global = np.mean(stft)
    for frame in stft.T:
        # Energy gate: ignore frames below half of the global mean level.
        if np.mean(frame) < media_global * 0.5:
            continue

        # A frame whose maximum is zero cannot be normalised; dividing
        # would only produce NaNs and a RuntimeWarning (this happens when
        # the whole input is silent, so the gate above lets it through).
        maximo = np.max(frame)
        if maximo == 0:
            continue

        frame_norm = frame / maximo
        peaks, _ = find_peaks(frame_norm, height=self.peak_threshold)
        if len(peaks) == 0:
            continue

        # Keep only the strongest peak of the frame.
        peak = peaks[np.argmax(frame_norm[peaks])]
        freq = freqs[peak]

        if self.min_freq <= freq <= self.max_freq:
            tonos.append(freq)
    return np.array(tonos)
|
| 54 |
|
| 55 |
def crear_simbolos(self, tonos):
|
|
|
|
| 73 |
out += letras[idx % len(letras)]
|
| 74 |
return out
|
| 75 |
|
| 76 |
+
# ---- Preprocesamiento de letras ----
|
| 77 |
+
def limpiar_repeticiones(texto):
    """Collapse every run of consecutive identical characters into one.

    Args:
        texto: Iterable of characters, normally the decoded letter string.

    Returns:
        A string in which adjacent duplicates are removed. Repeats that are
        not adjacent are kept, e.g. ``"AABBA"`` becomes ``"ABA"``.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from itertools import groupby

    # groupby yields one key per run of equal neighbours; joining once avoids
    # rebuilding the string for every appended character.
    return "".join(letra for letra, _ in groupby(texto))
|
| 85 |
+
|
| 86 |
+
# ---- Generador de palabras/frases con IA ----
|
| 87 |
def generar_texto_ia(letras):
|
| 88 |
+
letras = limpiar_repeticiones(letras)
|
| 89 |
prompt = (
|
| 90 |
f"Secuencia de letras: {letras}\n"
|
| 91 |
+
"Interpreta estas letras como posibles palabras o mensaje encriptado en otras dimensiones de la matrix,
|
| 92 |
+
y luego construye frases con ellas si lo ves conveniente, si no. "
|
| 93 |
+
"Devuelve solo palabras y frases separadas en español o ingles, sin URLs ni texto irrelevante:\n"
|
| 94 |
)
|
|
|
|
| 95 |
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
|
| 96 |
out = model.generate(
|
| 97 |
input_ids,
|
|
|
|
| 104 |
texto = tokenizer.decode(out[0], skip_special_tokens=True)
|
| 105 |
return texto
|
| 106 |
|
| 107 |
+
# ---- Función principal ----
|
| 108 |
def analizar(audio_path, progress=gr.Progress()):
|
| 109 |
if audio_path is None:
|
| 110 |
return "Sin audio"
|
|
|
|
| 119 |
progress(0.8)
|
| 120 |
centros = decoder.crear_simbolos(tonos)
|
| 121 |
seq = decoder.decodificar(tonos, centros)
|
| 122 |
+
ia_text = generar_texto_ia(seq)
|
|
|
|
| 123 |
|
| 124 |
reporte = ""
|
| 125 |
reporte += "SECUENCIA DETECTADA\n"
|
| 126 |
reporte += seq + "\n\n"
|
| 127 |
reporte += "TEXTO GENERADO POR IA\n"
|
| 128 |
+
reporte += ia_text
|
| 129 |
return reporte
|
| 130 |
|
| 131 |
+
# ---- Interfaz Gradio ----
|
| 132 |
with gr.Blocks() as demo:
|
| 133 |
+
gr.Markdown("# Decodificador + Generador de Palabras/Frases con GPT-2")
|
| 134 |
audio = gr.Audio(type="filepath", sources=["upload","microphone"])
|
| 135 |
boton = gr.Button("Procesar")
|
| 136 |
salida = gr.Textbox(lines=15)
|