# Hugging Face Space — status: Sleeping
| import gradio as gr | |
| import numpy as np | |
| import librosa | |
| from scipy.fft import fft, fftfreq | |
| from datasets import load_dataset | |
# Tone alphabet: each letter A..Z is assigned a frequency in Hz, starting at
# 300 Hz for 'A' and rising in 100 Hz steps up to 2800 Hz for 'Z'.
# The space character is assigned 0 Hz and is inserted last, so it is the
# final entry of both FRECUENCIAS and LETRAS.
ABECEDARIO = {chr(ord('A') + i): 300 + 100 * i for i in range(26)}
ABECEDARIO[' '] = 0

# Parallel views of the table: FRECUENCIAS[i] is the frequency of LETRAS[i].
FRECUENCIAS = np.array([*ABECEDARIO.values()])
LETRAS = [*ABECEDARIO]
# Vocabulary of "real" words: every whitespace-separated token found in the
# "text" column of the dataset's train split, stripped and upper-cased.
# NOTE: this runs at import time and needs the dataset to be reachable.
ds = load_dataset("brianmwigo/spanish_dataset", split="train")
PALABRAS_REALES = {
    palabra.strip().upper()
    for linea in ds["text"]
    for palabra in linea.split()
}
def _frecuencia_dominante(frame, sr):
    """Return the frequency (Hz) of the strongest positive-frequency FFT bin.

    *frame* must contain at least two samples so that the positive half of
    the spectrum is not empty.
    """
    mitad = len(frame) // 2
    magn = np.abs(fft(frame)[:mitad])
    freqs = fftfreq(len(frame), 1 / sr)[:mitad]
    # argmax is invariant under scaling by a positive constant, so the
    # magnitudes do not need to be normalised before picking the peak.
    return freqs[np.argmax(magn)]


def decodificar_audio(audio_path):
    """Decode an audio file into letters and list vocabulary words they allow.

    The audio is cut into consecutive 50 ms windows. For each window the
    dominant FFT frequency is mapped to the letter in ``LETRAS`` whose
    entry in ``FRECUENCIAS`` is numerically closest. Consecutive repeated
    letters are then collapsed to obtain the "clean" sequence.

    The generated words are NOT read in order from the sequence: they are
    all entries of ``PALABRAS_REALES`` whose characters all occur somewhere
    in the clean sequence, longest first (ties broken alphabetically so the
    output is reproducible). Phrases are those words joined in groups of 5.

    Args:
        audio_path: Path of the audio file to decode, or ``None``.

    Returns:
        A multi-section text report (raw sequence, clean sequence, up to 50
        words, up to 10 phrases), or a warning string when *audio_path* is
        ``None``.
    """
    if audio_path is None:
        return "⚠️ Sin audio"

    y, sr = librosa.load(audio_path, sr=None, mono=True)

    ventana_ms = 50
    # At least 2 samples per window: a zero step would make range() raise,
    # and a 1-sample window has no positive-frequency bins to inspect.
    ventana_len = max(int(sr * ventana_ms / 1000), 2)

    letras = []
    for start in range(0, len(y), ventana_len):
        frame = y[start:start + ventana_len]
        if len(frame) < 2:
            # A trailing 1-sample frame yields an empty half-spectrum, on
            # which np.argmax would raise ValueError; skip it.
            continue
        freq_max = _frecuencia_dominante(frame, sr)
        idx_letra = np.abs(FRECUENCIAS - freq_max).argmin()
        letras.append(LETRAS[idx_letra])
    secuencia = "".join(letras)

    # Collapse runs of the same letter into a single occurrence.
    sin_repetidos = []
    for c in letras:
        if not sin_repetidos or sin_repetidos[-1] != c:
            sin_repetidos.append(c)
    secuencia_limpia = "".join(sin_repetidos)

    letras_disponibles = set(secuencia_limpia)
    # Secondary alphabetical key: PALABRAS_REALES is a set, so sorting by
    # length alone would leave ties in an order that varies between runs.
    palabras_validas = sorted(
        (w for w in PALABRAS_REALES if letras_disponibles.issuperset(w)),
        key=lambda w: (-len(w), w),
    )

    frases = [
        " ".join(palabras_validas[i:i + 5])
        for i in range(0, len(palabras_validas), 5)
    ]

    reporte = "SECUENCIA DETECTADA\n" + secuencia + "\n\n"
    reporte += "SECUENCIA LIMPIA\n" + secuencia_limpia + "\n\n"
    reporte += "PALABRAS GENERADAS\n" + ", ".join(palabras_validas[:50]) + "\n\n"
    reporte += "FRASES GENERADAS\n" + "\n".join(frases[:10])
    return reporte
# Gradio UI: one audio input (file upload or microphone), a button that runs
# decodificar_audio on it, and a text box that shows the resulting report.
with gr.Blocks() as demo:
    gr.Markdown(value="# Detectar frecuencias → letras → palabras reales")
    # type="filepath" makes Gradio hand the callback a path to the audio file.
    audio = gr.Audio(sources=["upload", "microphone"], type="filepath")
    btn = gr.Button(value="Decodificar")
    salida = gr.Textbox(lines=20)
    btn.click(fn=decodificar_audio, inputs=audio, outputs=salida)

demo.launch()