Improve response generation by tokenizing input and streaming word by word

Update `logica.py` to tokenize user input and use token-based similarity for matching responses. Modify `app.py` to stream responses token by token, simulating a real AI model's output.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: e3ff2484-bbd8-4aba-bea0-1940769b874a
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 38e4ae2b-4ea5-4fa5-a9cf-327acce0f548
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/1739408b-93a5-479b-a658-30f2493b0467/e3ff2484-bbd8-4aba-bea0-1940769b874a/vyYjoCT
Replit-Helium-Checkpoint-Created: true

Files changed (2)

chat-app/app.py +19 -8
chat-app/logica.py +93 -31

chat-app/app.py CHANGED Viewed

@@ -18,11 +18,22 @@ def añadir_turno(historial, user_msg, bot_msg=""):
         {"role": "assistant", "content": bot_msg},
     ]
-def stream_texto(texto, historial):
-    CHUNK = 8
-    for i in range(0, len(texto), CHUNK):
-        historial[-1]["content"] += texto[i:i+CHUNK]
-        time.sleep(0.02)
         yield historial
 def responder(mensaje, historial):
@@ -40,7 +51,7 @@ def responder(mensaje, historial):
             "_Escribe tu operación o di 'salir de calculadora' para volver._"
         )
         historial = añadir_turno(historial, mensaje)
-        for h in stream_texto(saludo, historial):
             yield h, ""
         return
@@ -54,7 +65,7 @@ def responder(mensaje, historial):
         if resultado is not None:
             respuesta = formatear_resultado(mensaje, resultado)
             historial = añadir_turno(historial, mensaje)
-            for h in stream_texto(respuesta, historial):
                 yield h, ""
             return
@@ -84,7 +95,7 @@ def responder(mensaje, historial):
     respuesta_personalizada = buscar_respuesta_personalizada(mensaje)
     if respuesta_personalizada:
         historial = añadir_turno(historial, mensaje)
-        for h in stream_texto(respuesta_personalizada, historial):
             yield h, ""
         return

         {"role": "assistant", "content": bot_msg},
     ]
def stream_tokens(texto, historial, *, pausa_corta=0.030, pausa_larga=0.055):
    """
    Stream a reply into the chat history one space-separated token at a time.

    The growing text is written into ``historial[-1]["content"]`` and the same
    ``historial`` list is yielded after every token, so a consumer can
    re-render the conversation as the reply "types itself".

    Args:
        texto: Full reply text. It is split on single spaces only, so newlines
            and repeated spaces are preserved and the final content equals
            ``texto`` exactly.
        historial: List of message dicts; its last entry receives the text.
        pausa_corta: Seconds to wait after a token of at most 4 characters.
        pausa_larga: Seconds to wait after a longer token.

    Yields:
        ``historial`` itself (mutated in place) once per token.
    """
    tokens = texto.split(" ")
    ultimo = len(tokens) - 1
    acumulado = ""
    for i, token in enumerate(tokens):
        # Re-attach the separator that split(" ") removed, except after the
        # last token, so the streamed text ends up identical to `texto`.
        acumulado += token if i == ultimo else token + " "
        historial[-1]["content"] = acumulado
        # Variable pause: longer tokens take slightly longer to "generate".
        delay = pausa_larga if len(token) > 4 else pausa_corta
        # Empty tokens come from consecutive spaces; they add nothing visible
        # worth pausing for. A non-positive delay disables pacing entirely.
        if token and delay > 0:
            time.sleep(delay)
        yield historial
 def responder(mensaje, historial):
             "_Escribe tu operación o di 'salir de calculadora' para volver._"
         )
         historial = añadir_turno(historial, mensaje)
+        for h in stream_tokens(saludo, historial):
             yield h, ""
         return
         if resultado is not None:
             respuesta = formatear_resultado(mensaje, resultado)
             historial = añadir_turno(historial, mensaje)
+            for h in stream_tokens(respuesta, historial):
                 yield h, ""
             return
     respuesta_personalizada = buscar_respuesta_personalizada(mensaje)
     if respuesta_personalizada:
         historial = añadir_turno(historial, mensaje)
+        for h in stream_tokens(respuesta_personalizada, historial):
             yield h, ""
         return

chat-app/logica.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import re
 import json
 from roblox_api import buscar_jugador, buscar_juego, formatear_jugador, formatear_juego
 from matematicas import (
     es_solicitud_calculadora, es_operacion_matematica,
@@ -40,22 +41,105 @@ def cargar_respuestas():
 RESPUESTAS_PERSONALIZADAS = cargar_respuestas()
 def buscar_respuesta_personalizada(mensaje):
-    texto = mensaje.lower().strip()
     for entrada in RESPUESTAS_PERSONALIZADAS:
         for pregunta in entrada.get("preguntas", []):
-            if pregunta.lower() in texto or texto in pregunta.lower():
-                return entrada.get("respuesta")
     return None
 def generar_explicacion_juego(datos):
-    nombre = datos.get("nombre", "Este juego")
-    tema = datos.get("tema", "")
     descripcion = datos.get("descripcion", "").strip()
-    visitas = datos.get("visitas", 0)
-    creador = datos.get("creador", "")
-    tipo = GENEROS_ROBLOX.get(tema, f"un juego de {tema.lower()}" if tema else "un juego")
     partes = [f"**{nombre}** es {tipo} de Roblox creado por **{creador}**."]
     if descripcion and descripcion != "Sin descripción.":
@@ -110,29 +194,7 @@ def detectar_roblox(mensaje):
             return "juego", m.group(1).strip()
     return None, None
-def extraer_texto_content(content):
-    if not content:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        partes = []
-        for bloque in content:
-            if isinstance(bloque, str):
-                partes.append(bloque)
-            elif isinstance(bloque, dict):
-                partes.append(str(bloque.get("text") or bloque.get("value") or bloque.get("content") or ""))
-        return " ".join(partes)
-    return str(content)
-def modo_calculadora_activo(historial):
-    if not historial:
-        return False
-    for msg in reversed(historial):
-        if isinstance(msg, dict) and msg.get("role") == "assistant":
-            texto = extraer_texto_content(msg.get("content")).lower()
-            return "calculadora neo-1" in texto or "aquí tienes nuestra calculadora" in texto
-    return False
 def respuesta_final(mensaje, historial):
     texto = mensaje.strip().lower()

 import os
 import re
 import json
+import unicodedata
 from roblox_api import buscar_jugador, buscar_juego, formatear_jugador, formatear_juego
 from matematicas import (
     es_solicitud_calculadora, es_operacion_matematica,
 RESPUESTAS_PERSONALIZADAS = cargar_respuestas()
+# ── TOKENIZADOR ────────────────────────────────────────────────────────────────
def tokenizar(texto):
    """
    Convert text into a list of normalized tokens.

    Steps:
        1. Case-fold (stronger than lowercasing, e.g. "ß" -> "ss").
        2. Decompose to NFD and drop combining marks (accents/diacritics).
        3. Replace every non-alphanumeric character, including "_", by a space.
        4. Split on whitespace.

    NOTE: dropping combining marks also turns "ñ" into "n" ("año" -> "ano").

    Args:
        texto: Input string. ``None`` or an empty string yields ``[]``.

    Returns:
        List of tokens in order of appearance (duplicates are kept).
    """
    if not texto:
        return []
    # Case-fold before decomposing so that marks introduced by folding
    # (e.g. "İ" -> "i" + combining dot) are stripped as well.
    descompuesto = unicodedata.normalize("NFD", texto.casefold())
    sin_marcas = "".join(
        c for c in descompuesto if unicodedata.category(c) != "Mn"
    )
    # \W covers punctuation and whitespace; "_" must be listed explicitly
    # because \w treats it as a word character.
    return re.sub(r"[\W_]+", " ", sin_marcas).split()
def similitud_tokens(tokens_entrada, tokens_patron, *, bonus_subconjunto=0.80):
    """
    Score how well a pattern matches an input, using Jaccard similarity.

    Both token lists are compared as sets. When every pattern token occurs in
    the input, the score is lifted into ``[bonus_subconjunto, 1.0]`` and
    scaled by the Jaccard overlap, so a pattern that covers more of the input
    ranks above a shorter, more generic one (an exact match still scores 1.0).

    Args:
        tokens_entrada: Tokens of the user message.
        tokens_patron: Tokens of the candidate pattern.
        bonus_subconjunto: Minimum score granted to a fully contained pattern.

    Returns:
        A float between 0.0 and 1.0; 0.0 when the pattern has no tokens.
    """
    set_entrada = set(tokens_entrada)
    set_patron = set(tokens_patron)
    if not set_patron:
        return 0.0
    # The union is never empty here because set_patron is non-empty.
    jaccard = len(set_entrada & set_patron) / len(set_entrada | set_patron)
    if set_patron <= set_entrada:
        # Keep the floor, but let the overlap break ties between contained
        # patterns instead of flattening them all to the same value.
        return bonus_subconjunto + (1.0 - bonus_subconjunto) * jaccard
    return jaccard
 def buscar_respuesta_personalizada(mensaje):
+    """
+    Busca la mejor respuesta usando similitud de tokens.
+    Umbral mínimo: 0.20 (al menos 20% de overlap Jaccard)
+    """
+    tokens_entrada = tokenizar(mensaje)
+    mejor_respuesta = None
+    mejor_score = 0.0
+    UMBRAL = 0.20
     for entrada in RESPUESTAS_PERSONALIZADAS:
         for pregunta in entrada.get("preguntas", []):
+            tokens_patron = tokenizar(pregunta)
+            score = similitud_tokens(tokens_entrada, tokens_patron)
+            if score > mejor_score:
+                mejor_score = score
+                mejor_respuesta = entrada.get("respuesta")
+    if mejor_score >= UMBRAL:
+        return mejor_respuesta
     return None
+# ── UTILIDADES ─────────────────────────────────────────────────────────────────
def extraer_texto_content(content):
    """
    Flatten a chat message ``content`` value into plain text.

    Handled shapes:
        - falsy value: ``""``
        - ``str``: returned unchanged
        - ``dict`` block: text of its first truthy ``"text"``, ``"value"`` or
          ``"content"`` entry (extracted recursively, so nested block lists
          are flattened instead of being rendered as a Python repr)
        - ``list``: its ``str`` and ``dict`` items, extracted and joined with
          single spaces; items of any other type are ignored
        - anything else: ``str(content)``

    Args:
        content: The ``"content"`` field of a message.

    Returns:
        The extracted text (possibly empty).
    """
    if not content:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return extraer_texto_content(
            content.get("text") or content.get("value") or content.get("content")
        )
    if isinstance(content, list):
        return " ".join(
            extraer_texto_content(bloque)
            for bloque in content
            if isinstance(bloque, (str, dict))
        )
    return str(content)
def modo_calculadora_activo(historial):
    """
    Tell whether the most recent assistant message is a calculator prompt.

    Walks ``historial`` from newest to oldest, takes the first dict whose
    ``"role"`` is ``"assistant"`` and checks its lowercased text for either
    calculator marker phrase. Older assistant messages are never inspected.

    Args:
        historial: Sequence of chat messages (non-dict items are skipped).

    Returns:
        ``True`` if that message contains a marker; ``False`` otherwise, or
        when there is no history or no assistant message.
    """
    if not historial:
        return False
    marcadores = ("calculadora neo-1", "aquí tienes nuestra calculadora")
    ultimo_bot = next(
        (
            m
            for m in reversed(historial)
            if isinstance(m, dict) and m.get("role") == "assistant"
        ),
        None,
    )
    if ultimo_bot is None:
        return False
    contenido = extraer_texto_content(ultimo_bot.get("content")).lower()
    return any(marca in contenido for marca in marcadores)
+# ── ROBLOX ─────────────────────────────────────────────────────────────────────
 def generar_explicacion_juego(datos):
+    nombre     = datos.get("nombre", "Este juego")
+    tema       = datos.get("tema", "")
     descripcion = datos.get("descripcion", "").strip()
+    visitas    = datos.get("visitas", 0)
+    creador    = datos.get("creador", "")
+    tipo   = GENEROS_ROBLOX.get(tema, f"un juego de {tema.lower()}" if tema else "un juego")
     partes = [f"**{nombre}** es {tipo} de Roblox creado por **{creador}**."]
     if descripcion and descripcion != "Sin descripción.":
             return "juego", m.group(1).strip()
     return None, None
+# ── RESPUESTA FINAL (no-streaming, usada por neo_rest.py) ─────────────────────
 def respuesta_final(mensaje, historial):
     texto = mensaje.strip().lower()