Spaces:

tecuhtli
/

Mori_Bot

Running

File size: 13,575 Bytes

# -*- coding: utf-8 -*-
"""Mori – Inferencia Técnica (estable, UTF-8, con opción RAG ON/OFF)"""
#=====================================================================================
# Importing Libraries  ===============================================================
#=====================================================================================
import os, warnings, json, random, uuid, csv
import numpy as np
import streamlit as st
import datetime as dt
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from Mori_TechnicalPrompts import answer_with_mori_rag, answer_with_mori_plain
import torch
from huggingface_hub import hf_hub_download, login
from sentence_transformers import SentenceTransformer  # RAG embeddings
#***************************************************************************
#Setting up variables
#***************************************************************************
# Private Hugging Face token taken from the environment (None when unset)
HF_TOKEN = os.getenv("HF_TOKEN")

#***************************************************************************
# FAISS vector DB repository id
#***************************************************************************
REPO_ID = "tecuhtli/Mori_FAISS_Full"


#***************************************************************************
# Sidebar controls for generation params
#***************************************************************************

def sidebar_params():
    """Draw the sidebar widgets and return the generation parameters.

    Fills ``st.session_state`` with default values for every key that is not
    already present, renders the persona preset buttons, the RAG checkbox and
    a read-only preview of the last generated prompt.

    Returns:
        dict with the keys ``persona``, ``mode``, ``max_new_tokens``,
        ``min_tokens``, ``no_repeat_ngram_size`` and ``repetition_penalty``,
        all read from the session state.

    NOTE(review): the default persona is "Mori Exacto" while the preset
    buttons store "exacto" / "creativo" -- confirm the prompt builders that
    receive this value accept both spellings.
    """
    state = st.session_state

    with st.sidebar:
        st.title("🎮 Personalidad (FLAN-T5)")

        # Initial state: setdefault never overwrites a value set on a
        # previous run.
        initial_state = {
            "show_llm_controls": False,
            "persona": "Mori Exacto",
            "mode": "beam",  # 'beam' | 'sampling'
            "max_new": 128,
            "min_tok": 16,
            "no_repeat": 3,
            "num_beams": 4,
            "length_penalty": 1.0,
            "temperature": 0.7,
            "top_p": 0.9,
            "repetition_penalty": 1.0,
        }
        for key, default in initial_state.items():
            state.setdefault(key, default)

        # ----------------------------
        # Persona presets: one button per column
        # ----------------------------
        st.header("💡 Personalidades predefinidas")
        presets = (
            ("Exacto 🧐", "exacto"),
            ("Creativo 😃", "creativo"),
        )
        for column, (label, persona_value) in zip(st.columns(2), presets):
            with column:
                if st.button(label, use_container_width=True):
                    state["persona"] = persona_value
                    # Restart the script so the whole page sees the new persona
                    st.rerun()

        current_persona = state.persona
        st.caption(f"Personalidad actual: **{current_persona}**")

        # ----------------------------
        # RAG toggle
        # ----------------------------
        st.markdown("---")
        st.title("👀 RAG")
        state.setdefault("use_rag", True)
        state.setdefault("rag_k", 1)
        state.use_rag = st.checkbox(
            "Usar RAG (FAISS + One-Shot)",
            value=state.use_rag,
            help="Recupera evidencias de la base FAISS de Mori en Hugging Face y las usa en el prompt."
        )

        # ----------------------------
        # Preview of the last prompt sent to the model
        # ----------------------------
        st.markdown("---")
        st.title("🧾 Vista previa del Prompt")

        last_prompt = state.get("last_prompt")
        if not last_prompt:
            st.caption("👉 Aún no se ha generado ningún prompt.")
        else:
            with st.expander("Mostrar prompt generado"):
                st.text_area(
                    "Prompt actual:",
                    last_prompt,
                    height=200,
                    disabled=True
                )

        # ----------------------------
        # Parameter dictionary handed back to the caller
        # ----------------------------
        return {
            "persona": state.persona,
            "mode": state.mode,
            "max_new_tokens": int(state.max_new),
            "min_tokens": int(state.min_tok),
            "no_repeat_ngram_size": int(state.no_repeat),
            "repetition_penalty": float(state.repetition_penalty),
        }

#***************************************************************************
# Functions
#***************************************************************************

# Clears the question field (kept in case it is later wired to a button)
def limpiar_input():
    """Reset the ``entrada`` session-state entry to an empty string."""
    st.session_state.update({"entrada": ""})

# Builds the path of a model folder inside the relative "Models" directory
def get_model_path(folder_name):
    """Return the path ``Models/<folder_name>`` as a ``pathlib.Path``.

    The result is relative to the current working directory.
    """
    model_path = Path("Models", folder_name)
    return model_path

# Function to save user interaction
def saving_interaction(question, response, user_id, use_of_rag, bot_personality):
    """Append one question/answer exchange to the CSV and JSONL logs.

    Both files live in the ``Statistics`` directory (relative to the current
    working directory), which is created when missing.

    Args:
        question: Text the user asked.
        response: Text the bot answered.
        user_id: Identifier of the current user/session.
        use_of_rag: Label describing whether RAG was used.
        bot_personality: Persona used to generate the answer.

    Raises:
        OSError: If the directory or either log file cannot be written.
    """
    timestamp = dt.datetime.now().isoformat()
    stats_dir = Path("Statistics")
    stats_dir.mkdir(parents=True, exist_ok=True)

    archivo_csv = stats_dir / "conversaciones_log.csv"
    # The header is needed for a brand-new file AND for a file that exists
    # but is still empty; otherwise the log would start with a data row.
    necesita_encabezado = (
        not archivo_csv.exists() or archivo_csv.stat().st_size == 0
    )

    # CSV (newline="" lets the csv module control line endings)
    with open(archivo_csv, mode="a", encoding="utf-8", newline="") as f_csv:
        writer = csv.writer(f_csv)
        if necesita_encabezado:
            writer.writerow(["timestamp", "user_id", "pregunta", "respuesta", "rag", "personality"])
        writer.writerow([timestamp, user_id, question, response, use_of_rag, bot_personality])

    # JSONL: one JSON object per line, non-ASCII characters kept readable
    registro = {
        "timestamp": timestamp,
        "user_id": user_id,
        "pregunta": question,
        "respuesta": response,
        "uso_rag": use_of_rag,
        "personality": bot_personality
    }
    archivo_jsonl = stats_dir / "conversaciones_log.jsonl"
    with open(archivo_jsonl, mode="a", encoding="utf-8") as f_jsonl:
        f_jsonl.write(json.dumps(registro, ensure_ascii=False) + "\n")

# Function to load models within the huggingface repositories space
@st.cache_resource
def load_remote_model(repo_id: str, token: str = None):
    """Load a seq2seq model and its tokenizer from a Hugging Face repo.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of loading them again on every script run.

    Args:
        repo_id: Repository id passed to ``from_pretrained``.
        token: Optional access token forwarded to both loaders.

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    auth_kwargs = {"token": token}
    # Tokenizer first, then model -- same load order as before
    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, **auth_kwargs)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(repo_id, **auth_kwargs)
    return loaded_model, loaded_tokenizer
#-------------------------------------------------------------------------
# Seeds
#-------------------------------------------------------------------------

def set_seeds(seed: int = 42):
    """Seed Python, NumPy and PyTorch RNGs and set the cuDNN flags.

    Args:
        seed: Value handed to every random number generator.
    """
    # CPU-side generators share the same call shape, so seed them in one pass
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # Seed every CUDA device only when CUDA is actually usable
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # cuDNN: deterministic mode on, benchmark mode off
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

#***************************************************************************
# MAIN
#***************************************************************************

if __name__ == "__main__":
    # Local import: only needed here, to escape chat text before it is
    # rendered as raw HTML.
    import html

    # Fixed seed for reproducibility
    set_seeds(42)

    # --- State that must survive every rerun ---
    ss = st.session_state
    ss.setdefault("historial", [])
    ss.setdefault("last_prompt", "")
    ss.setdefault("last_response", "")
    ss.setdefault("just_generated", False)

    # Sidebar (renders the controls and returns the generation parameters)
    GEN_PARAMS = sidebar_params()
    GEN_PARAMS["persona"] = ss.persona  # keep in sync with the session state

    # Assign a short random id to the current user on the first run
    if "user_id" not in ss:
        ss["user_id"] = str(uuid.uuid4())[:8]

    # Technical model
    MODEL_REPO_ID = "tecuhtli/mori-tecnico-model"
    model, tokenizer = load_remote_model(MODEL_REPO_ID, HF_TOKEN)

    # Introduction
    st.title("🤖 Mori - Tu Asistente Personal ⌨️")

    st.caption("🙋🏽‍ Puedes preguntarme conceptos sobre machine learning, estadística, visualización, BI, limpieza de datos y más.")
    st.caption("🙇🏽‍ Por el momento, solo puedo contestar preguntas simples como:")

    st.caption("  🔹 **Definiciones** — Ejemplo: *¿Qué es machine learning?*")
    st.caption("  🔹 **Procedimientos** — Ejemplo: *¿Cómo limpiar datos?*")
    st.caption("  🔹 **Funcionalidad** — Ejemplo: *¿Para qué sirve un autoencoder?*")

    st.markdown("<br>", unsafe_allow_html=True)

    st.caption("🦾 Aún estoy aprendiendo. Puedes ver mi desarrollo aquí:")
    st.caption("[hazutecuhtli.github.io](https://github.com/hazutecuhtli/Mori_Development)")

    st.markdown("<br>", unsafe_allow_html=True)

    st.caption("✏️ Escribe tu pregunta abajo.")

    # Clear the text area BEFORE the widget is created: the value of a
    # keyed widget is dropped here, ahead of its instantiation in this run.
    if ss.pop("_clear_entrada", False):
        if "entrada" in ss:
            del ss["entrada"]

    # Answer stored by the previous run; shown after the form
    _flash = ss.pop("_flash_response", None)

    # Main form
    with st.form("formulario_mori"):
        user_question = st.text_area("📝 Escribe tu pregunta aquí", key="entrada", height=100)
        submitted = st.form_submit_button("Responder")

    if submitted:
        # A question made only of whitespace is treated as empty so it is
        # never sent to the model.
        if not user_question or not user_question.strip():
            st.info("Mori: ¿Podrías repetir eso? No entendí bien 😅")
        else:
            use_rag = ss.get("use_rag", False)
            persona = GEN_PARAMS.get("persona", ss.persona)

            if use_rag:
                use_of_rag = 'Con RAG'
                response, prompt = answer_with_mori_rag(
                    tokenizer, model, user_question,
                    modo=persona,
                    verbose=False
                )
            else:
                use_of_rag = 'Sin RAG'
                response, prompt = answer_with_mori_plain(
                    tokenizer, model, user_question,
                    modo=persona
                )

            ss["last_prompt"] = prompt
            ss["just_generated"] = True

            # History: 3-tuple for the user, 5-tuple for the bot
            hora_actual = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            ss.historial.append(("Tú", user_question, hora_actual))
            ss.historial.append(("Mori", response, hora_actual, use_of_rag, persona))

            # Persist the exchange on disk
            saving_interaction(user_question, response, ss["user_id"], use_of_rag, persona)

            # Keep the answer so it can be shown after the rerun
            ss["_flash_response"] = response

            # Ask the next run to clear the text area
            ss["_clear_entrada"] = True

            # Force a refresh so the sidebar shows the new prompt
            st.rerun()

    # -----------------------------------------------------------
    # Current answer (flash)
    # -----------------------------------------------------------
    if _flash:
        st.success(_flash)

    # Chat-style history inside a scrollable container
    if ss.historial:
        st.markdown("---")

        # Plain-text transcript for the download button (newest first)
        lineas = []
        for msg in reversed(ss.historial):
            autor, texto, hora = msg[:3]
            linea = f"[{hora}] {autor}: {texto}"
            if len(msg) == 5:
                rag, bot_per = msg[3:]
                linea += f" RAG:{rag} Personality:{bot_per}"
            lineas.append(linea)
        texto_chat = "\n\n".join(lineas)

        st.download_button(
            label="💾 Descargar conversación como .txt",
            data=texto_chat,
            file_name="conversacion_mori.txt",
            mime="text/plain",
            use_container_width=True
        )

        estilo_contenedor = (
            "max-height: 400px; overflow-y: auto; padding: 10px; "
            "border: 1px solid #333; border-radius: 10px; "
            "background: linear-gradient(180deg, #0e0e0e 0%, #1b1b1b 100%); "
            "margin-top: 10px;"
        )
        estilo_usuario = (
            "text-align: right; background-color: #2d2d2d; color: #e6e6e6; "
            "padding: 10px 14px; border-radius: 12px; margin: 6px 0; "
            "border: 1px solid #3a3a3a; display: inline-block; "
            "max-width: 80%; float: right; clear: both;"
        )
        estilo_mori = (
            "text-align: left; background-color: #162b1f; color: #d9ead3; "
            "padding: 10px 14px; border-radius: 12px; margin: 6px 0; "
            "border: 1px solid #264d36; display: inline-block; "
            "max-width: 80%; float: left; clear: both;"
        )

        burbujas = []
        for msg in reversed(ss.historial):
            autor, texto = msg[0], msg[1]
            es_usuario = autor == "Tú"
            estilo = estilo_usuario if es_usuario else estilo_mori
            icono = "🧍‍♂️" if es_usuario else "🤖"
            # Escape the text: it comes from the user / the model and is
            # rendered as raw HTML. Line breaks become <br> so the whole
            # chat stays on one line and Markdown keeps it as a single
            # HTML block.
            texto_html = "<br>".join(html.escape(str(texto)).splitlines())
            burbujas.append(
                f'<div style="{estilo}">'
                f"{icono} <b>{html.escape(str(autor))}:</b> {texto_html}"
                "</div>"
            )

        # One single markdown call: an opening <div> sent in its own call is
        # closed by the renderer, so the bubbles would end up outside the
        # scrollable container.
        contenido = "".join(burbujas)
        st.markdown(
            f'<div id="chat-container" style="{estilo_contenedor}">{contenido}</div>',
            unsafe_allow_html=True
        )