import os

# Disable TensorFlow's oneDNN optimizations; must be set before TF is imported
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import csv
import json
import uuid
from datetime import datetime
from pathlib import Path

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from unidecode import unidecode

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
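

# Give each visitor a short anonymous id so logged interactions can be grouped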
if "user_id" not in st.session_state:
    st.session_state["user_id"] = str(uuid.uuid4())[:8]
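

# Callback that clears the question text area (widget key "entrada")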
def limpiar_input():
    st.session_state["entrada"] = ""
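

# Persist each exchange to two sinks: CSV for spreadsheets, JSONL for scripts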
def guardar_interaccion_dual(pregunta, respuesta, tipo, user_id):
    timestamp = datetime.now().isoformat()

    stats_dir = Path("Statistics")
    stats_dir.mkdir(parents=True, exist_ok=True)

    # CSV log: write the header row only when the file is first created
    archivo_csv = stats_dir / "conversaciones_log.csv"
    existe_csv = archivo_csv.exists()

    with open(archivo_csv, mode="a", encoding="utf-8", newline="") as f_csv:
        writer = csv.writer(f_csv)
        if not existe_csv:
            writer.writerow(["timestamp", "user_id", "tipo", "pregunta", "respuesta"])
        writer.writerow([timestamp, user_id, tipo, pregunta, respuesta])

    # JSONL log: one self-contained record per line
    archivo_jsonl = stats_dir / "conversaciones_log.jsonl"
    with open(archivo_jsonl, mode="a", encoding="utf-8") as f_jsonl:
        registro = {
            "timestamp": timestamp,
            "user_id": user_id,
            "tipo": tipo,
            "pregunta": pregunta,
            "respuesta": respuesta,
        }
        f_jsonl.write(json.dumps(registro, ensure_ascii=False) + "\n")
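

# Helpers for loading a checkpoint from the local "Models" directory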
def get_model_path(folder_name):
    return Path("Models") / folder_name


# Cached so Streamlit reruns reuse the loaded weights instead of reloading them
@st.cache_resource
def load_model(path_str):
    path = Path(path_str).resolve()
    tokenizer = AutoTokenizer.from_pretrained(path, local_files_only=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(path, local_files_only=True)
    return model, tokenizer
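

# Lightweight intent router: substring matching on the lowercased, accent-stripped input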
def detectar_intencion(texto_usuario):
    texto = unidecode(texto_usuario.lower())

    # Keywords are matched against the accent-stripped text, so they are ASCII too
    social_keywords = [
        "hola", "chiste", "como estas", "gracias", "que pex", "broma", "saludos", "eres", "estudiante", "preguntar algo",
        "estas ahi", "que amable", "haces", "kiubo", "bro", "ey", "todo bien", "te puedo", "animo", "hasta luego", "me ayudas",
        "motiva", "no entiendo", "te gusta", "futbol", "quien eres", "sentimientos", "canelo", "america", "chivas", "background",
        "cuantos anos", "proposito", "quien me habla", "te puedo preguntar", "ey bro", "quien te hizo", "que haces", "bonito",
        "piropo", "pex", "pasion", "hambre", "camara", "cansado", "adios"
    ]

    tecnico_keywords = [
        "modelo", "entrenamiento", "algoritmo", "regresion", "clasificacion", "overfitting", "datos", "que es",
        "define", "explicas", "pca", "cnn", "rnn", "clustering", "precision", "recall", "supervisado", "aprendizaje"
    ]

    if any(p in texto for p in social_keywords):
        return "Social"
    elif any(p in texto for p in tecnico_keywords):
        return "Técnica"
    else:
        # Unmatched questions default to the technical model
        return "Técnica"
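

# Social replies: sampled decoding (top-k / top-p) for varied small talk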
def responder_social(texto_usuario, social_model, social_tokenizer):
    device = next(social_model.parameters()).device

    inputs = social_tokenizer(
        texto_usuario,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128
    ).to(device)

    output_ids = social_model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=50,
        pad_token_id=social_tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        top_k=50
    )

    respuesta = social_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return respuesta.strip()
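

# Technical replies: deterministic (greedy) decoding with a "pregunta: " prompt prefix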
def responder_tecnico(texto_usuario, technical_model, technical_tokenizer):
    entrada = "pregunta: " + texto_usuario
    device = next(technical_model.parameters()).device

    inputs = technical_tokenizer(
        entrada,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128
    ).to(device)

    output = technical_model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=64
    )

    respuesta = technical_tokenizer.decode(output[0], skip_special_tokens=True)
    return respuesta.strip()
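

# Single entry point: classify the question, then delegate to the matching model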
def responder_mori(texto_usuario):
    intencion = detectar_intencion(texto_usuario)

    if intencion == "Social":
        return responder_social(texto_usuario, modelo_social, tokenizer_social)
    # "Técnica" and any other label fall back to the technical model
    return responder_tecnico(texto_usuario, modelo_tecnico, tokenizer_tecnico)
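

# Conversation history lives in session state so it survives Streamlit reruns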
if "historial" not in st.session_state:
|
|
|
st.session_state.historial = []
|
|
|
|
|
|
|
|
|
|
|
|
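

# Load the two fine-tuned checkpoints from the Hugging Face Hub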
tokenizer_tecnico = AutoTokenizer.from_pretrained("tecuhtli/mori-tecnico-model")
modelo_tecnico = AutoModelForSeq2SeqLM.from_pretrained("tecuhtli/mori-tecnico-model")

tokenizer_social = AutoTokenizer.from_pretrained("tecuhtli/mori-social-model")
modelo_social = AutoModelForSeq2SeqLM.from_pretrained("tecuhtli/mori-social-model")

# Move both models to the device selected at startup
modelo_social = modelo_social.to(device)
modelo_tecnico = modelo_tecnico.to(device)


st.title("🤖 Mori - Tu Asistente Personal")
st.header("Experto en Procesamiento de Datos 🐈")
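
# Input form: the app only reruns when the submit button is pressed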
with st.form("formulario_mori"):
    entrada_usuario = st.text_area("📝 Escribe tu pregunta aquí", key="entrada", height=100)
    submitted = st.form_submit_button("Responder")

if submitted:
    opcion = detectar_intencion(entrada_usuario)

    if opcion == "Técnica":
        respuesta = "🧠 [Mori Técnico] " + responder_tecnico(entrada_usuario, modelo_tecnico, tokenizer_tecnico)
    else:
        respuesta = "🤝 [Mori Social] " + responder_social(entrada_usuario, modelo_social, tokenizer_social)
    st.success(respuesta)

    # The answer is appended before the question so the reversed rendering below
    # shows each question directly above its answer
    st.session_state.historial.append(("Mori", respuesta))
    st.session_state.historial.append(("Tú", entrada_usuario))

    guardar_interaccion_dual(entrada_usuario, respuesta, opcion, st.session_state["user_id"])
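

# Render the conversation, newest exchange first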
if st.session_state.historial:
    st.markdown("---")
    for autor, texto in reversed(st.session_state.historial):
        if autor == "Tú":
            st.markdown(f"🧍♂️ **{autor}**: {texto}")
        else:
            st.markdown(f"🤖 **{autor}**: {texto}")
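

# Offer the full transcript as a plain-text download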
if st.session_state.historial:
    texto_chat = ""
    for autor, texto in st.session_state.historial:
        texto_chat += f"{autor}: {texto}\n\n"

    st.download_button(
        label="💾 Descargar conversación como .txt",
        data=texto_chat,
        file_name="conversacion_mori.txt",
        mime="text/plain"
    )