from sentence_transformers import SentenceTransformer
import chromadb
import pandas as pd
import gradio as gr
from datetime import datetime
import tempfile
import os
# ===== Configuration =====
# Directory handed to the persistent ChromaDB client created below.
CHROMA_DIR = "chroma_db"
# Model name handed to SentenceTransformer; it produces the query embeddings.
MODEL_NAME = "mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-16-5e"
# Names of the ChromaDB collections that are opened at startup and searched.
COLLECTION_NAMES = ["spc"]
# ===== 1. Connect to the database and load the collections =====
# NOTE: everything below runs at import time, so importing this module opens
# the database and loads the embedding model.
print("Conectando a ChromaDB...")
client = chromadb.PersistentClient(path=CHROMA_DIR)
# One collection handle per configured name, in the same order as
# COLLECTION_NAMES.
collections = [client.get_collection(name) for name in COLLECTION_NAMES]
# ===== 2. Load the model =====
print("Cargando modelo...")
# NOTE(review): trust_remote_code=True presumably lets the loader run code
# shipped with the model repository — confirm it is required for this model.
model = SentenceTransformer(MODEL_NAME, trust_remote_code=True)
print("✓ Sistema listo")
# ===== 3. Función de búsqueda =====
def semantic_search(query: str, n_results: int = 20):
    """Run a semantic search across every loaded ChromaDB collection.

    The query is embedded with the module-level ``model`` and every
    collection in the module-level ``collections`` list is asked for its
    ``n_results`` nearest neighbours. Hits from all collections are merged,
    ranked by similarity (highest first) and trimmed to ``n_results`` rows.

    Args:
        query: Free-text search string. ``None``, empty or whitespace-only
            input returns immediately without using the model or database.
        n_results: Number of hits requested from each collection and the
            maximum number of rows kept in the merged table.

    Returns:
        A ``(DataFrame, str)`` tuple. For a blank query the frame is a bare
        empty ``DataFrame``. Otherwise it holds exactly the display columns
        listed in ``display_columns``; a column missing from the metadata is
        filled with NaN instead of raising. The string is always empty.
    """
    # Columns shown to the user, in display order.
    display_columns = [
        'Número de Resolución',
        'Fecha de Resolución',
        'Texto',
        'Enlace',
        'Relevante',
    ]
    # Metadata keys (compared case-insensitively) treated as links.
    link_keys = {'url', 'link', 'enlace'}

    print(f"Buscando: {query}")
    # Guard against None as well as blank strings; None has no .strip().
    if not query or not query.strip():
        return pd.DataFrame(), ""

    query_embedding = model.encode(query).tolist()
    all_results = []
    for collection in collections:
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results,
            include=["documents", "metadatas", "distances"],
        )
        # Each field is a list with one inner list per query embedding; only
        # one embedding is sent, hence the [0].
        hits = zip(
            results['ids'][0],
            results['documents'][0],
            results['metadatas'][0],
            results['distances'][0],
        )
        for doc_id, text, metadata, distance in hits:
            row = {
                'Relevante': False,  # Selection checkbox, unticked by default
                'ID': doc_id,
                # NOTE(review): 1 - distance is a cosine similarity only if
                # the collection uses cosine distance — confirm the
                # collection configuration.
                'Similitud': round(1 - distance, 4),
                'Texto': text,
                'Colección': collection.name,
            }
            # Metadata is merged last, so a metadata key that collides with
            # one of the base keys above overrides it.
            for key, value in (metadata or {}).items():
                if key.lower() in link_keys and value:
                    # NOTE(review): the URL in `value` is discarded here and
                    # only the label is kept; the cell text is left unchanged
                    # because the export step matches on this exact label.
                    # Verify whether the link target should be embedded.
                    row[key] = '🔗 Abrir'
                else:
                    row[key] = value
            all_results.append(row)

    if all_results:
        ranked = (
            pd.DataFrame(all_results)
            .sort_values('Similitud', ascending=False)
            .head(n_results)
        )
    else:
        # No hits: sort_values('Similitud') would raise KeyError on a frame
        # without columns, so start from an empty frame instead.
        ranked = pd.DataFrame(columns=display_columns)

    # reindex rather than ranked[display_columns]: a display column absent
    # from the metadata becomes NaN instead of raising KeyError.
    df = ranked.reindex(columns=display_columns)
    print(f"Resultados: {len(df)}")
    return df, ""
# ===== 4. Función para exportar seleccionados a Excel =====
def export_to_excel(df_with_selection):
if df_with_selection is None or len(df_with_selection) == 0:
gr.Warning("No hay datos para exportar")
return None
# Filtrar solo los marcados como True en la columna 'Relevante'
df_selected = df_with_selection[df_with_selection['Relevante'] == True].copy()
if len(df_selected) == 0:
gr.Warning("No has seleccionado ninguna decisión")
return None
# Quitar la columna de checkbox del export
df_export = df_selected.drop(columns=['Relevante'])
# Limpiar HTML de los links para Excel
for col in df_export.columns:
if df_export[col].dtype == 'object':
df_export[col] = df_export[col].apply(
lambda x: x.replace('🔗 Abrir', '')
if isinstance(x, str) and '